Strip HTML from strings in Python

前端未结

关注

 26  2335

难免孤独 2020-11-22 02:50

from mechanize import Browser

# Fetch a page with mechanize and dump its raw HTML line by line.
br = Browser()
br.open('http://somewebpage')
html = br.response().readlines()
for line in html:
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(line)

When p

26条回答

悲哀的现实 (楼主)

2020-11-22 03:09

# This is a regex solution.
import re
# Compiled once at import time instead of on every call / loop iteration.
# [\s\S] is used so a comment may span several lines.
_HTML_COMMENT_RE = re.compile(r'<!--[\s\S]*?-->')
_HTML_TAG_RE = re.compile(r'<[^<>]+?>')


def removeHtml(html):
    """Return *html* with HTML comments and tags stripped out.

    Comments are removed first so that a ``>`` inside a comment is not
    mistaken for the end of a tag. Tags are then removed repeatedly,
    because removing an inner tag can expose an outer one (for example
    ``<<b>i>`` becomes ``<i>`` after the first pass).

    Args:
        html: The markup to clean. Falsy values (``None``, ``''``) are
            returned unchanged.

    Returns:
        The remaining text with leading and trailing whitespace stripped.
    """
    if not html:
        return html

    # Remove comments first.
    inner_text = _HTML_COMMENT_RE.sub('', html)

    # Loop through nested tags until a pass removes nothing more.
    while '>' in inner_text:
        stripped = _HTML_TAG_RE.sub('', inner_text)
        if stripped == inner_text:
            # A literal '>' that is not part of any tag remains; stop here.
            break
        inner_text = stripped

    return inner_text.strip()

0 讨论(0)

查看其它26个回答