from mechanize import Browser
# Fetch a page with mechanize and print its raw HTML one line at a time.
br = Browser()
br.open('http://somewebpage')
html = br.response().readlines()
for line in html:
    # Single-argument print(...) is valid syntax on both Python 2 and 3.
    print(line)
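When printing each line of the HTML, only the text content of the elements should be shown, not the markup itself.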
Here is a simple solution, based on the amazingly fast lxml library, that strips HTML tags and decodes HTML entities:
from lxml import html
def strip_html(s):
    """Return the plain text of an HTML fragment.

    Tags are removed and HTML entities are decoded by parsing the
    fragment with lxml and reading the text content of the result.

    Args:
        s: HTML markup to convert.

    Returns:
        The text content as a plain ``str``; an empty string when *s* is
        ``None``, empty, or contains only whitespace.
    """
    # lxml rejects input with no content (ParserError: Document is empty),
    # so short-circuit instead of letting the parser raise.
    if not s or not s.strip():
        return ""
    # str() turns lxml's "smart string" result into a plain str.
    return str(html.fromstring(s).text_content())
# Example: the link tag is dropped and the &ouml; entity is decoded.
strip_html('Ein <a href="">sch&ouml;ner</a> Text.')  # Output: Ein schöner Text.