from mechanize import Browser
# Fetch a page with mechanize and echo the response line by line.
br = Browser()
# The quotes were backslash-escaped outside of any string literal, which is
# a syntax error; a plain string literal is what was intended.
br.open('http://somewebpage')
html = br.response().readlines()
for line in html:
    # Parenthesised form works as both the Python 2 print statement and
    # the Python 3 print function for a single argument.
    # NOTE(review): lines from readlines() presumably keep their trailing
    # newline, so the output is double-spaced - confirm if that matters.
    print(line)
When p
Simple code! This will remove all kinds of tags and the content inside them.
def rm(s):
    """Return *s* with every ``<...>`` span removed.

    A span starts at a ``<`` and ends at the first ``>`` that follows it;
    the delimiters and everything between them are dropped.  A ``<`` that
    has no ``>`` after it is left in place together with the rest of the
    text.  After the spans are removed, each ``&nbsp;`` entity is replaced
    by a single space.

    This is a plain character scan, not an HTML parser: a literal ``<``
    that is later followed by a ``>`` in ordinary text is treated as a tag.

    Args:
        s: The text to strip.

    Returns:
        The stripped text as a new string.
    """
    kept = []
    pos = 0
    while True:
        start = s.find('<', pos)
        if start == -1:
            break
        end = s.find('>', start + 1)
        if end == -1:
            # Unterminated '<': nothing left to strip.  The earlier version
            # recursed for as long as any '<' remained, which never ended
            # in this case.
            break
        kept.append(s[pos:start])
        pos = end + 1
    kept.append(s[pos:])
    # Scanning with an explicit position needs no padding character, so the
    # result no longer carries the leading space the earlier version added
    # to keep index 0 distinguishable from its ``False`` sentinel.
    # NOTE(review): the original literal here was a space-for-space replace
    # (a no-op), presumably an '&nbsp;' entity decoded during extraction -
    # confirm against the intended behavior.
    return ''.join(kept).replace('&nbsp;', ' ')
But it won't give the full result if the text itself contains literal < or > symbols.