I am opening a file which has 100,000 URL\'s. I need to send an HTTP request to each URL and print the status code. I am using Python 2.6, and so far looked at the many con
I found that using the tornado
package to be the fastest and simplest way to achieve this:
from tornado import ioloop, httpclient, gen
def main(urls):
"""
Asynchronously download the HTML contents of a list of URLs.
:param urls: A list of URLs to download.
:return: List of response objects, one for each URL.
"""
@gen.coroutine
def fetch_and_handle():
httpclient.AsyncHTTPClient.configure(None, defaults=dict(user_agent='MyUserAgent'))
http_client = httpclient.AsyncHTTPClient()
waiter = gen.WaitIterator(*[http_client.fetch(url, raise_error=False, method='HEAD')
for url in urls])
results = []
# Wait for the jobs to complete
while not waiter.done():
try:
response = yield waiter.next()
except httpclient.HTTPError as e:
print(f'Non-200 HTTP response returned: {e}')
continue
except Exception as e:
print(f'An unexpected error occurred querying: {e}')
continue
else:
print(f'URL \'{response.request.url}\' has status code <{response.code}>')
results.append(response)
return results
loop = ioloop.IOLoop.current()
web_pages = loop.run_sync(fetch_and_handle)
return web_pages
my_urls = ['url1.com', 'url2.com', 'url100000.com']
responses = main(my_urls)
print(responses[0])