Read timeout using either urllib2 or any other http library

花落未央 · 2020-11-29 05:36

I have code for reading a URL like this:

from urllib2 import Request, urlopen
req = Request(url)
for key, val in headers.items():
    req.add_header(key, val)
response = urlopen(req, timeout=timeout)
content = response.read()  # this can block far longer than the timeout

The timeout passed to urlopen() only bounds the connection and each individual socket read, so read() can block indefinitely against a server that keeps trickling data. How can I enforce a total read timeout, using urllib2 or any other HTTP library?
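To see the problem concretely, the slow test server from the second answer below can be used; a minimal sketch, assuming it is running on localhost:8000 with a 1-second delay between chunks:

from urllib2 import urlopen
# Each 1-second chunk arrives well within the 2-second socket timeout,
# so no timeout fires even though the whole read takes ~5 seconds.
response = urlopen('http://localhost:8000', timeout=2)
print response.read()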
8 Answers
  • 2020-11-29 06:00

    Any asynchronous network library should let you enforce a total timeout on any I/O operation. For example, here's a gevent version:

    #!/usr/bin/env python2
    import gevent
    import gevent.monkey # $ pip install gevent
    gevent.monkey.patch_all() # patch stdlib sockets so urllib2 yields to gevent
    
    import urllib2
    
    with gevent.Timeout(2): # enforce total timeout; raises gevent.Timeout on expiry
        response = urllib2.urlopen('http://localhost:8000')
        encoding = response.headers.getparam('charset')
        print response.read().decode(encoding)
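
    When the deadline expires, the with-block raises the gevent.Timeout instance itself, so a caller can catch it to degrade gracefully; a minimal sketch under the same assumptions:

    try:
        with gevent.Timeout(2):
            response = urllib2.urlopen('http://localhost:8000')
            body = response.read()
    except gevent.Timeout:
        print 'request timed out'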
    

    And here's an asyncio equivalent (updated for aiohttp 3, which removed the module-level aiohttp.get()):

    #!/usr/bin/env python3
    import asyncio
    import aiohttp # $ pip install aiohttp
    
    async def fetch_text(url):
        # aiohttp 3 requires an explicit ClientSession
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                return await response.text()
    
    text = asyncio.run(
        asyncio.wait_for(fetch_text('http://localhost:8000'), timeout=2))
    print(text)
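
    Recent aiohttp versions also provide their own total-timeout setting, which makes the outer wait_for() unnecessary; a minimal sketch under the same assumptions:

    async def fetch_text_with_client_timeout(url):
        # ClientTimeout(total=...) caps the whole request: connect + read
        timeout = aiohttp.ClientTimeout(total=2)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url) as response:
                return await response.text()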
    

    The test HTTP server is defined in slow_http_server.py in the answer below.

  • 2020-11-29 06:01

    The pycurl.TIMEOUT option applies to the whole request:

    #!/usr/bin/env python3
    """Test that pycurl.TIMEOUT does limit the total request timeout."""
    import sys
    import pycurl
    
    timeout = 2  # NOTE: pycurl.TIMEOUT caps the total transfer time (connect + read)
    c = pycurl.Curl()
    c.setopt(pycurl.CONNECTTIMEOUT, timeout)
    c.setopt(pycurl.TIMEOUT, timeout)
    c.setopt(pycurl.WRITEFUNCTION, sys.stdout.buffer.write)
    c.setopt(pycurl.HEADERFUNCTION, sys.stderr.buffer.write)
    c.setopt(pycurl.NOSIGNAL, 1)
    c.setopt(pycurl.URL, 'http://localhost:8000')
    c.setopt(pycurl.HTTPGET, 1)
    c.perform()
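
    When the limit is exceeded, perform() raises pycurl.error, whose first argument is the libcurl error code; a minimal sketch of wrapping the call to handle the timeout case specifically:

    try:
        c.perform()
    except pycurl.error as exc:
        errno = exc.args[0]
        if errno == pycurl.E_OPERATION_TIMEDOUT:
            print('request timed out', file=sys.stderr)
        else:
            raise
    finally:
        c.close()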
    

    The code raises the timeout error in ~2 seconds. I've tested the total read timeout with a server that sends the response in multiple chunks, waiting less than the timeout between consecutive chunks:

    $ python -mslow_http_server 1
    

    where slow_http_server.py:

    #!/usr/bin/env python
    """Usage: python -mslow_http_server [<read_timeout>]
    
       Return an http response with *read_timeout* seconds between parts.
    """
    import time
    try:
        from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer, test
    except ImportError: # Python 3
        from http.server import BaseHTTPRequestHandler, HTTPServer, test
    
    def SlowRequestHandlerFactory(read_timeout):
        class HTTPRequestHandler(BaseHTTPRequestHandler):
            def do_GET(self):
                n = 5  # number of chunks in the response body
                data = b'1\n'
                self.send_response(200)
                self.send_header("Content-type", "text/plain; charset=utf-8")
                self.send_header("Content-Length", n*len(data))
                self.end_headers()
                for i in range(n):  # dribble the body out one chunk at a time
                    self.wfile.write(data)
                    self.wfile.flush()
                    time.sleep(read_timeout)  # pause between chunks
        return HTTPRequestHandler
    
    if __name__ == "__main__":
        import sys
        read_timeout = int(sys.argv[1]) if len(sys.argv) > 1 else 5
        test(HandlerClass=SlowRequestHandlerFactory(read_timeout),
             ServerClass=HTTPServer)
    

    I've tested the total connection timeout with http://google.com:22222.
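
    For HTTP libraries whose timeout parameter only bounds individual socket operations (requests is one such case), a total deadline can still be enforced by streaming the body and checking a clock between chunks. A minimal sketch, assuming requests is installed; get_with_deadline is a hypothetical helper, not part of requests:

    import time
    import requests  # $ pip install requests

    def get_with_deadline(url, total_timeout):
        """Hypothetical helper: fetch url, failing once total_timeout elapses."""
        deadline = time.monotonic() + total_timeout
        # requests' timeout= bounds the connect and each socket read, not the total
        response = requests.get(url, timeout=total_timeout, stream=True)
        chunks = []
        for chunk in response.iter_content(chunk_size=1024):
            if time.monotonic() > deadline:
                response.close()
                raise RuntimeError('total read timeout exceeded')
            chunks.append(chunk)
        return b''.join(chunks)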
