1:引入第三方类库
vendor下:
<?php

namespace Curlroll;

/**
 * Rolling-window HTTP client built on the cURL "multi" interface.
 *
 * Requests are queued with add()/get()/post() and sent by execute(). At most
 * $window_size transfers are in flight at any time; whenever one completes,
 * the next queued request is started. Each finished transfer is handed to the
 * callback as (string|false $output, array $info, \stdClass $request).
 *
 * The code deliberately sticks to PHP 5.3-compatible syntax while also working
 * on PHP 8+, where cURL handles are objects rather than resources.
 */
class CurlRoll
{
    /**
     * @var int
     * Maximum number of concurrent requests. Setting this too high makes it
     * easy for a remote host to classify the traffic as a DDoS attack.
     */
    private $window_size = 5;

    /**
     * @var float
     * Timeout (seconds) passed to curl_multi_select().
     */
    private $timeout = 10;

    /**
     * @var array
     * Queued request objects (stdClass instances built by createRequest()).
     */
    private $requests = array();

    /**
     * @var array
     * Map of in-flight curl handle key => index into $requests.
     */
    private $requestMap = array();

    /**
     * @var string|array|null
     * Callback invoked with the result of every finished request.
     */
    private $callback;

    /**
     * @var array
     * Default cURL options applied to every request.
     */
    private $options = array(
        // NOTE(review): this disables TLS peer certificate verification for
        // every request. Kept as-is for backward compatibility; override it
        // per request or via $roll->options when talking to untrusted hosts.
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_RETURNTRANSFER => 1,   // return the response body instead of printing it
        CURLOPT_CONNECTTIMEOUT => 10,  // connection timeout in seconds
        CURLOPT_TIMEOUT => 20,         // maximum total transfer time in seconds
        CURLOPT_FOLLOWLOCATION => 1,   // follow "Location:" redirects
        CURLOPT_MAXREDIRS => 5,        // maximum redirects when FOLLOWLOCATION is on
        CURLOPT_HEADER => 0,           // when true the response headers are included in the output
        CURLOPT_AUTOREFERER => true,   // set the Referer header automatically on redirects
        CURLOPT_ENCODING => "",        // empty string = advertise every supported Accept-Encoding
    );

    /**
     * @var array
     * Default HTTP request header lines (a list of "Name: value" strings,
     * not a key/value map).
     */
    private $headers = array(
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        'Connection: close',
        'Cache-Control: max-age=0',
        // Further header lines (for example a proxy or client IP header)
        // can be appended through $roll->headers = array('Name: value').
    );

    /**
     * @var array
     * Pool of user-agent strings; one is picked at random for every request
     * that does not specify CURLOPT_USERAGENT itself.
     */
    private static $agent = array(
        // Google Chrome
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36',
        'Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
        // Firefox
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0',
        'Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0',
        // Internet Explorer
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
        'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
        'Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)',
        'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.2; SV1; .NET CLR 3.3.69573; WOW64; en-US)',
    );

    /**
     * @param int $window_size Maximum number of concurrent requests; values
     *                         that are not positive integers fall back to 5.
     */
    public function __construct($window_size = 5)
    {
        $size = (int)$window_size;
        $this->window_size = $size > 0 ? $size : 5;
    }

    /**
     * Drops the references held by this instance.
     *
     * @return void
     */
    public function __destruct()
    {
        unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests);
    }

    /**
     * Read access to the private configuration properties.
     *
     * @param string $name
     * @return mixed The property value, or null when it is not set.
     */
    public function __get($name)
    {
        return isset($this->{$name}) ? $this->{$name} : null;
    }

    /**
     * Write access to the private configuration properties.
     *
     * "options" and "headers" are merged with the current defaults instead of
     * replacing them: a given option overrides the default with the same key,
     * given header lines are added to the default header lines.
     *
     * @param string $name
     * @param mixed  $value
     * @return bool
     */
    public function __set($name, $value)
    {
        if ($name == "options") {
            // Options are keyed by CURLOPT_* constant, so an array union with
            // the new values on the left lets them override the defaults.
            $this->options = (array)$value + (array)$this->options;
        } elseif ($name == "headers") {
            // Headers are a plain list; an array union would match them up by
            // numeric index and drop entries, so they are concatenated.
            $this->headers = $this->mergeHeaders($value, $this->headers);
        } else {
            $this->{$name} = $value;
        }

        return true;
    }

    /**
     * Queue a GET request that uses the current default headers and options.
     *
     * @param string $url
     * @return bool Always true.
     */
    public function add($url)
    {
        $this->requests[] = $this->createRequest($url, 'GET', $this->headers, $this->options);

        return true;
    }

    /**
     * Queue a GET request.
     *
     * @param string $url
     * @param array  $headers Extra request header lines. This is a list of
     *                        raw "Name: value" strings, not a key/value map,
     *                        for example:
     *                        array(
     *                            "Content-type: text/xml;charset=\"utf-8\"",
     *                            "Accept: text/xml",
     *                            "Cache-Control: no-cache",
     *                            "Authorization: Basic " . base64_encode($credentials),
     *                        )
     * @param array  $options Extra cURL options (CURLOPT_* => value) for this
     *                        request; they override the defaults.
     * @return bool Always true.
     */
    public function get($url, $headers = array(), $options = array())
    {
        $this->requests[] = $this->createRequest($url, "GET", $headers, $options);

        return true;
    }

    /**
     * Queue a POST request.
     *
     * @param string       $url
     * @param array        $headers   Extra request header lines (see get()).
     * @param array        $options   Extra cURL options for this request.
     * @param array|string $post_data Value for CURLOPT_POSTFIELDS. Optional so
     *                                that the signature no longer has a
     *                                required parameter after optional ones.
     * @return bool Always true.
     */
    public function post($url, $headers = array(), $options = array(), $post_data = array())
    {
        $this->requests[] = $this->createRequest($url, "POST", $headers, $options, $post_data);

        return true;
    }

    /**
     * Send every queued request and clear the queue.
     *
     * @param mixed $callback Optional result callback; replaces the stored one
     *                        when given.
     * @return mixed With exactly one queued request: the callback's return
     *               value, or the raw curl_exec() result when no callable
     *               callback is set. Otherwise true when the queue was
     *               processed, false when it was empty or the multi interface
     *               reported an error.
     */
    public function execute($callback = null)
    {
        if ($callback) {
            $this->callback = $callback;
        }

        if (count($this->requests) == 1) {
            $ret = $this->single_curl();
        } else {
            $ret = $this->rolling_curl();
        }

        // The queue is single-use: clear it after every run.
        $this->requests = $this->requestMap = array();

        return $ret;
    }

    /**
     * Performs the single queued request with a plain easy handle.
     *
     * @return mixed
     */
    private function single_curl()
    {
        $request = array_shift($this->requests);

        $ch = curl_init();
        curl_setopt_array($ch, $this->get_options($request));
        $output = curl_exec($ch);
        $info = curl_getinfo($ch);
        $this->closeHandle($ch);

        if ($this->callback && is_callable($this->callback)) {
            return call_user_func($this->callback, $output, $info, $request);
        }

        return $output;
    }

    /**
     * Performs all queued requests through the multi interface, keeping at
     * most $window_size transfers in flight.
     *
     * @return bool False when nothing is queued or curl_multi_exec() fails.
     */
    private function rolling_curl()
    {
        $total = count($this->requests);
        if ($total === 0) {
            return false;
        }

        // Work on a local window so the configured size is not shrunk for
        // later execute() calls.
        $window = max(1, min((int)$this->window_size, $total));

        $master = curl_multi_init();
        $inFlight = array(); // handle key => handle, for everything attached to $master
        $next = 0;           // index of the next request to start
        $success = true;

        // Start the first batch of requests.
        for (; $next < $window; $next++) {
            $ch = $this->attachRequest($master, $next);
            $inFlight[$this->handleKey($ch)] = $ch;
        }

        do {
            while (($status = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) {
                // keep driving the transfers
            }
            if ($status != CURLM_OK) {
                $success = false;
                break;
            }

            // One or more transfers may have completed: collect them.
            while ($done = curl_multi_info_read($master)) {
                $handle = $done['handle'];
                $key = $this->handleKey($handle);

                $info = curl_getinfo($handle);
                $output = curl_multi_getcontent($handle);

                $index = isset($this->requestMap[$key]) ? $this->requestMap[$key] : null;
                unset($this->requestMap[$key]);

                $callback = $this->callback;
                if ($index !== null && is_callable($callback)) {
                    call_user_func($callback, $output, $info, $this->requests[$index]);
                }

                // Start the next request before removing the finished one.
                // The queue is re-checked here because the callback may have
                // queued further requests.
                if (isset($this->requests[$next])) {
                    $ch = $this->attachRequest($master, $next);
                    $inFlight[$this->handleKey($ch)] = $ch;
                    $next++;
                }

                curl_multi_remove_handle($master, $handle);
                $this->closeHandle($handle);
                unset($inFlight[$key]);
            }

            // Wait for activity. Some libcurl builds return -1 immediately;
            // sleep briefly in that case instead of spinning.
            if ($running && curl_multi_select($master, $this->timeout) === -1) {
                usleep(1000);
            }

            // $running reflects the last curl_multi_exec() call only, so it
            // can be 0 although replacement handles were just attached; keep
            // looping while anything is still attached.
        } while ($running || count($inFlight) > 0);

        // Only non-empty after an error: release whatever is still attached.
        foreach ($inFlight as $key => $handle) {
            curl_multi_remove_handle($master, $handle);
            $this->closeHandle($handle);
            unset($this->requestMap[$key]);
        }
        curl_multi_close($master);

        return $success;
    }

    /**
     * Builds the complete cURL option array for one request.
     *
     * Precedence, lowest to highest: default options, per-request options,
     * and finally CURLOPT_HTTPHEADER, which is always the default header
     * lines plus the request's header lines.
     *
     * @param \stdClass $request Request object built by createRequest().
     * @return array
     */
    private function get_options($request)
    {
        $options = (array)$this->__get('options');
        $headers = (array)$this->__get('headers');

        $options[CURLOPT_URL] = $request->url;

        // cURL defaults to GET. A request is sent as POST when it was queued
        // through post() or carries a payload; an empty payload is sent as an
        // empty body instead of silently degrading to GET.
        if ($request->method === 'POST' || $request->post_data) {
            $options[CURLOPT_POST] = true;
            $options[CURLOPT_POSTFIELDS] = $request->post_data ? $request->post_data : '';
        }

        // Per-request options override the defaults. The one exception is a
        // user agent that createRequest() picked at random: it must not
        // override a user agent configured in the default options.
        $custom = (array)$request->options;
        if (!empty($request->random_agent) && isset($options[CURLOPT_USERAGENT])) {
            unset($custom[CURLOPT_USERAGENT]);
        }
        if ($custom) {
            $options = array_replace($options, $custom);
        }

        // Per-request header lines are added to the default header lines.
        if ($request->headers) {
            $headers = $this->mergeHeaders($headers, $request->headers);
        }
        $options[CURLOPT_HTTPHEADER] = $headers;

        return $options;
    }

    /**
     * Creates the plain object that describes one queued request.
     *
     * @param string       $url
     * @param string       $method  "GET" or "POST".
     * @param array        $headers Request header lines.
     * @param array        $options cURL options for this request.
     * @param array|string $data    POST payload.
     * @return \stdClass
     */
    private function createRequest($url, $method, $headers, $options, $data = array())
    {
        $o = new \stdClass();
        $o->url = $url;
        $o->method = $method;
        $o->headers = $headers;
        $o->options = $options;
        $o->post_data = $data;
        $o->random_agent = false;

        if (!isset($options[CURLOPT_USERAGENT])) {
            $o->options[CURLOPT_USERAGENT] = self::$agent[array_rand(self::$agent)];
            $o->random_agent = true;
        }

        return $o;
    }

    /**
     * Creates an easy handle for the queued request at $index, attaches it to
     * the multi handle and records it in $requestMap.
     *
     * @param mixed $master Multi handle from curl_multi_init().
     * @param int   $index  Index into $requests.
     * @return mixed The new easy handle.
     */
    private function attachRequest($master, $index)
    {
        $ch = curl_init();
        curl_setopt_array($ch, $this->get_options($this->requests[$index]));
        curl_multi_add_handle($master, $ch);
        $this->requestMap[$this->handleKey($ch)] = $index;

        return $ch;
    }

    /**
     * Returns a string that identifies a live curl handle. Handles are
     * objects on PHP 8+ (which cannot be cast to string) and resources on
     * older versions.
     *
     * @param mixed $ch
     * @return string
     */
    private function handleKey($ch)
    {
        return is_object($ch) ? 'object#' . spl_object_id($ch) : (string)$ch;
    }

    /**
     * Releases an easy handle. Only resource handles (PHP < 8) need an
     * explicit curl_close(); object handles are freed by the engine.
     *
     * @param mixed $ch
     * @return void
     */
    private function closeHandle($ch)
    {
        if (is_resource($ch)) {
            curl_close($ch);
        }
    }

    /**
     * Concatenates two lists of header lines and removes exact duplicates.
     *
     * @param mixed $first
     * @param mixed $second
     * @return array
     */
    private function mergeHeaders($first, $second)
    {
        return array_values(array_unique(array_merge((array)$first, (array)$second)));
    }
}
2:创建对象
来源:oschina
链接:https://my.oschina.net/u/4479011/blog/3215523