How to parse the data from Google Alerts?

后端 未结 3 1919
挽巷
挽巷 2020-11-29 21:31

Firstly, how would you get Google Alerts information into a database, other than by parsing the text of the email message that Google sends you?

It seems that there is

相关标签:
3条回答
  • 2020-11-29 22:11

    I found a Google Alerts API here. It's pretty minimal and I haven't tested it.

    0 讨论(0)
  • 2020-11-29 22:17

    When you create the alert, set the "Deliver To" to "Feed" and then you can consume the feed XML as you would any other feed. This is much easier to parse and digest into a database.

    0 讨论(0)
  • class googleAlerts{
        /**
         * Log in to a Google account, create an alert for the given query and
         * return the URL of the alert's feed.
         *
         * Steps performed on one reused cURL handle (cookies persisted in
         * $COOKIEFILE):
         *   1. GET the ServiceLogin page and scrape its form inputs.
         *   2. POST those inputs plus the credentials to ServiceLoginAuth.
         *   3. GET /alerts and /alerts/create, scrape the create form.
         *   4. POST the create form with the query filled in.
         *   5. GET /alerts/manage and look for the feed link that follows the
         *      alert's name.
         *
         * @param string $alert Query text of the alert to create.
         *
         * @return string|false Feed URL on success; false when a request fails,
         *                      the login response has no <title>, or no feed
         *                      link is found on the manage page.
         */
        public function createAlert($alert){
            $USERNAME = 'XXXXXX@gmail.com';
            $PASSWORD = 'YYYYYY';
            $COOKIEFILE = 'cookies.txt';

            $ch = curl_init();
            curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)");
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
            // Credentials travel over this handle, so the server certificate
            // must be verified (the original turned verification off).
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_COOKIEJAR, $COOKIEFILE);
            curl_setopt($ch, CURLOPT_COOKIEFILE, $COOKIEFILE);
            curl_setopt($ch, CURLOPT_HEADER, 0);
            // The original set the connect timeout twice (30, then 120); only
            // the last value ever applied.
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
            curl_setopt($ch, CURLOPT_TIMEOUT, 120);

            // 1. Fetch the login page so its hidden fields can be echoed back.
            curl_setopt($ch, CURLOPT_URL,
                'https://accounts.google.com/ServiceLogin?hl=en&service=alerts&continue=http://www.google.com/alerts/manage');
            $data = curl_exec($ch);
            if ($data === false) {
                // Transport failure: report it as a normal failure instead of
                // falling through to the scraper's die().
                return false;
            }

            $formFields = $this->getFormFields($data);

            $formFields['Email']  = $USERNAME;
            $formFields['Passwd'] = $PASSWORD;
            unset($formFields['PersistentCookie']);

            // 2. Submit the login form. http_build_query() URL-encodes keys as
            // well as values; the separator is pinned to '&' so the result
            // does not depend on the arg_separator.output ini setting.
            curl_setopt($ch, CURLOPT_URL, 'https://accounts.google.com/ServiceLoginAuth');
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($formFields, '', '&'));

            $result = curl_exec($ch);

            if ($result === false || strpos($result, '<title>') === false) {
                return false;
            }

            // 3. Switch the reused handle back to GET. CURLOPT_HTTPGET is the
            // documented way to do this after a POST.
            curl_setopt($ch, CURLOPT_HTTPGET, true);
            curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts');
            // Response is not inspected; NOTE(review): presumably fetched only
            // for the cookies it sets - confirm.
            curl_exec($ch);

            curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts/create');
            $createPage = curl_exec($ch);
            if ($createPage === false) {
                return false;
            }

            $alertFields = $this->getFormFieldsCreate($createPage);
            $alertFields['q'] = $alert;
            // NOTE(review): meanings of t/f/l are not visible in this file;
            // 'e' => 'feed' presumably selects feed delivery - confirm.
            $alertFields['t'] = '7';
            $alertFields['f'] = '1';
            $alertFields['l'] = '0';
            $alertFields['e'] = 'feed';
            unset($alertFields['PersistentCookie']);

            // 4. Submit the create form to the same URL.
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($alertFields, '', '&'));
            curl_exec($ch);

            // 5. Read the manage page with a plain GET (no stale POST body).
            curl_setopt($ch, CURLOPT_HTTPGET, true);
            curl_setopt($ch, CURLOPT_URL, 'http://www.google.com/alerts/manage');
            $managePage = curl_exec($ch);
            if ($managePage === false) {
                return false;
            }

            // The alert text is user input: quote it so regex metacharacters
            // and the '%' delimiter are matched literally.
            $pattern = '%' . preg_quote($alert, '%')
                . '(?=</a>).*?<a href=[\'"]http://www.google.com/alerts/feeds/([^\'"]+)%i';

            if (preg_match($pattern, $managePage, $matches)) {
                return 'http://www.google.com/alerts/feeds/' . $matches[1];
            }

            return false;
        }
    
        /**
         * Locate the form whose markup contains id=gaia_loginform and return
         * its input fields.
         *
         * Terminates the script with die() when no such form is present.
         *
         * @param string $data HTML of the login page.
         *
         * @return array Input name => value pairs, as produced by getInputs().
         */
        private function getFormFields($data)
        {
            $found = preg_match('/(<form.*?id=.?gaia_loginform.*?<\/form>)/is', $data, $formMatch);

            if (!$found) {
                die('didnt find login form');
            }

            return $this->getInputs($formMatch[1]);
        }
        /**
         * Locate the first form span that contains a name= attribute and
         * return its input fields. Used on the alert-creation page.
         *
         * Terminates the script with die() when nothing matches; note the
         * message still says "login form1" although this is not the login page.
         *
         * @param string $data HTML of the page holding the create form.
         *
         * @return array Input name => value pairs, as produced by getInputs().
         */
        private function getFormFieldsCreate($data)
        {
            $found = preg_match('/(<form.*?name=.?.*?<\/form>)/is', $data, $formMatch);

            if (!$found) {
                die('didnt find login form1');
            }

            return $this->getInputs($formMatch[1]);
        }
    
    
        /**
         * Collect the name/value pairs of every <input> tag in an HTML fragment.
         *
         * Attribute values may be double-quoted, single-quoted or unquoted.
         * Quoted values are returned verbatim (including whitespace); unquoted
         * values end at whitespace or the closing '>'. An input without a
         * value attribute maps to the empty string. When a name occurs more
         * than once the last occurrence wins. HTML entities are not decoded.
         *
         * @param string $form HTML fragment, normally one <form>...</form>.
         *
         * @return array Map of input name => value (both strings).
         */
        private function getInputs($form)
        {
            $inputs = array();

            if (!preg_match_all('/<input\b[^>]*>/is', $form, $tags)) {
                return $inputs;
            }

            // (?<![\w-]) keeps "name"/"value" from matching inside longer
            // attribute names such as data-name; the branch-reset group (?|...)
            // puts the captured value in group 1 whichever quoting style is used.
            $valueSyntax   = '\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))';
            $namePattern   = '/(?<![\w-])name' . $valueSyntax . '/i';
            $valuePattern  = '/(?<![\w-])value' . $valueSyntax . '/i';

            foreach ($tags[0] as $tag) {
                if (!preg_match($namePattern, $tag, $nameMatch)) {
                    // Inputs without a name are never submitted; skip them.
                    continue;
                }

                // Use a dedicated match variable: preg_match() always
                // overwrites its third argument, so reusing the default would
                // turn a missing value into an array instead of ''.
                $value = '';
                if (preg_match($valuePattern, $tag, $valueMatch)) {
                    $value = $valueMatch[1];
                }

                $inputs[$nameMatch[1]] = $value;
            }

            return $inputs;
        }
    }
    // Usage example: create an alert for the query and print the feed URL
    // that createAlert() returns (echoing false prints nothing).
    $alert = new googleAlerts();
    $feedUrl = $alert->createAlert('YOUR ALERT');
    echo $feedUrl;
    

    It will return a link to the RSS feed of your newly created alert.

    0 讨论(0)
提交回复
热议问题