Multiprocessing Twitter API calls using Python

问题

I want to use multiprocessing with the Twitter search API. I have the code below, but it makes one call at a time instead of running the calls in parallel.

from multiprocessing import Process
from twitter import *

# Load the API credentials by executing config.py into a plain dict.
# NOTE: exec() runs whatever code config.py contains — only use this with a
# config file you fully control.
config = {}
with open("config.py", "rb") as config_file:
    # The context manager closes the file handle as soon as it has been read.
    exec(compile(config_file.read(), "config.py", 'exec'), config)

# Authenticated client used by twitterSearch() below.
twitter = Twitter(
    auth=OAuth(config["access_key"], config["access_secret"], config["consumer_key"], config["consumer_secret"]))


def twitterSearch(word):
    """Search tweets for ``word`` and print the raw API response.

    The request goes through the module-level ``twitter`` client and is
    sent with ``count=100``.
    """
    print(twitter.search.tweets(q=word, count=100))


if __name__ == '__main__':
    # Run the same search in eight separate processes.
    for i in range(8):
        p = Process(target=twitterSearch, args=('racist',))
        p.start()
        # NOTE: join() blocks until this process has finished. Because it is
        # called here, inside the loop, the next process is not started until
        # the current one is done — the eight searches run one after another.
        p.join()

Please kindly help me to fix this.

回答1:

If I understand correctly, what you want is to have a continuous stream of results for your search term. I don't know the package you're working with, but I do know that both twython and tweepy can use Twitter's streaming API.

In any case, you'll need to process each tweet as it arrives on your stream, and you can use processes/threads at that stage if needed.

Code example for streaming:

from threading import Thread
from queue import Queue
from twython import TwythonStreamer
from requests.exceptions import ChunkedEncodingError

# Placeholder credentials — replace these with your own Twitter API keys.
CONSUMER_KEY = 'AAA'
CONSUMER_SECRET = 'BBB'
ACCESS_KEY = 'CCC'
ACCESS_SECRET = 'DDD'


class TwitterStream(TwythonStreamer):
    """Streamer that hands every received tweet over to a queue."""

    def __init__(self, consumer_key, consumer_secret, token, token_secret, tqueue):
        """Remember the target queue, then initialise the base streamer.

        ``tqueue`` is the queue that ``on_success`` puts tweets on; the four
        credential arguments are passed to ``TwythonStreamer`` unchanged.
        """
        self.tweet_queue = tqueue
        super().__init__(consumer_key, consumer_secret, token, token_secret)

    def on_success(self, data):
        """Queue ``data`` if it contains a ``'text'`` key; ignore it otherwise."""
        if 'text' not in data:
            return
        self.tweet_queue.put(data)

    def on_error(self, status_code, data):
        """Deliberately do nothing when the stream reports an error.

        To stop receiving data after an error, call ``self.disconnect()``
        here instead.
        """


def stream_tweets(tweets_queue, track):
    """Stream tweets matching ``track`` into ``tweets_queue``.

    Opens a ``TwitterStream`` with the module-level credentials and filters
    statuses by ``track``. If the connection breaks with a
    ``ChunkedEncodingError``, a new stream is opened with the same queue and
    the same filter. The function returns when ``filter()`` returns normally;
    any other exception propagates to the caller.

    Args:
        tweets_queue: Queue that the stream puts received tweets on.
        track: Search terms passed to the streaming filter.
    """
    # Retry in a loop rather than by recursion, so repeated reconnects do not
    # grow the call stack, and so the retry reuses this call's own arguments.
    while True:
        try:
            stream = TwitterStream(
                CONSUMER_KEY, CONSUMER_SECRET, ACCESS_KEY, ACCESS_SECRET, tweets_queue)
            stream.statuses.filter(track=track)
        except ChunkedEncodingError:
            # Sometimes the API sends back one byte less than expected, which
            # results in an exception in the requests library — reconnect.
            continue
        return


def process_tweets(tweets_queue, reply_dict, api, logfile):
    """Consume tweets from ``tweets_queue`` forever.

    Each iteration takes one item off the queue and then marks it as done.
    ``reply_dict``, ``api`` and ``logfile`` are accepted but not used by this
    skeleton; they are there for the per-tweet handling you add.
    """
    while True:
        tweet = tweets_queue.get()
        # Handle ``tweet`` here. The actual processing of each tweet can be
        # handed off to a new thread if needed.
        tweets_queue.task_done()


tweet_queue = Queue()
track = 'whatever you want to filter by'  # Search terms go here

# process_tweets() takes these three arguments but does not use them in this
# example. Neutral placeholders keep the call below from raising NameError;
# replace them with real objects once your tweet handling needs them.
reply_dict = {}
api = None
logfile = None

# Producer: fill the queue from the streaming API on a daemon thread.
Thread(target=stream_tweets,
       args=(tweet_queue, track),
       daemon=True).start()

# Consumer: runs in the main thread and loops forever.
process_tweets(tweet_queue, reply_dict, api, logfile)

来源：https://stackoverflow.com/questions/41419227/multiproccessing-twitter-api-calls-using-python

标签

python

twitter