python imaplib to get gmail inbox subjects titles and sender name

后端 未结 5 1313
攒了一身酷
攒了一身酷 2020-12-04 13:40

I'm using Python's imaplib to connect to my Gmail account. I want to retrieve the top 15 messages (unread or read, it doesn't matter) and display just the subjects and sender names.

相关标签:
5条回答
  • 2020-12-04 14:21
        # `c` is created outside this fragment -- presumably an authenticated
        # imaplib connection (TODO confirm). The inbox is selected with
        # readonly=True.
        c.select('INBOX', readonly=True)
    
        # Fetch messages 1 through 29 by message number, one request each.
        # NOTE(review): low numbers are presumably the oldest messages, so this
        # walks the start of the mailbox rather than the newest mail -- confirm.
        for i in range(1, 30):
            # '(RFC822)' requests the complete message, not just its headers.
            typ, msg_data = c.fetch(str(i), '(RFC822)')
            for response_part in msg_data:
                # Only tuple entries are parsed; any other entry is skipped.
                if isinstance(response_part, tuple):
                    # The tuple's second element is parsed as the raw message.
                    msg = email.message_from_string(response_part[1])
                    # Print the three headers of interest, one per line, with
                    # the header name upper-cased and left-aligned to 8 columns.
                    for header in [ 'subject', 'to', 'from' ]:
                        # Python 2 print statement.
                        print '%-8s: %s' % (header.upper(), msg[header])
    

    This should give you an idea of how to retrieve the Subject and From headers.

    0 讨论(0)
  • 2020-12-04 14:24

    This was my solution to get the useful bits of information from emails:

    import datetime
    import email
    import imaplib
    import mailbox
    
    
    # Save every unseen inbox message as email_<n>.txt (headers + plain-text body).
    #
    # Placeholders: replace with real credentials before running.
    EMAIL_ACCOUNT = "your@gmail.com"
    PASSWORD = "your password"

    mail = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        mail.login(EMAIL_ACCOUNT, PASSWORD)
        mail.select('inbox')
        result, data = mail.uid('search', None, "UNSEEN") # (ALL/UNSEEN)

        # A failed or empty search yields no UIDs instead of crashing below.
        if result == 'OK' and data and data[0]:
            unseen_uids = data[0].split()
        else:
            unseen_uids = []

        for x, latest_email_uid in enumerate(unseen_uids):
            # NOTE: the inbox is selected read-write, so fetching the full
            # RFC822 message is expected to mark it as read on the server.
            result, email_data = mail.uid('fetch', latest_email_uid, '(RFC822)')
            if result != 'OK' or not email_data or not isinstance(email_data[0], tuple):
                # Message vanished or the fetch failed; skip it.
                continue

            # Parse the raw bytes directly: messages are not guaranteed to be
            # valid UTF-8, so decoding them up front could raise.
            email_message = email.message_from_bytes(email_data[0][1])

            # Header Details
            # Default to an empty date so a missing/unparseable Date header
            # neither raises NameError nor reuses the previous message's date.
            local_message_date = ""
            date_tuple = email.utils.parsedate_tz(email_message['Date'])
            if date_tuple:
                local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
                local_message_date = local_date.strftime("%a, %d %b %Y %H:%M:%S")

            # .get(..., '') keeps decode_header() from failing on absent headers.
            email_from = str(email.header.make_header(email.header.decode_header(email_message.get('From', ''))))
            email_to = str(email.header.make_header(email.header.decode_header(email_message.get('To', ''))))
            subject = str(email.header.make_header(email.header.decode_header(email_message.get('Subject', ''))))

            # Body details
            for part in email_message.walk():
                if part.get_content_type() != "text/plain":
                    continue
                body = part.get_payload(decode=True)
                if body is None:
                    continue

                # Honour the charset declared by the part; fall back to UTF-8.
                charset = part.get_content_charset() or 'utf-8'
                try:
                    body_text = body.decode(charset, errors='replace')
                except LookupError:
                    # Unknown charset name in the message.
                    body_text = body.decode('utf-8', errors='replace')

                file_name = "email_" + str(x) + ".txt"
                # Explicit encoding: the platform default may not be able to
                # represent every character of the message.
                with open(file_name, 'w', encoding='utf-8') as output_file:
                    output_file.write("From: %s\nTo: %s\nDate: %s\nSubject: %s\n\nBody: \n\n%s" %(email_from, email_to,local_message_date, subject, body_text))
    finally:
        # Always end the IMAP session, even when something above failed.
        mail.logout()
    
    0 讨论(0)
  • 2020-12-04 14:26

    For those looking for how to check mail and parse the headers, this is what I used:

    def parse_header(str_after, checkli_name, mailbox) :
        """Print the headers of every message in *mailbox* sent yesterday.

        Uses the module-level connection ``m`` (defined outside this function;
        presumably an authenticated imaplib connection -- confirm) and the
        module-level ``datetime`` import.

        Args:
            str_after: accepted for compatibility; not used by this function.
            checkli_name: accepted for compatibility; not used by this function.
            mailbox: name of the mailbox to select and search.

        Returns:
            None. Everything is written to standard output.
        """
        # Imported once here instead of on every loop iteration.
        import email
        import email.utils

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(mailbox)
        m.SELECT(mailbox)
        # NOTE(review): %b is locale-dependent; the search presumably needs
        # English month abbreviations -- confirm under non-English locales.
        date = (datetime.date.today() - datetime.timedelta(1)).strftime("%d-%b-%Y")

        print(date)
        result, data = m.uid('search', None, '(SENTON %s)' % date)
        print(data)

        if result != 'OK' or not data or not data[0]:
            # Search failed or matched nothing.
            return

        # Python 3 delivers the raw message as bytes; Python 2 as str.
        parse_message = getattr(email, 'message_from_bytes', email.message_from_string)

        for latest_email_uid in data[0].split():
            print(latest_email_uid)
            # Separate name so the search result being iterated is not rebound.
            result, message_data = m.uid('fetch', latest_email_uid, '(RFC822)')
            if result != 'OK' or not message_data or not isinstance(message_data[0], tuple):
                # Fetch failed or the message no longer exists; skip it.
                continue
            raw_email = message_data[0][1]

            email_message = parse_message(raw_email)
            print(email_message['To'])
            print(email_message['Subject'])
            print(email.utils.parseaddr(email_message['From']))
            print(email_message.items()) # print all headers
    
    0 讨论(0)
  • 2020-12-04 14:38

    I was looking for a ready-made, simple script to list the latest inbox messages via IMAP without sorting through all messages. The information here is useful, though DIY, and misses some aspects. First, IMAP4.select returns the message count. Second, decoding the subject header isn't straightforward.

    #! /usr/bin/env python
    # -*- coding: utf-8 -*-
    
    
    import imaplib
    import email
    from email.header import decode_header
    import HTMLParser
    
    
    # Module-level HTMLParser instance shared by decodeHeader(), which calls its
    # unescape() method on decoded header text to unescape XML/HTML entities.
    _parser = HTMLParser.HTMLParser()
    
    def decodeHeader(value):
      """Return a raw header value as readable text.

      Decodes every encoded-word chunk reported by decode_header() (not just
      the first one), joins the chunks with single spaces, and finally
      unescapes XML/HTML entities through the module-level _parser.

      value -- raw header string, or None when the message lacks the header.

      Returns the decoded text; an empty string when value is None.
      """
      if value is None:
        # message[h] is None for an absent header; nothing to decode.
        return u''

      # A quoted encoded-word ("=?...?=") is not recognised by decode_header,
      # so drop the quotes first.
      if value.startswith('"=?'):
        value = value.replace('"', '')

      pieces = []
      for chunk, encoding in decode_header(value):
        if isinstance(chunk, bytes):
          # Chunks without a declared charset are treated as ASCII; undecodable
          # bytes are replaced rather than aborting the whole header.
          chunk = chunk.decode(encoding or 'ascii', 'replace')
        pieces.append(chunk)

      return _parser.unescape(u' '.join(pieces))
    
    def listLastInbox(top = 4):
      """Yield header dicts for the newest messages in the Gmail INBOX.

      top -- maximum number of messages to list, newest first (default 4).

      Yields one dict per message with the keys 'subject', 'from' and 'date',
      each value passed through decodeHeader().

      Raises imaplib.IMAP4.error when the INBOX cannot be selected.

      The connection is logged out when the generator finishes, is closed
      early, or fails.
      """
      mailbox = imaplib.IMAP4_SSL('imap.gmail.com')
      try:
        mailbox.login('mygmail@gmail.com', 'somecrazypassword')

        selected = mailbox.select('INBOX')
        # Explicit check instead of assert, which is stripped under -O.
        if selected[0] != 'OK':
          raise imaplib.IMAP4.error('cannot select INBOX: %r' % (selected[1],))
        messageCount = int(selected[1][0])

        # Never go below message number 1 when the inbox holds fewer than
        # `top` messages.
        stop = max(messageCount - top, 0)
        for i in range(messageCount, stop, -1):
          # NOTE(review): '(RFC822)' downloads the whole message although only
          # three headers are used, and the mailbox is selected read-write, so
          # this presumably also marks messages as read -- confirm if intended.
          response = mailbox.fetch(str(i), '(RFC822)')[1]
          for part in response:
            if isinstance(part, tuple):
              message = email.message_from_string(part[1])
              yield {h: decodeHeader(message[h]) for h in ('subject', 'from', 'date')}
      finally:
        mailbox.logout()
    
    
    if __name__ == '__main__':
      # Script entry point: print each listed message as a block of
      # "NAME    : value" lines, preceded by a 40-dash rule.
      rule = '-' * 40
      for headers in listLastInbox():
        print(rule)
        for name, text in headers.items():
          print(u'{0:8s}: {1}'.format(name.upper(), text))
    
    0 讨论(0)
  • 2020-12-04 14:42

    BODY gets almost everything and marks the message as read. BODY[<parts>] gets just those parts. BODY.PEEK[<parts>] gets the same parts, but doesn't mark the message read. <parts> can be HEADER or TEXT or HEADER.FIELDS (<list of fields>) or HEADER.FIELDS.NOT (<list of fields>)

    This is what I use: typ, data = connection.fetch(message_num_s, b'(BODY.PEEK[HEADER.FIELDS (SUBJECT FROM)])')

    `

    def safe_encode(seq):
        """Yield each item of *seq* encoded as ``bytes``.

        Args:
            seq: a list or tuple of items, or a single item. A single item is
                treated as a one-element sequence.

        Yields:
            bytes: ``str`` items encoded with ``str.encode()``, ``int``/``float``
            items converted with ``str()`` and then encoded, ``bytes`` items
            unchanged.

        Raises:
            ValueError: when an item is of any other type (raised lazily, when
                the generator reaches that item).
        """
        # isinstance() is required here: comparing the value against the type
        # objects themselves never matches, which wrapped every list/tuple in
        # another list and made sequences fail with ValueError.
        if not isinstance(seq, (list, tuple)):
            seq = [seq]
        for item in seq:
            if isinstance(item, bytes):
                yield item
            elif isinstance(item, str):
                yield item.encode()
            elif isinstance(item, (int, float)):
                yield str(item).encode()
            else:
                raise ValueError(
                    'cannot encode %r of type %s' % (item, type(item).__name__)
                )
    
    def fetch_fields(connection, message_num, field_s):
        """Fetch only the requested header fields of one message as a dict.

        The fields are requested with BODY.PEEK[HEADER.FIELDS (...)].

        Args:
            connection: object whose ``fetch(message_num, parts)`` method
                returns a ``(typ, data)`` pair -- presumably an imaplib
                connection with a mailbox selected (confirm at the call site).
            message_num: message number as int, str or bytes.
            field_s: header names, either one space-separated str/bytes value
                or a list/tuple of names.

        Returns:
            ``(typ, data)`` unchanged when ``typ`` is not ``'OK'``; otherwise
            ``(typ, items)`` where ``items`` maps each capitalized header name
            (bytes) to its value (bytes). Values are the text after the first
            colon with trailing whitespace removed; leading whitespace is kept
            as received. ``items`` is empty when the response has no header
            data.
        """
        # safe_encode() yields one bytes item per name, so a single join covers
        # both the sequence form and the single-value form.
        field_s = b' '.join(safe_encode(field_s))
        message_num = tuple(safe_encode(message_num))[0]

        typ, data = connection.fetch(message_num, b'(BODY.PEEK[HEADER.FIELDS (%s)])'%(field_s.upper()))
        if typ != 'OK':
            return typ, data  #change this to an exception if you'd rather

        items = {}
        if not data or not isinstance(data[0], tuple):
            # No header payload in the response (e.g. no such message).
            return typ, items

        lastkey = None
        for line in data[0][1].splitlines():
            if not line.strip():
                # Blank separator line after the headers; nothing to record.
                continue

            # A folded header continues on a line that starts with whitespace;
            # such a line may itself contain ':' and must not become a new key.
            # Lines without any ':' are also treated as continuations.
            if line[:1] in (b' ', b'\t') or b':' not in line:
                if lastkey is not None:
                    items[lastkey] += line
                continue

            lastkey, value = line.strip().split(b':', 1)
            #not all servers capitalize the same, and some just leave it
            #as however it arrived from some other mail server.
            lastkey = lastkey.capitalize()
            items[lastkey] = value
        return typ, items
    `
    

    You can drop it into your code example by replacing the call to 'mail.fetch()' with fetch_fields(mail, i, 'SUBJECT FROM') or fetch_fields(mail, i, ('SUBJECT', 'FROM'))

    0 讨论(0)
提交回复
热议问题