Iterate over python dictionary to retrieve only required rows

后端 未结 5 994
长发绾君心
长发绾君心 2021-01-24 01:42

I am getting the data in HTML table format from external source -

from xml.etree import ElementTree as ET

s = \"\"\"
Release
相关标签:
5条回答
  • 2021-01-24 02:08

    Assuming that there's only one row per version and you don't need the other versions at all, you could create a function that parses the HTML and returns a dict representing the version as soon as it's found. If the version is not found, it could return None instead:

    from xml.etree import ElementTree as ET
    
    s = """<table>
      <tr><th>Release</th><th>REFDB</th><th>URL</th></tr>
      <tr><td>3.7.3</td><td>12345</td><td>http://google.com</td></tr>
      <tr><td>3.7.4</td><td>456789</td><td>http://foo.com</td></tr>
    </table>
    """
    
    def find_version(ver):
        """Return the table row whose 'Release' cell equals *ver*.

        Parses the module-level HTML table ``s``. The first row supplies the
        column names; each later row is turned into a dict keyed by those
        names and compared against *ver*.

        Args:
            ver: Release string to look for, e.g. ``'3.7.3'``.

        Returns:
            A dict mapping header text to cell text for the first matching
            row, or ``None`` when no row matches or the table has no rows.
        """
        table = ET.XML(s)
        rows = iter(table)
        # An empty table has no header row; report "not found" instead of
        # letting StopIteration escape from next().
        header_row = next(rows, None)
        if header_row is None:
            return None
        headers = [col.text for col in header_row]
        for row in rows:
            values = [col.text for col in row]
            out = dict(zip(headers, values))
            if out['Release'] == ver:
                # Stop at the first match; later rows are never parsed.
                return out
    
        return None
    
    res = find_version('3.7.3')
    if res:
        for x in res.items():
            # Each item is a (header, value) pair of strings.
            print(' - '.join(x))
    else:
        # Call form of print: the bare print statement is a SyntaxError on
        # Python 3 and was inconsistent with the print(...) call above.
        print('Version not found')
    

    Output:

    Release - 3.7.3
    URL - http://google.com
    REFDB - 12345
    
    0 讨论(0)
  • 2021-01-24 02:22

    If you accumulate the dictionaries in a list:

    # Build one dict per table row, pairing each header with the text of the
    # cell in the same position.
    result = [
        dict(zip(headers, (cell.text for cell in tr)))
        for tr in rows
    ]
    

    You can filter the list -

    import operator
    # Release string whose rows should be kept.
    value = '3.7.3'
    # One getter per column, in the order the columns are printed later.
    release, refdb, url = map(operator.itemgetter, ('Release', 'REFDB', 'URL'))
    # Keep only the row dicts whose 'Release' entry equals the wanted value.
    data = list(filter(lambda entry: release(entry) == value, result))
    

    Then print all the dictionaries that got past the filter -

    # Output template: one line per column, filled in positionally.
    f_string = 'Release Version - {}\nREFDB - {}\nURL - {}'
    for entry in data:
        # Pull the three columns out of the row dict in template order.
        fields = [getter(entry) for getter in (release, refdb, url)]
        print(f_string.format(*fields))
    
    0 讨论(0)
  • 2021-01-24 02:23
    import lxml.html
    from collections import namedtuple
    s = """<table>
      <tr><th>Release</th><th>REFDB</th><th>URL</th></tr>
      <tr><td>3.7.3</td><td>12345</td><td>http://google.com</td></tr>
      <tr><td>3.7.4</td><td>456789</td><td>http://foo.com</td></tr>
      <tr><td>3.7.5</td><td>151515</td><td>http://foo.com</td></tr>
    </table>
    """
    def info_gen(rows):
        """Yield one ``info`` named tuple per row in *rows*.

        The result of ``row.xpath('.//text()')`` is unpacked positionally into
        the fields ``Release``, ``REFDB`` and ``URL``, so every row must
        produce exactly three values.
        """
        record = namedtuple('info', 'Release REFDB URL')
        yield from (record(*tr.xpath('.//text()')) for tr in rows)
    
    html = lxml.html.fromstring(s)
    # tr[td] selects only rows that contain <td> cells, which skips the
    # <th>-only header row.
    rows = html.xpath('//table//tr[td]')
    
    Release = input("Enter Release:")
    for info in info_gen(rows):
        # Compare against the Release field only. The tuple membership test
        # ``Release in info`` would also match a row whose REFDB or URL
        # happens to equal the input.
        if info.Release == Release:
            print(info)
            break
    

    out:

     Enter Release:3.7.5
    info(Release='3.7.5', REFDB='151515', URL='http://foo.com')
    
    0 讨论(0)
  • 2021-01-24 02:25
    from xml.etree import ElementTree as ET
    
    s = """<table>
      <tr><th>Release</th><th>REFDB</th><th>URL</th></tr>
      <tr><td>3.7.3</td><td>12345</td><td>http://google.com</td></tr>
      <tr><td>3.7.4</td><td>456789</td><td>http://foo.com</td></tr>
    </table>
    """
    
    table = ET.XML(s)
    rows = iter(table)
    # The first row holds the column names.
    headers = [cell.text for cell in next(rows)]
    # One dict per remaining row, keyed by column name.
    records = (dict(zip(headers, (cell.text for cell in tr))) for tr in rows)
    # Index the row dicts by release; rows lacking a 'Release' key are skipped
    # and a later row with the same release replaces an earlier one.
    master = {rec['Release']: rec for rec in records if 'Release' in rec}
    
    # Use the release to get the right dict out of master
    print(master)
    # NOTE(review): in_data is not defined in this snippet; presumably it is
    # the release string supplied by the surrounding code -- confirm.
    if in_data in master:
        # .items() yields (column, value) pairs. Iterating the dict directly
        # yields only its keys, which breaks the two-name unpacking.
        for k, v in master[in_data].items():
            print('{} - {}'.format(k, v))
    else:
        print('Error')
    
    0 讨论(0)
  • 2021-01-24 02:30

    You don't need a dictionary. Just parse each row's content and check whether the release version matches your input:

    #coding:utf-8
    
    import sys
    from lxml import html
    
    # Exactly one command-line argument (the release version) is required.
    if len(sys.argv) != 2:
        raise Exception("Please provide release version only")
    
    # Release to look up, with surrounding whitespace removed.
    release_input = sys.argv[1].strip()
    
    data = """<table>
      <tr><th>Release</th><th>REFDB</th><th>URL</th></tr>
      <tr><td>3.7.3</td><td>12345</td><td>http://google.com</td></tr>
      <tr><td>3.7.4</td><td>456789</td><td>http://foo.com</td></tr>
    </table>
    """
    
    tree = html.fromstring(data)
    # [1:] skips the first <tr>, which is the header row.
    for row in tree.xpath('//tr')[1:]:
        # Each data row is expected to yield exactly three cell texts.
        release, refdb, url = row.xpath('.//td/text()')
        if release_input == release:
            print("Release Version - {}".format(release))
            # Label corrected to match the table's REFDB column name.
            print("REFDB - {}".format(refdb))
            print("URL - {}".format(url))
            break
    else:
        # The else clause of a for loop runs only when the loop finished
        # without ``break``, so this is no longer printed after a match.
        print("{} release version wasn't found".format(release_input))
    
    0 讨论(0)
提交回复
热议问题