Perl LibXML findvalue(…) concatenates values

前端 未结 2 765
南方客
南方客 2021-01-14 05:27

I am trying to extract node values from an XML file using LibXML. When I call findvalue all of the nodes of the same element type are concatenated. I am totally

相关标签:
2条回答
  • 2021-01-14 06:02

    Change the XPaths as follows:

    //playlists/dict        →  /playlists/dict
    ./string                →  key[text()="Name"]/following-sibling::*[1]
    //playlists/dict/array  →  key[text()="Playlist Items"]/following-sibling::*[1]/*
    ./dict/integer          →  key[text()="Track ID"]/following-sibling::*[1]
    

    Yeah, those XPaths are pretty messy, but that's because we're dealing with a horrible schema.

    Fixed:

    use strict;
    use warnings;
    use feature qw( say );
    
    use XML::LibXML qw( );
    
    # Parse the XML document whose path is given as the first command-line
    # argument.
    my $doc = XML::LibXML->load_xml( location => $ARGV[0] );
    
    # Each <dict> directly under the root <playlists> element is one playlist.
    # The result is kept in a real array, so it is indexed as
    # $playlist_nodes[...] and its last index is $#playlist_nodes (no array
    # reference is involved).
    my @playlist_nodes = $doc->findnodes('/playlists/dict');
    for my $playlist_idx (0..$#playlist_nodes) {
       my $playlist_node = $playlist_nodes[$playlist_idx];
    
       # Blank separator line before every playlist except the first.
       say "" if $playlist_idx;
    
       # The value of a <key> is the first element sibling that follows it.
       my $name = $playlist_node->findvalue('key[text()="Name"]/following-sibling::*[1]');
       say $name;
    
       # Children of the element following the "Playlist Items" key are the
       # individual track entries.
       for my $track_node ($playlist_node->findnodes('key[text()="Playlist Items"]/following-sibling::*[1]/*')) {
          my $id = $track_node->findvalue('key[text()="Track ID"]/following-sibling::*[1]');
          say $id;
       }
    }
    

    Above, I mentioned that the schema being used is horrible. Whoever designed that XML schema was told to use XML, but clearly didn't understand XML. It's bad even by the standards of a schema meant to encode arbitrary data structures, the way JSON does. (This would be better.) Whoever designed it must have intended for the data to be converted into a different format before being used. The following does this:

    use strict;
    use warnings;
    use feature qw( say state );
    
    use Carp              qw( croak );
    use Types::Serialiser qw( );
    use XML::LibXML       qw( );
    
    
    # Return a qualified name for $node: "{namespace-uri}name" when the node
    # reports a namespace URI, otherwise just the node name.
    sub qname {
       my ($node) = @_;
       my $uri  = $node->namespaceURI();
       my $name = $node->nodeName();
       return $name unless defined($uri);
       return '{' . $uri . '}' . $name;
    }
    
    # Deserialize every child element of $array_node and return the results
    # as an array reference, in the order findnodes("*") yields them.
    sub deserialize_array {
       my ($array_node) = @_;
    
       my @items;
       for my $child ($array_node->findnodes("*")) {
          push @items, deserialize_value($child);
       }
    
       return \@items;
    }
    
    # Deserialize a <dict> node into a hash reference.
    #
    # The child elements are consumed two at a time: a <key> element followed
    # by the element holding its value. Croaks with "Expected key" when the
    # first element of a pair is not a <key>, and with "Expected value" when a
    # <key> has no element after it. A repeated key overwrites the earlier
    # entry.
    sub deserialize_dict {
       my ($dict_node) = @_;
    
       my %result;
       my @pending = $dict_node->findnodes("*");
       while (@pending) {
          my $key_node = shift(@pending);
          croak("Expected key") if qname($key_node) ne "key";
    
          my $val_node = shift(@pending);
          croak("Expected value") if !$val_node;
    
          my $key   = $key_node->textContent();
          my $value = deserialize_value($val_node);
          $result{$key} = $value;
       }
    
       return \%result;
    }
    
    # Deserialize a single value node by dispatching on its qualified name.
    #
    # Handled element names: string (text content), integer (text content
    # coerced to a number), true / false (Types::Serialiser booleans), array
    # and dict (handled by deserialize_array / deserialize_dict). Any other
    # name croaks with an "Unrecognized value type" message.
    sub deserialize_value {
       my ($val_node) = @_;
    
       # Built once and reused on later calls.
       state $handler_for = {
          string  => sub { my ($node) = @_; return $node->textContent() },
          integer => sub { my ($node) = @_; return 0 + $node->textContent() },
          true    => sub { return $Types::Serialiser::true },
          false   => sub { return $Types::Serialiser::false },
          array   => \&deserialize_array,
          dict    => \&deserialize_dict,
       };
    
       my $val_type = qname($val_node);
       my $handler  = $handler_for->{$val_type};
       croak("Unrecognized value type \"$val_type\"") unless $handler;
    
       return $handler->($val_node);
    }
    
    # Deserialize a whole document: the document element is treated as an
    # array whose child elements are the values.
    sub deserialize_doc {
       my ($doc) = @_;
       my $root = $doc->documentElement();
       return deserialize_array($root);
    }
    

    With the above, the solution becomes the following:

    # Parse the file named on the command line and convert it into plain Perl
    # data (an array reference of playlist hashes).
    my $doc = XML::LibXML->load_xml( location => $ARGV[0] );
    my $playlists = deserialize_doc($doc);
    
    my $seen_first = 0;
    for my $playlist (@$playlists) {
        # Blank separator line before every playlist except the first.
        say "" if $seen_first;
        $seen_first = 1;
    
        my $name = $playlist->{"Name"};
        say $name;
    
        # One track ID per line.
        for my $track (@{ $playlist->{"Playlist Items"} }) {
            my $id = $track->{"Track ID"};
            say $id;
        }
    }
    
    0 讨论(0)
  • 2021-01-14 06:13

    Your input data is not easy to process, as other posters have pointed out.

    Your code could look like the following, given the sample input data shown below.

    use strict;
    use warnings;
    use feature 'say';
    
    use XML::LibXML;
    
    # Input file, expected in the current working directory.
    my $playlistxml = 'playlist.xml';
    
    my $dom = XML::LibXML->load_xml(location => $playlistxml);
    
    # For each <playlist> element: print its title, then the id attribute of
    # every <track> under <tracks> (one per line), then a blank line.
    for my $playlist ($dom->findnodes('//playlist')) {
        say 'Title: ', $playlist->findvalue('./title');
    
        my @track_ids;
        for my $id_attr ($playlist->findnodes('./tracks/track/@id')) {
            push @track_ids, $id_attr->to_literal();
        }
    
        say join "\n", @track_ids;
        say '';
    }
    

    Sample of input data 'playlist.xml'

    <playlists>
        <playlist id="67312">
            <title>Yes - Tales From Topographic Oceans</title>
            <persistent_id>F28F195257143396</persistent_id> 
            <tracks> 
                <track id="25912" />
                <track id="25914" />
                <track id="25916" />
                <track id="25918" />
            </tracks>
        </playlist>
        <playlist id="67319">
            <title>Yes - Yessongs</title>
            <description>Live Album</description>
            <persistent_id>405B144877D8B8E4</persistent_id>
            <tracks>
                <track id="25920" />
                <track id="25922" />
                <track id="25924" />
                <track id="25926" />
                <track id="25928" />
                <track id="25930" />
            </tracks>
        </playlist> 
    </playlists>
    

    Output

    Title: Yes - Tales From Topographic Oceans
    25912
    25914
    25916
    25918
    
    Title: Yes - Yessongs
    25920
    25922
    25924
    25926
    25928
    25930
    
    0 讨论(0)
提交回复
热议问题