Perl: Generating Arrays inside a Complex Hash

后端 未结 3 1624
臣服心动
臣服心动 2021-01-16 09:54

In the quest to make my data more accessible, I want to store my tabulated data in a complex hash. I am trying to grow a 'HoHoHoA' (a hash of hashes of hashes of arrays) as the script loops over my data. As per…

相关标签:
3条回答
  • 2021-01-16 10:16

    This should do it for you.

    #!/usr/bin/perl
    
    # Reads a tab-separated table whose header row names the columns (including
    # DATE and TIME), groups every other column's values by date and hour, and
    # prints the average of each group.
    #
    # Usage: script.pl FILE
    
    use strict;
    use warnings;
    
    use List::Util qw/sum/;
    
    # Arithmetic mean of the arguments. @_ in scalar context is the item count,
    # so callers must pass at least one value.
    sub avg { sum(@_) / @_ }
    
    my $fileName = shift;
    die "Usage: $0 FILE\n" unless defined $fileName;
    
    open my $fh, "<", $fileName
        or die "Unable to open $fileName: $!\n";
    
    # $monthData{COLUMN}{DATE}{HOUR} = [ values seen for that column/date/hour ]
    my %monthData;
    
    # The header row is read once, outside the loop, so the loop needs no
    # special case for it.
    my $headerLine = <$fh>;
    die "$fileName is empty\n" unless defined $headerLine;
    chomp(my @headers = split /\t+/, $headerLine);
    
    while (<$fh>) {
        chomp;
        next unless /\S/;    # a blank line carries no DATE/TIME to file values under
        my %rec;
        @rec{@headers} = split /\t+/;             # hash slice: header name => field
        my ($hour) = split /:/, $rec{TIME}, 2;    # only the part before the first ':'
    
        for my $key (grep { not /^(DATE|TIME)$/ } keys %rec) {
            push @{ $monthData{$key}{$rec{DATE}}{$hour} }, $rec{$key};
        }
    }
    
    close $fh;
    
    # Keys are sorted (as strings) so the report comes out in the same order on
    # every run instead of following Perl's randomized hash order.
    for my $column (sort keys %monthData) {
        for my $date (sort keys %{ $monthData{$column} }) {
            for my $hour (sort keys %{ $monthData{$column}{$date} }) {
                my $avg = avg @{ $monthData{$column}{$date}{$hour} };
                print "average of $column for $date $hour is $avg\n";
            }
        }
    }
    

    Things to pay attention to:

    • strict and warnings pragmas
    • List::Util module to get the sum function
    • putting an array in scalar context to get the number of items in the array (in the avg function)
    • the safer three argument version of open
    • the lexical filehandle (rather than the old bareword style filehandle)
    • reading the headers first outside the loop to avoid having to have special logic inside it
    • using a hash slice to get the file data into a structured record
    • avoiding splitting the time more than necessary with the third argument to split
    • avoiding useless variables by only specifying the variable we want to catch in the list assignment
    • using grep to prevent the DATE and TIME keys from being put in %monthData
    • the nested for loops each dealing with a level in the hash
    0 讨论(0)
  • 2021-01-16 10:38

    Here's how I would write a program to do that.

    #! /usr/bin/env perl
    use strict;
    use warnings;
    use 5.010; # for say and m'(?<name>)'
    
    use YAML;
    use Data::Dump 'dump';
    
    # %data:     year -> month -> day -> hour -> list of rows, each row an array
    #            ref holding that line's column values
    # %original: COLUMNn -> "day/month/year" -> hour -> list of that column's values
    my(%data,%original);
    while( my $line = <> ){
      # Lines that do not start with "D/M/YYYY H:M:S" (such as the header row)
      # are skipped. Minutes and seconds accept 00-59 and hours accept 00-24, so
      # timestamps like 07:00:56 or 00:15:00 are no longer silently dropped.
      next unless $line =~ m'
        ^ \s*
          (?<day>   0?[1-9] | [12][0-9] | 3[0-1] ) /
          (?<month> 0?[1-9] | 1[0-2] ) /
          (?<year>  [0-9]{4} )
          \s+
          (?<hour>   [01]?[0-9] | 2[0-4] ) :
          (?<minute> [0-5]?[0-9] ) :
          (?<second> [0-5]?[0-9] )
          \s+
          (?<columns> .* )
      'x;
    
      # Copy the named captures right away: %+ reflects the most recent
      # successful match, so any later match would replace its contents.
      my %stamp = %+;
      my @columns = split ' ', $stamp{columns};
    
      push @{
        $data{ $stamp{year}  }
             { $stamp{month} }
             { $stamp{day}   }
             { $stamp{hour}  }
      }, \@columns; # or [@columns]
    
      # If you insist on having it in that data structure you can do this:
      my $count = 1;
      my $date = "$stamp{day}/$stamp{month}/$stamp{year}";
      for my $column ( @columns ){
        my $col = 'COLUMN'.$count++;
        push @{ $original{$col}{$date}{$stamp{hour}} }, $column;
      }
    }
    
    say Dump \%data, \%original; # YAML
    say dump \%data, \%original; # Data::Dump
    

    Given this input

    DATE       TIME     COLUMN1 COLUMN2 COLUMN3
    09/06/2008 06:12:56 56.23   54.23   56.35
    09/06/2008 06:42:56 56.73   55.28   54.52
    09/06/2008 07:12:56 57.31   56.79   56.41
    09/06/2008 07:42:56 58.24   57.30   58.86
    

    Run it as either "perl program.pl datafile" or "perl program.pl < datafile"; both produce the output below.

    YAML

    ---
    2008:
      06:
        09:
          06:
            -
              - 56.23
              - 54.23
              - 56.35
            -
              - 56.73
              - 55.28
              - 54.52
          07:
            -
              - 57.31
              - 56.79
              - 56.41
            -
              - 58.24
              - 57.30
              - 58.86
    ---
    COLUMN1:
      09/06/2008:
        06:
          - 56.23
          - 56.73
        07:
          - 57.31
          - 58.24
    COLUMN2:
      09/06/2008:
        06:
          - 54.23
          - 55.28
        07:
          - 56.79
          - 57.30
    COLUMN3:
      09/06/2008:
        06:
          - 56.35
          - 54.52
        07:
          - 56.41
          - 58.86
    

    Data::Dump

    (
      {
        2008 => {
              "06" => {
                    "09" => {
                          "06" => [["56.23", "54.23", "56.35"], ["56.73", "55.28", "54.52"]],
                          "07" => [["57.31", "56.79", "56.41"], ["58.24", "57.30", "58.86"]],
                        },
                  },
            },
      },
      {
        COLUMN1 => {
                     "09/06/2008" => { "06" => ["56.23", "56.73"], "07" => ["57.31", "58.24"] },
                   },
        COLUMN2 => {
                     "09/06/2008" => { "06" => ["54.23", "55.28"], "07" => ["56.79", "57.30"] },
                   },
        COLUMN3 => {
                     "09/06/2008" => { "06" => ["56.35", "54.52"], "07" => ["56.41", "58.86"] },
                   },
      },
    )
    
    0 讨论(0)
  • 2021-01-16 10:40

    I hope the following program populates the data structure you want:

    #!/usr/bin/perl
    
    # Reads the tab-separated file 'input' (header row first, then one row per
    # sample: date, time, values...) and dumps a structure of the form
    # $monthData{HEADER}{DATE}{HOUR} = [ values... ].
    
    use strict;
    use warnings;
    use Data::Dumper;
    
    # Sorted keys make the dump identical from run to run.
    $Data::Dumper::Sortkeys = 1;
    
    open my $fh, '<', 'input' or die "Cannot open 'input': $!";
    
    # Header row: read exactly one line, then drop the empty fields that
    # consecutive tabs leave behind.
    my $headerLine = <$fh>;
    die "'input' is empty\n" unless defined $headerLine;
    chomp $headerLine;
    my @headers = grep { length } split /\t/, $headerLine;
    
    my %monthData;
    while ( my $row = <$fh> ) {
        chomp $row;
        my @line = grep { length } split /\t/, $row;
    
        # A blank or truncated row has no date/time to file the values under.
        next unless @line >= 2;
    
        # The hour is the same for every column of the row, so compute it once.
        my ($hour) = split /:/, $line[1];
    
        # Columns 0 and 1 are the date and the time; the rest are data columns.
        for my $i ( 2 .. $#headers ) {
            push @{ $monthData{ $headers[$i] }->{ $line[0] }->{$hour} }, $line[$i];
        }
    }
    
    close $fh;
    
    print Dumper \%monthData;
    
    0 讨论(0)
提交回复
热议问题