How to do alpha numeric sort perl?

前端 未结 3 375
迷失自我
迷失自我 2021-01-28 02:46

I have a file which looks like this:

80,1p21  
81,19q13  
82,6p12.3  
83,Xp11.22  
84,3pter-q21  
86,3q26.33  
87,14q24.1-q24.2|14q24|14q22-q24  
88,1q42-q43  
8         


        
相关标签:
3条回答
  • 2021-01-28 03:26

    I actually found the answer to this. The code looks a bit complicated though.

    #!/usr/bin/env perl
    
    use strict;  
    use warnings;
    
    sub main {   
    my $file;  
    if (@ARGV != 1) {   
        die "Usage: perl hashofhash_sort.pl <filename>\n";
    }   
    else {  
        $file = $ARGV[0];   
    }  
    
    open(IN, $file) or die "Error!! Cannot open the $file file: $!\n";
    my @file = <IN>;
    chomp @file;
    my ($entrez_gene, $loci, $chr, $band, $pq, $band_num);
    my (%chromosome, %loci_entrez);
    
    foreach my $line (@file) {
        if ($line =~ /(\d+),(.+)/) {
            # Entrez genes
            $entrez_gene = $1;
    
            # Locus like 12p23.4
            $loci = $2;
    
            if ($loci =~ /^(\d+)(.+)?/) {
                # chromosome number alone (only numericals)
                $chr = $1;
                if ($2) {
                    # locus minus chromosome number. If 12p23.4, then $band is p23.4
                    $band = "$2";
                    if ($band =~ /^([pq])(.+)/) {
                        # either p or q
                        $pq = $1;
                        # stores the numericals. for p23.4, stores 23.4
                        $band_num = $2;
                    }
    
                    if (exists $chromosome{$chr}) {
                        if (exists $chromosome{$chr}{$pq}) {
                            push (@{$chromosome{$chr}{$pq}}, $band_num);
                        }
                        else {
                            $chromosome{$chr}{$pq} = [$band_num];
                        }
                    }
    
                    else {
                        $chromosome{$chr}{$pq} = [$band_num];
                    }
                }
            }
        }
    } # End of foreach loop
    
    foreach my $key (sort {$a <=> $b} keys %chromosome) {
        my %seen = ();
        foreach my $key2 (sort {$a cmp $b } keys %{$chromosome{$key}}) {
            my @unique = grep { ! $seen{$_}++ } @{$chromosome{$key}{$key2}};
            my @sorted = sort @unique;
            foreach my $element (@sorted) {
                my $sorted_locus = "$key$key2$element";
                if (exists $loci_entrez{$sorted_locus}) {
                    foreach my $element2 (@{$loci_entrez{$sorted_locus}}) {
                            print "$element2,$sorted_locus\n";
    
                    }
                }
            }
        }
    
    }
    
    
    } # End of main
    
    main();
    
    0 讨论(0)
  • 2021-01-28 03:30
    #!/usr/bin/env perl
    
    use strict;
    use warnings;
    
    my @datas   = map { /^(\d+),(\d*)(.*)$/; [$1, $2, $3]; } <DATA>;
    my @res     = sort {$a->[1] <=> $b->[1] or $a->[2] cmp $b->[2]} @datas;
    foreach my $data (@res) {
        my ($x, $y, $z) = @{$data};
        print "$x,$y$z\n";
    }
    
    __DATA__
    80,1p21
    81,19q13
    82,6p12.3
    83,Xp11.22
    84,3pter-q21
    86,3q26.33
    87,14q24.1-q24.2|14q24|14q22-q24
    88,1q42-q43
    89,11q13.1
    90,2q23-q24
    91,12q13
    92,2q22.3
    93,3p22
    94,12q11-q14
    95,3p21.1
    97,14q24.3
    98,2p16.2 
    
    0 讨论(0)
  • 2021-01-28 03:36

    If you read the documentation for sort, you'll see that you don't need to do a numeric sort in Perl. You can do string comparisons too.

    @sorted = sort { $a cmp $b } @unsorted;
    

    But that still leaves you with a problem as, for example, 19q will sort before 6p. So you can write your own sort function which can make whatever transformations you want before doing the comparison.

    @sorted = sort my_complex_sort @unsorted;
    
    sub my_complex_sort {
      # code that compares $a and $b and returns -1, 0 or 1 as appropriate
      # It's probably best in most cases to do the actual comparison using cmp or <=>
    
      # Extract the digits following the first comma
      my ($number_a) = $a =~ /,(\d+)/;
      my ($number_b) = $b =~ /,(\d+)/;
    
      # Extract the letter following those digits
      my ($letter_a) = $a =~ /,\d+(a-z)/;
      my ($letter_b) = $b =~ /,\d+(a-z)/;
    
      # Compare and return
      return $number_a <=> $number_b or $letter_a cmp $letter_b;
    }
    
    0 讨论(0)
提交回复
热议问题