I have a file which looks like this:
80,1p21
81,19q13
82,6p12.3
83,Xp11.22
84,3pter-q21
86,3q26.33
87,14q24.1-q24.2|14q24|14q22-q24
88,1q42-q43
8
I actually found the answer to this. The code looks a bit complicated though.
#!/usr/bin/env perl
use strict;
use warnings;
sub main {
my $file;
if (@ARGV != 1) {
die "Usage: perl hashofhash_sort.pl <filename>\n";
}
else {
$file = $ARGV[0];
}
open(IN, $file) or die "Error!! Cannot open the $file file: $!\n";
my @file = <IN>;
chomp @file;
my ($entrez_gene, $loci, $chr, $band, $pq, $band_num);
my (%chromosome, %loci_entrez);
foreach my $line (@file) {
if ($line =~ /(\d+),(.+)/) {
# Entrez genes
$entrez_gene = $1;
# Locus like 12p23.4
$loci = $2;
if ($loci =~ /^(\d+)(.+)?/) {
# chromosome number alone (only numericals)
$chr = $1;
if ($2) {
# locus minus chromosome number. If 12p23.4, then $band is p23.4
$band = "$2";
if ($band =~ /^([pq])(.+)/) {
# either p or q
$pq = $1;
# stores the numericals. for p23.4, stores 23.4
$band_num = $2;
}
if (exists $chromosome{$chr}) {
if (exists $chromosome{$chr}{$pq}) {
push (@{$chromosome{$chr}{$pq}}, $band_num);
}
else {
$chromosome{$chr}{$pq} = [$band_num];
}
}
else {
$chromosome{$chr}{$pq} = [$band_num];
}
}
}
}
} # End of foreach loop
foreach my $key (sort {$a <=> $b} keys %chromosome) {
my %seen = ();
foreach my $key2 (sort {$a cmp $b } keys %{$chromosome{$key}}) {
my @unique = grep { ! $seen{$_}++ } @{$chromosome{$key}{$key2}};
my @sorted = sort @unique;
foreach my $element (@sorted) {
my $sorted_locus = "$key$key2$element";
if (exists $loci_entrez{$sorted_locus}) {
foreach my $element2 (@{$loci_entrez{$sorted_locus}}) {
print "$element2,$sorted_locus\n";
}
}
}
}
}
} # End of main
main();
#!/usr/bin/env perl
use strict;
use warnings;
my @datas = map { /^(\d+),(\d*)(.*)$/; [$1, $2, $3]; } <DATA>;
my @res = sort {$a->[1] <=> $b->[1] or $a->[2] cmp $b->[2]} @datas;
foreach my $data (@res) {
my ($x, $y, $z) = @{$data};
print "$x,$y$z\n";
}
__DATA__
80,1p21
81,19q13
82,6p12.3
83,Xp11.22
84,3pter-q21
86,3q26.33
87,14q24.1-q24.2|14q24|14q22-q24
88,1q42-q43
89,11q13.1
90,2q23-q24
91,12q13
92,2q22.3
93,3p22
94,12q11-q14
95,3p21.1
97,14q24.3
98,2p16.2
If you read the documentation for sort, you'll see that you don't need to do a numeric sort in Perl. You can do string comparisons too.
@sorted = sort { $a cmp $b } @unsorted;
But that still leaves you with a problem as, for example, 19q will sort before 6p. So you can write your own sort function which can make whatever transformations you want before doing the comparison.
@sorted = sort my_complex_sort @unsorted;
sub my_complex_sort {
# code that compares $a and $b and returns -1, 0 or 1 as appropriate
# It's probably best in most cases to do the actual comparison using cmp or <=>
# Extract the digits following the first comma
my ($number_a) = $a =~ /,(\d+)/;
my ($number_b) = $b =~ /,(\d+)/;
# Extract the letter following those digits
my ($letter_a) = $a =~ /,\d+(a-z)/;
my ($letter_b) = $b =~ /,\d+(a-z)/;
# Compare and return
return $number_a <=> $number_b or $letter_a cmp $letter_b;
}