-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTT_ranks_odd.pl
executable file
·62 lines (56 loc) · 1.23 KB
/
TT_ranks_odd.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/perl
use strict;
use locale;
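# The script reads a list of tokens, one token per input line, and prints
# the rank list of types: one output line per type, ordered by decreasing
# count (types with equal counts are listed in string order).
# Each output line holds the rank, the total count, the type itself, a
# variance figure computed from the per-bin counts, and finally the counts
# of the type in each of the decimated sublists (bins) of tokens.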
# Command-line arguments (any further arguments are input files read via
# <>; when there are none, standard input is read):
#   $bins   - number of bins the token stream is dealt into, in turn;
#   $tokens - number of consecutive input lines that go into one bin before
#             the next bin is started.
my ($bins,$tokens)=(shift,shift);
for my $arg ($bins,$tokens){
    # Both arguments must be positive whole numbers; a missing or
    # non-numeric value would otherwise silently compare as 0 below and
    # produce meaningless counts.
    die "Usage: $0 BINS TOKENS [FILE...]\n"
        unless defined $arg && $arg>=1 && $arg==int($arg);
}

my %count;      # committed total count of every type
my %countB;     # committed per-bin counts: $countB{type}[bin]
my %Hcount;     # holding counts for the round that is still in progress
my %HcountB;    # holding per-bin counts for the round in progress
my ($bin,$token)=(0,0);    # current bin index; lines consumed in that bin

# A "round" is one pass over all $bins bins, i.e. $bins*$tokens input lines.
# Holding counts are merged into the committed counts only when a round is
# complete, so the holding counts of a trailing incomplete round are dropped.
while(my $line=<>){
    chomp $line;
    $Hcount{$line}++;
    $HcountB{$line}[$bin]++;
    next if ++$token<$tokens;

    # The current bin is full: start filling the next one.
    $token=0;
    # NOTE(review): the line that closes a bin is counted here directly in
    # the committed counters, and it is counted once more when the holding
    # counters are merged at the end of the round -- confirm that this
    # extra count is intended.
    $count{$line}++;
    $countB{$line}[$bin]++;
    next if ++$bin<$bins;

    # The round is complete: merge the holding counters into the committed
    # ones.  The += also turns every bin slot 0..$bins-1 of a merged type
    # into a defined number.
    for my $type (keys %Hcount){
        $count{$type}+=$Hcount{$type};
    }
    for my $type (keys %HcountB){
        for my $mbin (0..$bins-1){
            $countB{$type}[$mbin]+=$HcountB{$type}[$mbin];
        }
    }
    %Hcount=();
    %HcountB=();
    $bin=0;
}
# Comparator used to order types for the rank list.
# Arguments: two type strings.  Returns -1, 0 or 1.
# A type with the higher total count in %count sorts first; types with
# equal counts are ordered by ascending string comparison (which follows
# the locale, because `use locale` is in effect for this file).
sub my_cmp{
    my ($left,$right)=@_;
    # Operands are swapped for <=> so that larger counts come first.
    return ($count{$right} <=> $count{$left}) || ($left cmp $right);
}
# Print the rank list, one line per type in my_cmp order.
# Columns: rank, total count, type, variance figure, per-bin counts.
my $rank=0;
for my $type (sort {my_cmp($a,$b)} keys %count){
    $rank++;

    # Zero-filled copy of the per-bin counts.  A type may have undefined or
    # missing bin slots (e.g. when it was only counted in a round that was
    # never completed); treating them as 0 matches how they enter the
    # variance sum and gives every output line exactly $bins count columns.
    my @per_bin=map { defined $countB{$type}[$_] ? $countB{$type}[$_] : 0 }
                0..$bins-1;

    # Sum of squared deviations of the per-bin counts from their mean,
    # scaled by $bins/($bins-1).  With fewer than two bins the scale factor
    # would divide by zero, so the figure is reported as NaN instead.
    my $var="NaN";
    if($bins>1){
        my $mean=$count{$type}/$bins;
        $var=0;
        $var+=($mean-$_)**2 for @per_bin;
        $var=$var*$bins/($bins-1);
    }

    print "$rank $count{$type} $type $var ",join(" ",@per_bin),"\n";
}