#usage
#perl allelic3.pl ExperimentTrSNPCounts3.txt [optional list of transcripts] >Experiment.LOG
use strict;
open (IN,"<".$ARGV[0])||die "cannot open ".$ARGV[0]."\n";
open (RUK,">".$ARGV[0].".RUK")||die "cannot open ".$ARGV[0].".RUK\n";
open (MOEAHH,">".$ARGV[0].".MOEAHH")||die "cannot open ".$ARGV[0].".MOEAHH\n";
open (WAUW,">".$ARGV[0].".WAUW")||die "cannot open ".$ARGV[0].".WAUW\n";
open (SINGLESNP,">".$ARGV[0].".sSNP")||die "cannot open ".$ARGV[0].".sSNP\n";
open (STDEM,">".$ARGV[0].".STDEM")||die "cannot open ".$ARGV[0].".STDEM\n";

my $covSum=5;

my %optional;
if ($ARGV[1]){
    open (OPTIONAL,"<".$ARGV[1])||die "cannot open ".$ARGV[1]."\n";
    while (<OPTIONAL>){
	~m/^(\w+)\t(.+)$/;
	my ($tr, $data)=($1,$2);
	$optional{$tr}=$data;
    }
}
while (<IN>){
    chomp;
    my @SNPratios;
    my ($tr,$trRatio,$exSNPratio)=split ("\t",$_);
    my ($aCount,$bCount)=split ("/",$trRatio);

    #In case of a transcript list break out if not listed 
    next if ((! $optional{$tr})&&($ARGV[1]));

    print $tr,"\t",$optional{$tr},"\t",$trRatio,"\t",$exSNPratio;    
    #next if (($aCount<80)||($bCount<80)); #Hendrik's sanity requirement per transcript after summing experiments
    #next if (($aCount<5)&&($bCount<5)); #Hendrik's sanity requirement per transcript experiment
    #next if (($aCount<5)||($bCount<5)); #Hendrik's sanity requirement per transcript after summing experiments to test whether 80 is good cut-off and 5-80 cat. is bad
    if (($aCount<$covSum) || ($bCount<$covSum)){
	print "\tTrAlleleCount<$covSum\n";
	next;
    }
    my $STDEMdef="STDEM";
    
    if  (($aCount<5)||($bCount<5)){
	$STDEMdef="stdemLower80";
    }

    my @dtrRatio = decRatio ($trRatio);
    my @dSNPratios = decRatio ($exSNPratio);
    #ALSO print trs with single SNPs;
    if (scalar(@dSNPratios) == 0){
	print "\tSNPmajAllelcount<5\n";
    }
    if (scalar(@dSNPratios) == 1){
	my $line=$tr."\t".$trRatio."\t".$exSNPratio."\t".join (" ",@dtrRatio)."\t".join (" ",@dSNPratios)."\n";
	print SINGLESNP $line;
	print "\tSingleSNP\t",join (" ",@dtrRatio),"\t",join (" ",@dSNPratios),"\t-\t".scalar @dSNPratios."\n";
    }
    if (scalar(@dSNPratios) >= 2){
	my ($stddev, $stdem) = statistics (@dSNPratios);
	my $line = $tr."\t".$trRatio."\t".$exSNPratio."\t".join ("",@dtrRatio)."\t".join (" ",@dSNPratios)."\t".$stddev."\t".$stdem."\t".scalar @dSNPratios."\n";
	print STDEM $stdem,"\t",($aCount+$bCount),"\n" if ($optional{$tr});
	print "\t",$STDEMdef,"\t",join (" ",@dtrRatio),"\t",join (" ",@dSNPratios),"\t",$stdem,"\t",scalar @dSNPratios,"\n";
	if ($stdem > 0.1){
	    print RUK $line;
	}
	if (($stdem <= 0.1) && ($stdem > 0.05)){
	    print MOEAHH $line;
	}
	if ($stdem <= 0.05){
	    print WAUW $line;
	}
    }
}
close (IN); close (RUK) ; close (MOEAHH); close (WAUW); close (SINGLESNP); close (STDEM);

sub decRatio {
    my ($ratios) = @_;
    my @dSNPratios;
    while ($ratios =~m/(\d+)\/(\d+)/g){
	my @Scalls=sort {$a<=>$b} ($1,$2);
	if (@Scalls[1] >= 3){
	    my $dSNPratio = sprintf ("%.3f", @Scalls[1]/(@Scalls[0]+@Scalls[1]));
	    $dSNPratio = (1-$dSNPratio) if ($2 > $1);
	    push (@dSNPratios,$dSNPratio);
	}
    }
    return @dSNPratios;
                                                                                                                                                }

sub statistics {
    my (@numbers) = @_;
    my $stddev = standard_deviation(@numbers);
    my $stdem = $stddev / sqrt(scalar @numbers);
    return (sprintf ("%.3f",$stddev), sprintf ("%.3f", $stdem));
}

sub standard_deviation {
    my(@numbers) = @_;
    #Prevent division by 0 error in case you get junk data
    return undef unless(scalar(@numbers));

    # Step 1, find the mean of the numbers
    my $total1 = 0;
    foreach my $num (@numbers) {
	$total1 += $num;
    }
    my $mean1 = $total1 / (scalar @numbers);

    # Step 2, find the mean of the squares of the differences
    # between each number and the mean
    my $total2 = 0;
    foreach my $num (@numbers) {
	$total2 += ($mean1-$num)**2;
    }
    my $mean2 = $total2 / (scalar @numbers);#this is not (n-1) method used in office aplications
    
    # Step 3, standard deviation is the square root of the
    # above mean
    my $std_dev = sqrt($mean2);
    return $std_dev;
}
