#Usage: mergeGenomatixTrSNPCounts.pl 129_Cast_3_61KGLAAXXTrSNPCounts.txt <expression_profile.tsv>
use strict;
my $inputCounts= $ARGV[0];
my $expression_profile = $ARGV[1]; #sorted on symbol
my $out=$ARGV[0];
$out=~s/.txt/.genomatixSORT.tsv/;
$out=~s/SNPCountPerExp/SNPCountsGenomatix/;
my %inC;
my %genes;

#CACHE SNP statistics

open (INC,"<".$ARGV[0])||die "cannot open $ARGV[0]";
while (<INC>){
    chomp;
    my @line=split ("\t",$_);
    if (@line[3]){
	$inC{@line[0]}=[@line[1..$#line]];
    }
}
#foreach my $tr (keys %inC){
#    print $tr,"  ",join (" ",@{$inC{$tr}}),"\n";
#    exit;
#}

open (EXPRO,"<".$ARGV[1])||die "connot open $ARGV[1]";
open (OUT,">".$out)||die "cannot open $out";

my %genomatix;

while (<EXPRO>){
    chomp;
    my @line=split ("\t",$_);
    my ($transcript,$geneSymbol,$TranscriptLength)=@line[0,3,10];
    if ($transcript eq "TranscriptId"){
	print OUT $_,"\n";
	next;
    }
    
#    print $transcript,$geneSymbol,$TranscriptLength,"\n";
#    if ($TranscriptLength > $genes{$geneSymbol}[1]){
#	print $TranscriptLength,"is larger than ",$genes{$geneSymbol}[1],"\n";
#	$genes{$geneSymbol}=[$transcript,$TranscriptLength];

    $genomatix{$geneSymbol}{$transcript}=[@line[1,2,4..$#line]];

#    print join("\t",@{$genomatix{$geneSymbol}{$transcript}});
#    exit;
}

foreach my $geneSymbol (sort keys %genomatix){
    foreach my $transcript (sort keys %{$genomatix{$geneSymbol}}){
	my $elements=$#{$genomatix{$geneSymbol}{$transcript}};
#	print "elements ",$elements,"\n";
	print OUT $transcript,"\t",join("\t",@{$genomatix{$geneSymbol}{$transcript}}[0,1]),"\t",$geneSymbol,"\t",join("\t",@{$genomatix{$geneSymbol}{$transcript}}[2..$elements]);
    

	if ($inC{$transcript}){
	    print OUT "\t",join ("\t",@{$inC{$transcript}});
	}
	print OUT "\n";
    }
}
