// This program takes a set of hybridizations for genotyping across a known CNV and 
//separates the sample into two groups, one corresponding to the carriers and the second 
//corresponding to the non-carriers.
//Individuals that are homozygous across all markers are considered.

//4/5 added analysis of signal in hets and between GTs 
//6/3 Small changes to allow comments in input file, make intensity output optional and flag indiv with all missing gt.
//6/4 Analyzes Multiple CNVs version 0.7;
//6/8 Fixed monomorphic SNPs in border calculations; minor other fixes
//7/15 Removed bug that read in SNPs one position off
//7/18 Removed bug that skipped some permutations of borders
//7/24 Removed bug in a calculation that stopped on underflow & reduced chance of underflow
//7/28 Reduced computation time for large CNVs; allowed input of only one intensity
//8/1  Added flag that indicates if the imputed thing is actually a deletion
//8/11 Bug hunting in the new features
//8/12 More Bug Hunting; added the Qc option
//8/13 Screwed with the prior some more
//8/14 Redone underflow handling & Bug Hunt
//9/4 Fixed prior and output

#include <math.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

// Per-individual record: hybridization data for every marker of the current
// CNV window plus the carrier status inferred by the EM.
// The per-marker arrays are allocated by makedarr with one entry per marker.
struct signal
{
  char * name;      // individual identifier (50-byte buffer allocated in makedarr)
  float *maxsig;    //intensity of the higher signal, one value per marker
  float *lrr;       // per-marker value analysed instead of maxsig when lrr_flag is non-zero
  float *sigA;      // per-marker array; presumably the allele-A intensity -- TODO confirm against readdata
  float *sigB;      // per-marker array; presumably the allele-B intensity -- TODO confirm against readdata
  int *gt;			// genotype per marker: 0 =AA 1= AB 2=BB 3=NC (no call)
  int kn;          //flag for known status: number of heterozygous calls in the analysed window (0 = none), -1 if all NC or removed as outlier
  float *inf;      // inferred state inf[0]=prob of baseline; inf[1]=prob of deletion
  //stuff for the haplotyping EM (not referenced by the EM code in this part of the file)
  double sufreq;
  int mdc;//missing data counter
  int noh;//number of haplotypes
  int *hap;
  int *md;
  float prior;

};
// One CNV region to analyse, together with the results main() stores for it.
struct cnv
{
	char *name;      // region label (30-byte buffer allocated in makecnvarr)
	int type;        // NOTE(review): meaning not visible in this part of the file
	int pstart;      // initialised to 0 in makecnvarr; presumably physical start position -- TODO confirm
	int pend;        // initialised to 0 in makecnvarr; presumably physical end position -- TODO confirm
	int sstart;      // first marker of the region (main passes it to readdata as the first marker)
	int snps;        // number of markers in the region
	int estart;      // estimated start: best_start reported by bound_an
	int esnps;       // estimated length in markers: best_end-best_start
	float *cprobs;   // per-individual deletion probability (copy of inf[1])
	float carr;      // value returned by results() for this region
	float blr;       // log-likelihood difference to the baseline model, as returned by bound_an
	int po;          // status flag copied from the last argument-returned flag of bound_an/runem
	float initfr;    // frequency value written by the EM initialisation (mean of the per-marker estimates)

};



/* Gaussian density and log-density of signal given mean mu and standard deviation sig */
float prob(float signal,float mu, float sig);
float logprob(float signal,float mu, float sig);
/* uniform pseudo-random number in [0,1) based on rand() */
double RandDbl(void);
/* log-likelihood of markers start..end-1 under the one-distribution-per-genotype baseline */
float calc_bl(struct signal * data,int indiv,int start,int end);

/*******Memory handling***********/
/* each make... allocator terminates the program with exit code 8 when memory runs out */
struct cnv * makecnvarr(int d1,int d2);
void freecnvarr(struct cnv * data, int d1);
char ** make2carr(int,int);
void rem2carr(char **, int);
float ** make2arr(int d1,int d2);
void rem2arr(float **arr,int d1);
struct signal *makedarr(int d1,int d2);
void freedarr(struct signal * data, int d1);

/* Program-wide settings. Nothing in this part of the file assigns them after
   initialisation; presumably the command line parser (icl) does -- TODO confirm. */
float R2p=2.506628;//square root of 2*Pi, normalising constant used by prob() and logprob()
int Op=0;//Flag for the output of intermediate values during the EM
int Lrr=0;//Flag for analyzing the ratio -1: the Lrr is in the input file as well; 1: only Lrr is in the input file
int Bknown=0;//Flag for CNV borders known (0) or unknown(1)
int Gte_flag=0;//Flag whether to model genotyping error
int AddAn_flag=0;//Flag whether to do additional analysis
float Prior=0.0;//if >0, replaces the frequency-based carrier prior in initialize_em
int Colsk=0;//NOTE(review): not used in this part of the file; presumably columns to skip in the input
int V=0;//verbose flag: extra per-marker output in calculate_lik and calc_bl
int Outlieflag=0;//1: individuals called carriers are removed and the EM rerun when the sanity check fails
float Qc=1.0;//NOTE(review): not used in this part of the file (the "Qc option" of the change log)
float Version=0.710;//printed at program start

  /*
   * Program entry point.
   *
   * Parses the command line (icl), then for every CNV region -- either the
   * single marker window fm..lm or each region listed in the position file --
   * reads the data, runs the boundary search / EM (bound_an), reports the
   * results and optionally runs the two resampling analyses. When a position
   * file was given, a summary over all regions is written with summout.
   *
   * Returns 0 on normal termination; allocation failures exit with code 8.
   */
  int main(int argc, char * argv[])
{
  float runem(int steps,struct signal * data,int indiv, int snps, int, int,float *,int, int *,float *,float *);
  float results(int indiv,struct signal * data, int snps);
  void summout(char * sumfile,struct cnv *cnvs,int indiv,int nocnv,char **);

  void threshold(int indiv,struct signal * data,float thresh);
  void readdata(int indiv,struct signal *data,int nsnps,char * infile,int);
  struct cnv * readpos(char *posfile,int *nocnv,int indiv,char *mapfile);
  void calc_stuff(struct signal * res, int snps, int indiv);

  void subsample(struct signal *data,struct signal *sdat,int indiv,int snps,int subs);
  void subsample2(struct signal *data,struct signal *sdat,int indiv,int sind, int snps,float);

  float compare(struct signal *data,struct signal *sdat, int indiv);
  float compare2(struct signal *data,struct signal *sdat, int indiv, int sind);

  void calcss(float * results, int noe);
  void icl(int noa, char *argum[],int *indiv, int *snps, int *subs,int *steps, int *reps,int *,
	   char * infile, float *,int *,float *,int *,int *,char *,char*,int *);
  void initial_out(int seed, int indiv,int snps,int steps, int lrr_flag, float thresh, int subs,int reps);
  float bound_an(int steps,struct signal * data,int indiv, int snps,int lrr_flag,int *, int *,int, int *,float *);
  int remove_empty_markers(struct signal *data, int snps,int indiv);

  int i,j,k;
  struct signal * data;
  struct cnv * cnvs=NULL;

  char ** names=NULL;
  struct signal * sdat;
  int indiv=4001;
  int sind=0;
  int snps=12;
  int subs=0;
  int steps=30;
  char infile[100]="input.csv";
  char posfile[100]="nofile";
  char mapfile[100]="nofile";
  /* large enough for "sumout." followed by any posfile (at most 99 characters) */
  char sumfile[120]="sumout.";
  int fm=1,lm=0;
  int nocnv=1;
  int reps=100;
  float *diff;
  int seed=time(NULL);
  float cnvfreq=0.0435;
  int lrr_flag=0;
  float thresh=0.0;
  int best_start,best_end;
  float noca,blr;
  /* defined default so the value is never read uninitialised if icl leaves it alone */
  int minl=1;
  int scr;
  float initfr;

  printf("CNVEM Version %.3f\n",Version);
  srand(seed);

  icl(argc,argv,&indiv, &snps,&subs,&steps,&reps,&sind,infile,&cnvfreq,&lrr_flag,&thresh,&fm,&lm,posfile,mapfile,&minl);
  initial_out(seed, indiv, snps, steps, lrr_flag,  thresh, subs,reps);

  if(strcmp(posfile,"nofile")!=0)
    {
      cnvs=readpos(posfile, &nocnv,indiv,mapfile);
      /* bounded append: never writes past the end of sumfile */
      strncat(sumfile,posfile,sizeof(sumfile)-strlen(sumfile)-1);
    }
  for(j=0;j<nocnv;++j)
    {
      printf("\nCNV %d:\n\n",j+1);
      snps=lm-fm+1;
      if(strcmp(posfile,"nofile")!=0)
	{
	  fm=cnvs[j].sstart;
	  snps=cnvs[j].snps;
	  lm=fm+snps-1;
	}
      if(snps>0)
	{
	  data=makedarr(indiv,snps);

	  printf("Memory allocated\n");

	  //read in data
	  readdata(indiv,data,snps,infile,fm);
	  if(names==NULL && strcmp(posfile,"nofile")!=0)
	    {
	      /* 50 bytes per row: same size as the name buffers allocated by makedarr */
	      names=make2carr(indiv,50);
	      for(k=0;k<indiv;++k)
		strcpy(names[k],data[k].name);
	    }

	  snps=remove_empty_markers(data,snps,indiv);

	  printf("Data read in; %d individuals %d markers %d -%d \n",indiv,snps,fm,lm);
	  if(snps>0)
	    {
	      if(Bknown==0 || minl>=snps)
		{
		  /* borders taken as known: only the full window is evaluated */
		  blr=bound_an(steps,data,indiv, snps,lrr_flag,&best_start,&best_end,snps, &scr,&initfr);
		  best_start=0;
		  best_end=snps;
		  /* NOTE(review): bound_an already returns the likelihood minus the
		     baseline; this subtracts a baseline a second time -- confirm intent */
		  blr-=calc_bl(data,indiv,0,snps);
		}
	      else
		blr=bound_an(steps,data,indiv, snps,lrr_flag,&best_start,&best_end,minl, &scr,&initfr);

	      printf("EM finished\n");

	      if(thresh>0.0)
		threshold(indiv,data,thresh);

	      noca=results(indiv,data,snps);

	      if(AddAn_flag>0)
		calc_stuff(data,snps,indiv);

	      if(strcmp(posfile,"nofile")!=0)
		{
		  if(nocnv>0)
		    {
		      cnvs[j].estart=best_start;
		      cnvs[j].esnps=best_end-best_start;
		      cnvs[j].carr=noca;
		      cnvs[j].blr=blr;
		      for(k=0;k<indiv;++k)
			cnvs[j].cprobs[k]=data[k].inf[1];
		      cnvs[j].po=scr;
		      cnvs[j].initfr=initfr;
		    }
		  else
		    {
		      cnvs[j].estart=0;
		      cnvs[j].esnps=0;
		      cnvs[j].carr=0.0;
		      cnvs[j].blr=0.0;
		    }
		}

	      /*********************End of main calculations******************************/

	      //Subsampling

	      if(subs>0)
		{
		  threshold(indiv,data,0.8);
		  diff=malloc(reps*sizeof(float));
		  if(diff==NULL)
		    {
		      printf("Out of memory in main\n");
		      exit(8);
		    }
		  sdat=makedarr(indiv,subs);
		  printf("subsampling\n");
		  for(i=0;i<reps;++i)
		    {
		      subsample(data,sdat,indiv,snps,subs);
		      /* NOTE(review): the EM is rerun on data, not on the subsample sdat
			 (an earlier version ran runem on sdat) -- confirm this is intended */
		      bound_an(steps,data,indiv, snps,lrr_flag,&best_start,&best_end,snps, &scr,&initfr);
		      if(thresh>0.0)
			threshold(indiv,sdat,thresh);
		      diff[i]=compare(data,sdat,indiv);
		      printf("Simulated dataset %d:\t%f\n",i,diff[i]);
		    }
		  calcss(diff, reps);
		  free(diff);
		  freedarr(sdat,indiv);
		}
	      if (sind>0)
		{
		  threshold(indiv,data,0.8);
		  diff=malloc(reps*sizeof(float));
		  if(diff==NULL)
		    {
		      printf("Out of memory in main\n");
		      exit(8);
		    }
		  sdat=makedarr(sind,snps);
		  printf("subsampling\n");
		  for(i=0;i<reps;++i)
		    {
		      subsample2(data,sdat,indiv,sind,snps,cnvfreq);

		      /* NOTE(review): as above, the EM is rerun on data rather than sdat */
		      bound_an(steps,data,indiv, snps,lrr_flag,&best_start,&best_end,snps, &scr,&initfr);
		      if(thresh>0.0)
			threshold(sind,sdat,thresh);
		      diff[i]=compare2(data,sdat,indiv,sind);
		      printf("Simulated dataset %d:\t%f\n",i,diff[i]);
		    }
		  calcss(diff, reps);

		  free(diff);
		  freedarr(sdat,sind);
		}
	    }
	  /* released on every path, also when all markers turned out to be empty */
	  freedarr(data,indiv);
	}
    }

  if(strcmp(posfile,"nofile")!=0)
    {
      summout(sumfile,cnvs,indiv,nocnv,names);
      /* names stays NULL when no region had any marker */
      if(names!=NULL)
	rem2carr(names,indiv);
      freecnvarr(cnvs,nocnv);
    }
  printf("Program terminating\n");
  return(0);
}

/**************************************************************/
/*****************end of main**********************************/
/**************************************************************/

/*
 * Runs the EM for the marker window start..end-1.
 *
 * After initialisation, each of the `steps` iterations re-estimates the signal
 * distributions and then the per-individual carrier probabilities. The result
 * is checked with sanity_check; on failure either the called carriers are
 * removed and the EM is repeated (Outlieflag==1) or *rm is set to 1 and the
 * result is kept (Outlieflag==0).
 *
 * noc     out: sum of the carrier probabilities from the last status estimate
 * rm      out: number of removed individuals, or 1 as failure flag (see above)
 * prfreq  out: mean estimated frequency, filled in by initialize_em
 * estf    in:  per-marker frequency estimates
 * Returns the log-likelihood computed by calculate_lik for the final state.
 */
float runem(int steps,struct signal * data,int indiv, int snps,int start, int end, float * noc,
	    int lrr_flag, int *rm,float *prfreq,float * estf)
{
  float initialize_em(int indiv, int snps, struct signal * data,int start, int end, float* prior,
		     float*gte_prior,float *,float *);
  float estimate_status(int indiv, float ** mu, float ** var, struct signal *data,int start,
			int end,int lrr_flag, float* prior);
  void estimate_distrib(int indiv, float ** mu, float ** var, struct signal * data,int start, int end,float);
  void estimate_distrib_lrr(int indiv, int snps, float ** mu, float ** var, struct signal * data,int start, int end);
  double calculate_lik(int indiv, int snps, float ** mu, float ** var, struct signal * data,int start, int end,int lrr_flag);
  float estimate_status_gte(int indiv, float ** mu, float ** var, struct signal *data, int start, int end,
			    int lrr_flag, float* prior, float *gte_prior);
  int sanity_check(int indiv, int start, int end,struct signal * data,float ** mu);

  float **means=make2arr(snps,4);
  float **spreads=make2arr(snps,4);
  float prior[2],gte_prior[2];
  double loglik;
  float draw;
  float default_var;
  int step,person;
  int failed;

  default_var=initialize_em(indiv,snps,data,start,end,prior,gte_prior,prfreq,estf);

  *rm=0;
  do
    {
      for(step=0;step<steps;++step)
	{
	  /* M-step: signal distributions given the current carrier probabilities */
	  if(lrr_flag!=0)
	    estimate_distrib_lrr(indiv,snps,means,spreads,data,start,end);
	  else
	    estimate_distrib(indiv,means,spreads,data,start,end,default_var);

	  /* E-step: carrier probabilities given the distributions */
	  if(Gte_flag!=0)
	    *noc=estimate_status_gte(indiv,means,spreads,data,start, end,lrr_flag,prior,gte_prior);
	  else
	    *noc=estimate_status(indiv,means,spreads,data,start, end,lrr_flag,prior);

	  if(Op==1)
	    {
	      loglik=calculate_lik(indiv,snps,means,spreads,data,start, end,lrr_flag);
	      printf("Step %d  loglikelihood %e carriers %.2f\n",step,loglik,*noc);
	    }
	}

      failed=sanity_check(indiv,start, end,data,means);

      if(failed==1)
	{
	  if(Outlieflag==1)
	    {
	      /* drop everybody called a carrier, restart the others at random */
	      *rm=0;
	      for(person=0;person<indiv;++person)
		{
		  if(data[person].inf[1]>0.5)
		    {
		      data[person].kn=-1;
		      printf("%s removed\n",data[person].name);
		      ++*rm;
		    }
		  else if(data[person].kn==0)
		    {
		      draw=RandDbl();
		      data[person].inf[0]=draw;
		      data[person].inf[1]=1-draw;
		    }
		}
	      printf("%d outliers removed\n",*rm);
	    }
	  if(Outlieflag==0)
	    {
	      /* keep the result but flag it for the caller */
	      *rm=1;
	      failed=0;
	    }
	}
    }
  while(failed==1);

  loglik=calculate_lik(indiv,snps,means,spreads,data,start, end,lrr_flag);

  rem2arr(means,snps);
  rem2arr(spreads,snps);

  return (loglik);
}
/***************************************************************************************/
/*
 * Prepares an EM run on the marker window start..end-1.
 *
 *  - *meanf receives the mean of estf[start..end-1].
 *  - data[k].kn is set to the number of heterozygous calls of individual k in
 *    the window, or to -1 when every genotype in the window is missing.
 *  - Individuals with kn==0 get a random starting probability in inf[];
 *    everybody else starts as a certain non-carrier.
 *  - prior[] (carrier prior for individuals with kn==0) and gte_prior[]
 *    (prior used for single heterozygotes when genotyping error is modelled)
 *    are filled in.
 *
 * Returns the variance of maxsig over all homozygous calls in the window
 * (0 when there are none); the caller uses it as the default spread.
 * NOTE(review): estimate_distrib stores square roots in var[][], so the
 * square root of this value may be what is wanted -- confirm.
 *
 * The snps parameter is unused.
 */
float initialize_em(int indiv, int snps, struct signal * data, int start, int end,float * prior,
		   float * gte_prior,float *meanf,float * estf)
{
  int i,k,c=0;
  float rn;
  int sh=0;
  float pofc;
  int mdc;
  int count;
  int rcount=0;
  float hemf;
  float ret=0.0;
  float ms=0.0;

  //estimate frequency from HWE

  *meanf=0.0;
  printf("%d-%d\n",start,end);
  for(i=start;i<end;++i)
    *meanf+=estf[i];
  *meanf=*meanf/(end-start);
  //set prior
  count=0;
  //identify obligate non-carriers
  for(k=0;k<indiv;++k)
    {
      data[k].kn=0;
      mdc=0;
      for(i=start;i<end;++i)
	{
	  if(data[k].gt[i]==1)
	    data[k].kn++;
	  if(data[k].gt[i]==3)
	    mdc++;
	}
      if(mdc==end-start)
	{
	  data[k].kn=-1;
	  rcount++;
	}
      /* running sums for the variance of the homozygous signals */
      for(i=start;i<end;++i)
	if(data[k].gt[i]==0 || data[k].gt[i]==2)
	  {
	    ms+=data[k].maxsig[i];
	    ret+=pow(data[k].maxsig[i],2);
	    ++count;
	  }
    }
  /* E[x^2]-E[x]^2; guarded so that a window without homozygous calls gives 0, not 0/0 */
  if(count>0)
    ret=ret/count-pow(ms/count,2);
  else
    ret=0.0;

  //assign random carrier status to everyone else
  count=0;
  for(i=0;i<indiv;++i)
    {
      if(data[i].kn==0)
	{
	  rn=RandDbl();
	  data[i].inf[0]=rn;
	  data[i].inf[1]=1-rn;
	  ++c;
	}
      else
	{
	  data[i].inf[0]=1.0;
	  data[i].inf[1]=0.0;
	}
      if(data[i].kn==1)
	++sh;
      if(data[i].kn>=0)
	++count;
    }
  printf("%d individuals with unknown deletion status\n",c);
  printf("%d individuals removed for missing data\n",rcount);
  if(Op==1)
    {
      printf("%d individuals with unknown deletion status\n",c);
      if(Gte_flag==1)
	printf("%d single heterozygotes\n",sh);
    }
  //we may want to modify those according to c and sh
  /* the population-level carrier frequency is rescaled to the c individuals
     whose status is still open, and capped at 0.5 */
  if(Prior>0)
    {
      prior[1]=(Prior*count)/c;
      if(prior[1]>0.5)
	prior[1]=0.5;
    }
  else
    {
      hemf=2*(*meanf)*(1-*meanf);
      prior[1]=(hemf*count)/c;
      if(hemf<0.01)prior[1]=(0.01*count)/c;
      if(prior[1]>0.5)prior[1]=0.5;

    }
  prior[0]=(1-prior[1]);
  printf("Modified priors: %f %f\n",prior[0],prior[1]);
  pofc=prior[1]*c/pow(0.99,end-start);


  gte_prior[1]=pow(0.99,end-start-1)*0.01*(end-start)*pofc/sh;
  gte_prior[0]=1-gte_prior[1];

  /* the original ended with a bare `return;`, leaving the caller's default
     variance undefined */
  return ret;
}
/**************************************************************************/
/*
 * Plausibility test of an EM result for the window start..end-1.
 *
 * Computes the probability-weighted mean of maxsig over all homozygous calls
 * of the individuals with kn>=0, once weighted by inf[0] and once by inf[1].
 * The check fails when the inf[1]-weighted (deletion) mean is larger than the
 * inf[0]-weighted (baseline) mean and the total inf[1] weight exceeds 0.0001.
 *
 * Returns 1 on failure (after printing both means and weights), 0 otherwise.
 * The mu parameter is unused.
 */
int sanity_check(int indiv, int start, int end,struct signal * data,float ** mu)
{
  float mean_sig[2]={0,0};
  float weight[2]={0,0};
  int person,marker,state;

  for(person=0;person<indiv;++person)
    {
      if(data[person].kn<0)
	continue;
      for(marker=start;marker<end;++marker)
	{
	  const int g=data[person].gt[marker];

	  if(g!=0 && g!=2)
	    continue;
	  for(state=0;state<2;++state)
	    {
	      mean_sig[state]+=data[person].maxsig[marker]*data[person].inf[state];
	      weight[state]+=data[person].inf[state];
	    }
	}
    }

  for(state=0;state<2;++state)
    if(weight[state]>0)
      mean_sig[state]=mean_sig[state]/weight[state];

  if(!(mean_sig[1]>mean_sig[0]&&weight[1]>0.0001))
    return(0);

  printf("Sanity check failed:\t");
  for(state=0;state<2;++state)
    printf("%d %f %f\t",state,mean_sig[state],weight[state]);
  printf("\n");
  return(1);
}
/*****************************************************************************************/
/*
 * E-step without genotyping-error model: updates inf[0]/inf[1] of every
 * individual with kn==0 from the current distributions.
 *
 * For each state k (0 baseline, 1 deletion) the log prior and the
 * log-densities of all non-missing markers in start..end-1 are summed, then
 * exponentiated and normalised. If both likelihoods underflow to 0 the
 * individual gets 0.5/0.5. A NaN likelihood dumps the parameters and
 * terminates the program with exit code 8.
 *
 * mu/var are indexed [marker][state+genotype] for maxsig and [0][state] for
 * the lrr analysis. Returns the sum of inf[1] over the updated individuals.
 */
float estimate_status(int indiv, float ** mu, float ** var, struct signal *data, int start, int end, int lrr_flag, float * prior)
{
  int i,j,k;
  int g;
  double  lik[2];
  float ret=0.0;
  float lprior[2];

  for(k=0;k<2;++k)
    lprior[k]=log(prior[k]);

  for(i=0;i<indiv;++i)
    {
      if(data[i].kn!=0)
	continue;

      lik[0]=lprior[0];lik[1]=lprior[1];

      for(j=start;j<end;++j)
	{
	  g=data[i].gt[j];
	  if(g==3)
	    continue;
	  for(k=0;k<2;++k)
	    {
	      if(lrr_flag==0)
		lik[k]+=logprob(data[i].maxsig[j],mu[j][k+g],var[j][k+g]);
	      else
		/* NOTE(review): the log prior is added once more per marker here,
		   unlike in estimate_status_gte -- confirm this is intended */
		lik[k]+=logprob(data[i].lrr[j],mu[0][k],var[0][k])+lprior[k];

	      /* written as a negated > so that NaN is reported as well */
	      if(!(lik[k]>-1e50))
		printf("Problem: %d %d %f %f %f=>%f\n",j,k,data[i].maxsig[j],mu[j][k+g],var[j][k+g],logprob(data[i].maxsig[j],mu[j][k+g],var[j][k+g]));
	    }
	}
      for(k=0;k<2;++k)
	lik[k]=exp(lik[k]);

      if(lik[1]>=0 && lik[0]>=0)
	{
	  if(lik[0]+lik[1]>0)
	    lik[0]=lik[0]/(lik[1]+lik[0]);
	  else
	    {
	      printf("Underflow error in estimate_status\n");
	      lik[0]=0.5;
	    }
	}
      else
	{
	  printf("ERROR:lik1 lik2 %f %f\n",lik[0],lik[1]);
	  for(j=start;j<end;++j)
	    {
	      /* missing genotypes (3) would index column 4 of a 4-column row */
	      if(data[i].gt[j]==3)
		continue;
	      for(k=0;k<2;++k)
		printf("%d %d %f %f\n",j,k,mu[j][k+data[i].gt[j]],var[j][k+data[i].gt[j]]);
	    }
	  exit(8);
	}
      data[i].inf[0]=lik[0];
      data[i].inf[1]=1-lik[0];
      ret+=data[i].inf[1];
    }

  return(ret);
}

/****************************************************************************/
/*
 * E-step with genotyping-error model: like estimate_status, but individuals
 * with exactly one heterozygous call in the window (kn==1) are updated too,
 * starting from gte_prior instead of prior. Heterozygous and missing calls do
 * not contribute to the likelihood.
 *
 * If both likelihoods underflow to 0 the individual gets 0.5/0.5. A NaN
 * likelihood dumps the parameters and terminates the program with exit code 8.
 * Returns the sum of inf[1] over the updated individuals.
 */
float estimate_status_gte(int indiv, float ** mu, float ** var, struct signal *data, int start, int end, int lrr_flag,float *prior,float * gte_prior)
{
  int i,j,k;
  int g;
  double  lik[2];
  float ret=0.0;
  float lprior[2];
  float l_gte_prior[2];

  for(k=0;k<2;++k)
    {
      lprior[k]=log(prior[k]);
      l_gte_prior[k]=log(gte_prior[k]);
    }

  for(i=0;i<indiv;++i)
    {
      if(data[i].kn<0 || data[i].kn>=2)
	continue;

      for(k=0;k<2;++k)
	lik[k]=(data[i].kn==0)?lprior[k]:l_gte_prior[k];

      for(j=start;j<end;++j)
	{
	  g=data[i].gt[j];
	  if(g==3 || g==1)
	    continue;
	  for(k=0;k<2;++k)
	    {
	      if(lrr_flag==0)
		lik[k]+=logprob(data[i].maxsig[j],mu[j][k+g],var[j][k+g]);
	      else
		lik[k]+=logprob(data[i].lrr[j],mu[0][k],var[0][k]);
	    }
	}

      for(k=0;k<2;++k)
	lik[k]=exp(lik[k]);

      if(lik[1]>=0 && lik[0]>=0)
	{
	  if(lik[0]+lik[1]>0)
	    lik[0]=lik[0]/(lik[1]+lik[0]);
	  else
	    {
	      printf("Underflow error in estimate_status_gte\n");
	      lik[0]=0.5;
	    }
	}
      else
	{
	  printf("ERROR:l1 l2 %f %f\n",lik[0],lik[1]);
	  for(j=start;j<end;++j)
	    {
	      /* missing genotypes (3) would index column 4 of a 4-column row */
	      if(data[i].gt[j]==3)
		continue;
	      for(k=0;k<2;++k)
		printf("%d %d %f %f\n",j,k,mu[j][k+data[i].gt[j]],var[j][k+data[i].gt[j]]);
	    }
	  exit(8);
	}

      data[i].inf[0]=lik[0];
      data[i].inf[1]=1-lik[0];
      ret+=data[i].inf[1];
    }

  return(ret);
}

/****************************************************************************/
/*
 * M-step for the intensity analysis: for every marker in start..end-1
 * estimates mean and standard deviation of maxsig for each combination of
 * state k (0 baseline, 1 deletion) and homozygous genotype (0 or 2).
 *
 * Results go to mu[marker][k+genotype] and var[marker][k+genotype]; despite
 * its name var holds standard deviations. Each observation is weighted by
 * inf[k]; individuals with kn<0 are ignored. Poorly supported estimates are
 * adjusted by fix_variances, which may fall back to defv.
 */
void estimate_distrib(int indiv, float ** mu, float ** var, struct signal * data,int start, int end,float defv)
{
  void fix_variances(float **c,float ** var,int m,float defv);

  int i,j,k;
  int g;
  float **c;   /* c[marker][k+genotype]: summed weights */

  c=malloc(end*sizeof(float*));
  if(c==NULL && end>0)
    {
      printf("Out of memory in estimate_distrib\n");
      exit(8);
    }
  for(j=0;j<end;++j)
    {
      c[j]=malloc(4*sizeof(float));
      if(c[j]==NULL)
	{
	  printf("Out of memory in estimate_distrib\n");
	  exit(8);
	}
    }

  for(j=start;j<end;++j)
    {
      for(k=0;k<2;++k)
	{
	  mu[j][k]=0.0;	mu[j][k+2]=0.0;
	  var[j][k]=0.0;	var[j][k+2]=0.0;
	  c[j][k]=0.0;c[j][2+k]=0.0;

	  for(i=0;i<indiv;++i)
	    {
	      g=data[i].gt[j];
	      if((g==0||g==2)&&data[i].kn>=0)
		{
		  mu[j][k+g]+=data[i].maxsig[j]*data[i].inf[k];
		  c[j][g+k]+=data[i].inf[k];
		}
	    }
	  if(c[j][0+k]>0)
	    mu[j][k]=mu[j][k]/c[j][k];
	  if(c[j][2+k]>0)
	    mu[j][k+2]=mu[j][k+2]/c[j][2+k];

	  for(i=0;i<indiv;++i)
	    {
	      g=data[i].gt[j];
	      /* parenthesised like the mean loop above: without the parentheses
		 AA calls of excluded individuals (kn<0) entered the variance */
	      if((g==0||g==2)&&data[i].kn>=0)
		var[j][k+g]+=pow(data[i].maxsig[j]- mu[j][k+g],2)*data[i].inf[k];
	    }

	  if(c[j][k]>1)
	    var[j][k]=sqrt(var[j][k]/(c[j][k]-1));
	  else
	    var[j][k]=0.0;
	  if(c[j][2+k]>1)
	    var[j][k+2]=sqrt(var[j][k+2]/(c[j][2+k]-1));
	  else
	    var[j][k+2]=0.0;
	}

      //Variance of rare observation cannot be less than 1/3 of other variance
      fix_variances(c,var,j,defv);

    }
  if(Op==1)
    for(k=0;k<2;++k)
      {
	printf("State %d Mean: ",k);
	for(j=start;j<end;++j)
	  printf("%d %.2f %.2f ",j,mu[j][k],mu[j][k+2]);

	printf("\n");
	printf("State %d stdvs: ",k);
	for(j=start;j<end;++j)
	  printf("%d %.2f %.2f ",j,var[j][k],var[j][k+2]);
	printf("\n");
	printf("Stade %d Counts: ",k);
	for(j=start;j<end;++j)
	  printf("%d %.2f %.2f ",j,c[j][k],c[j][k+2]);
	printf("\n");
      }
  for(j=0;j<end;++j)
    free(c[j]);
  free(c);
  return;
}
/*****************************************************************************/
/*
 * Stabilises the four spread estimates of marker m.
 *
 * Columns of c[m] (summed weights) and var[m] (spreads):
 *   0 genotype 0 / baseline   1 genotype 0 / deletion
 *   2 genotype 2 / baseline   3 genotype 2 / deletion
 * An estimate counts as reliable when its weight is at least 10.
 *
 * If neither baseline estimate is reliable, all four spreads are set to defv
 * (with a message). Otherwise an unreliable spread is raised to 0.3 times a
 * reference spread whenever it is smaller than 0.3 times the baseline spread
 * it is compared with.
 */
void fix_variances(float **c,float ** var,int m,float defv)
{
  const int min_weight=10;
  const float floor_ratio=0.3;
  float *weight=c[m];
  float *sd=var[m];
  const int base0_ok=(weight[0]>=min_weight);
  const int del0_ok=(weight[1]>=min_weight);
  const int base2_ok=(weight[2]>=min_weight);
  const int del2_ok=(weight[3]>=min_weight);
  int col;

  if(!base0_ok && !base2_ok)
    {
      printf("Very little genotype information at marker %d\n",m);
      printf("Setting default variance\n");

      for(col=0;col<4;++col)
	sd[col]=defv;
      return;
    }

  /* baseline spreads support each other */
  if(!base2_ok && sd[2]<floor_ratio*sd[0])
    sd[2]=floor_ratio*sd[0];

  if(!base0_ok && sd[0]<floor_ratio*sd[2])
    sd[0]=floor_ratio*sd[2];

  /* deletion spreads lean on the baseline of the same genotype when that one
     is reliable, on the other genotype's baseline otherwise */
  if(!del0_ok && sd[1]<floor_ratio*sd[0])
    sd[1]=floor_ratio*(base0_ok?sd[0]:sd[2]);

  if(!del2_ok && sd[3]<floor_ratio*sd[2])
    sd[3]=floor_ratio*(base2_ok?sd[2]:sd[0]);

  return;
}
/********************************************************************************/
/*
 * M-step for the lrr analysis: estimates one mean and one standard deviation
 * of lrr per state k (0 baseline, 1 deletion), pooled over all markers in
 * start..end-1 and all homozygous calls of individuals with kn>=0, weighted
 * by inf[k]. Results go to mu[0][k] and var[0][k].
 *
 * When a state has no weight, or the computed variance is not positive, the
 * spread is set to 1e-10. The snps parameter is unused.
 */
void estimate_distrib_lrr(int indiv, int snps, float ** mu, float ** var, struct signal * data,int start, int end)
{
  int i,j,k;
  int g;
  float c;
  float v;

  for(k=0;k<2;++k)
    {
      mu[0][k]=0.0;
      var[0][k]=0.0;
      c=0.0;

      for(i=0;i<indiv;++i)
	for(j=start;j<end;++j)
	  {
	    g=data[i].gt[j];
	    /* parenthesised: without the parentheses AA calls of excluded
	       individuals (kn<0) were included while BB calls were not */
	    if((g==0||g==2)&&data[i].kn>=0)
	      {
		mu[0][k]+=data[i].lrr[j]*data[i].inf[k];
		var[0][k]+=pow(data[i].lrr[j],2)*data[i].inf[k];
		c+=data[i].inf[k];
	      }
	  }
      if(c>0)
	{
	  mu[0][k]=mu[0][k]/c;
	  v=var[0][k]/c-pow(mu[0][k],2);
	  /* rounding can push E[x^2]-E[x]^2 below zero; avoid sqrt of a negative */
	  if(v>0)
	    var[0][k]=sqrt(v);
	  else
	    var[0][k]=1e-10;
	}
      else
	var[0][k]=1e-10;
    }
  if(Op==1)
    {
      printf("\n");
    }

  return;
}
/****************************************************************************/
/*
 * Calculates the fit of the observed hybridization intensity to the inferred
 * carrier status.
 *
 * For every marker in start..end-1 and every homozygous call of an individual
 * with kn>=0, the mixture density inf[0]*p(baseline)+inf[1]*p(deletion) is
 * evaluated and its logarithm added up. Calls whose density is 0 or whose
 * inf[] do not sum to 1 (within 0.01) are reported and left out.
 * With V==1 the per-marker weights and parameters are printed.
 *
 * Returns the summed log-likelihood. The snps parameter is unused.
 */
double calculate_lik(int indiv, int snps, float ** mu, float ** var, struct signal * data, int start, int end,int lrr_flag)
{
  int i,j,k;
  int g;
  double ret=0.0;
  double sum;
  float count[4];   /* summed inf[] per state+genotype column, for the V==1 output */
  float ts;

  for(j=start;j<end;++j)
    {
      for(k=0;k<4;++k)
	count[k]=0;

      for(i=0;i<indiv;++i)
	{
	  if(data[i].kn<0)
	    continue;
	  g=data[i].gt[j];
	  if(g==1||g==3)
	    continue;

	  sum=0.0;
	  ts=0.0;
	  for(k=0;k<2;++k)
	    {
	      if(lrr_flag==0)
		sum+=data[i].inf[k]*prob(data[i].maxsig[j],mu[j][k+g],var[j][k+g]);
	      else
		sum+=data[i].inf[k]*prob(data[i].lrr[j],mu[0][k],var[0][k]);
	      count[k+g]+=data[i].inf[k];
	      ts+=data[i].inf[k];
	    }
	  if(sum==0.0||ts<0.99 ||ts>1.01)
	    /* count[] holds floats: print all four with a matching conversion */
	    printf("Warning: Error in calculate_lik (sum=%e, ts=%f), count= %.3f %.3f %.3f %.3f\n",sum,ts,count[0],count[1],count[2],count[3]);
	  else
	    ret+=log(sum);
	}

      if(V==1)
	for(k=0;k<4;k+=2)
	  /* the format used to start with "%marker", i.e. a stray %m conversion */
	  printf("marker %d: genotype %d count %.3f  %.3f mu %.3f %.3f sig %.4f %.4f \n",j,k,count[k],count[k+1], mu[j][k],mu[j][k+1],var[j][k],var[j][k+1]);
    }

  return ret;
}
/****************************************************************************/
/*
 * Corrects the likelihood for lots of missing data through removed outliers.
 *
 * Counts the homozygous calls in start..end-1 of kept individuals (kn>=0, c1)
 * and of removed ones (kn<0, c2) and scales ll by (c1+c2)/c1, i.e. to the
 * value expected had no call been removed. When no call of a kept individual
 * exists the factor is undefined and ll is returned unchanged.
 */
double correct_lik(int indiv, struct signal * data, int start, int end,float ll)
{
  int i,j;
  double ret=0.0;
  int c1=0,c2=0;

  for(j=start;j<end;++j)
    for(i=0;i<indiv;++i)
      if(data[i].gt[j]!=1&&data[i].gt[j]!=3)
	{
	  if(data[i].kn>=0)
	    ++c1;
	  else
	    ++c2;
	}

  /* avoid dividing by zero */
  if(c1==0)
    return ll;

  printf ("correction factor %f\n",1.0+(float)c2/c1);
  ret=ll/c1*(c2+c1);

  return ret;
}
/***********************************************************************/
/*               Finding Boundaries                                    */
/***********************************************************************/
/*
 * Searches the CNV borders: runs the EM on the full window 0..snps and on
 * candidate windows [b,e) of at least minl markers, and keeps the window with
 * the highest total log-likelihood. For a candidate the markers outside the
 * window are scored with the baseline model (calc_bl).
 *
 * When there would be more than 400 candidates, the start and end positions
 * are stepped in strides larger than 1 to limit the number of EM runs.
 *
 * bs,be   out: best window, start included, end excluded
 * rsc     out: status flag of the final EM run (only written when that run happens)
 * initfr  out: frequency value written by the EM initialisation
 * Returns the best log-likelihood minus the baseline log-likelihood of the
 * whole window. On return data[].inf holds the result of the last EM run.
 */
float bound_an(int steps,struct signal * data,int indiv, int snps,int lrr_flag,int *bs, int *be, int minl,int * rsc,float *initfr)
{
  struct conf{
    float ll;
    int start;
    int end;
    float *cs;  //inf[0] of every individual for this configuration
    float noc;
    int sc;//sanity check; sc 1 indicates no deletion
  };

  void calc_estf(int indiv,int start,int end,struct signal *data, float *estf);

  int b,e,i,j;

  float bl;
  int max=0;
  float err;
  struct conf * all;
  int noe=0;
  float ret;
  int s1,s2;
  int maxs=400;
  float * estf;
  float final_noc;  //carrier count of the final run; not reported

  estf=malloc(snps*sizeof(float));
  if(estf==NULL)
    {
      printf("Out of memory in bound_an\n");
      exit(8);
    }
  calc_estf(indiv,0,snps,data, estf);

  noe=(snps-minl+1)*(snps-minl+2)/2;

  printf("Maximizing Boundaries, minimum length %d, number of elements %d\n",minl,noe);

  //reducing number of computations: every halving needed to get below maxs
  //adds one to one of the two step widths, alternating between them
  j=0;
  s1=1;s2=1;
  while(noe>maxs)
    {
      noe=noe/2;
      ++j;
    }

  while(j>0)
    {
      if(j>0)
	{
	  --j;
	  ++s1;
	}
      if(j>0)
	{
	  --j;
	  ++s2;
	}
    }

  if(s1>1)
    printf("To reduce computation, only rough borders are calculated, reduced iterations to steps of  %d %d\n",s1,s2);
  //exact number of candidate windows with these step widths
  noe=0;
  for(b=0;b<snps-minl+1;b+=s1)
    for(e=b+minl;e<=snps;e+=s2)
      ++noe;

  //entry 0 is the full window, entries 1..noe are the candidates
  all=malloc((noe+1)*sizeof(struct conf));
  if(all==NULL)
    {
      printf("Out of memory in bound_an\n");
      exit(8);
    }
  for(i=0;i<=noe;++i)
    {
      all[i].cs=malloc(indiv*sizeof(float));
      if(all[i].cs==NULL)
	{
	  printf("Out of memory in bound_an\n");
	  exit(8);
	}
    }

  bl=calc_bl(data,indiv,0,snps);
  printf("Baseline %e\n",bl);

  all[0].start=0;
  all[0].end=snps;
  all[0].ll=runem(steps,data,indiv, snps,0,snps,&all[0].noc,lrr_flag,&all[0].sc,initfr,estf);

  for(j=0;j<indiv;++j)
    all[0].cs[j]=data[j].inf[0];
  printf("Full length %e\n",all[0].ll);

  i=1;
  for(b=0;b<snps-minl+1;b+=s1)
    for(e=b+minl;e<=snps;e+=s2)
      {
	all[i].start=b;
	all[i].end=e;
	all[i].ll=runem(steps,data,indiv, snps,b,e,&all[i].noc,lrr_flag,&all[i].sc,initfr,estf);

	//markers outside the window follow the baseline model
	all[i].ll+=calc_bl(data,indiv,0,b);
	all[i].ll+=calc_bl(data,indiv,e,snps);
	if(Outlieflag>0)
	  all[i].ll=correct_lik(indiv, data, 0, snps,all[i].ll);
	if(all[i].noc<0.01 && all[i].ll>bl+100)
	  {
	    V=1;
	    printf("This is more than weird error\n");
	    printf("ll %e > bl %e\n",all[i].ll,bl);
	    printf("noc=%e\n%d-%d\n",all[i].noc,b,e);
	  }

	for(j=0;j<indiv;++j)
	  all[i].cs[j]=data[j].inf[0];
	if (all[i].ll>all[max].ll)
	  max=i;
	++i;
      }

  printf("highest likelihood %e(%d) markers %d-%d\n",all[max].ll,all[max].sc,all[max].start,all[max].end);

  ret=all[max].ll-bl;
  //NOTE(review): this report stops at entry noe-1, so the last candidate is
  //never listed, and the last column prints all[max].sc for every row -- confirm
  for(i=0;i<noe;++i)
    {
      err=0.0;
      for(j=0;j<indiv;++j)
	err+=fabs(all[i].cs[j]-all[max].cs[j]);
      printf("%d %d %e %e %f %f %d\n",all[i].start+1,all[i].end,all[i].ll,all[i].ll-bl,all[i].noc,err/all[max].noc,all[max].sc);
    }
  *bs=all[max].start;
  *be=all[max].end;
  //rerun the best window so that data[].inf holds its result
  if(noe>1)
    runem(steps,data,indiv, snps,all[max].start,all[max].end,&final_noc,lrr_flag,rsc,initfr,estf);

  //noe+1 entries were allocated; the last one used to be leaked
  for(i=0;i<=noe;++i)
    free(all[i].cs);
  free(all);
  free(estf);
  return(ret);
}
/****************************************************************************/
/*
 * Fills estf[i] for every marker i in start..end-1 with a frequency estimate
 * derived from the genotype counts of the called individuals:
 *   x = (n0/n)*(n2/n1) - n1/(4n),   estf = x/(1+x)
 * where n0,n1,n2 are the counts of genotypes 0,1,2 and n their sum. A marker
 * without any heterozygote is treated as if it had one (n1 and n are both
 * incremented) so that n1 is never 0. A marker without any called genotype
 * gets estf 0. Every estimate is printed.
 */
void calc_estf(int indiv,int start,int end,struct signal *data, float *estf)
{
  int i,count,k;
  int gt[3];

  for(i=start;i<end;++i)
    {
      gt[0]=0;gt[1]=0;gt[2]=0;count=0;
      for(k=0;k<indiv;++k)
	if(data[k].gt[i]<3)
	  {
	    ++gt[data[k].gt[i]];
	    ++count;
	  }
      if(count>0 &&gt[1]==0)
	{
	  ++gt[1];
	  ++count;
	}
      if(count>0)
	estf[i]=(float)gt[0]/count*(float)gt[2]/gt[1]-(float)gt[1]/(4.0*count);
      else
	/* nothing called at this marker: estf[i] used to be read uninitialised below */
	estf[i]=0.0;

      estf[i]=estf[i]/(1.0+estf[i]);
      printf("Estimated frequency Marker %d  %f\n",i,estf[i]);

    }
  return;
}
/*************************************************************************/
/*
 * Baseline log-likelihood of the markers start..end-1 (start is included,
 * end is not): every homozygous genotype of a marker is modelled by a single
 * Gaussian for maxsig, i.e. without a deletion state.
 *
 * Mean and standard deviation are estimated per marker and genotype from the
 * individuals with kn>=0. A genotype seen only once, or with zero spread,
 * borrows the spread of the other homozygous genotype. Markers where neither
 * homozygous genotype is seen more than once contribute nothing.
 *
 * NOTE(review): the likelihood sum runs over all individuals, including those
 * with kn<0 that were left out of the estimates -- confirm this is intended.
 *
 * Returns the summed log-density; terminates the program with exit code 8
 * when the sum is no longer larger than -1e50 (this also catches NaN).
 */
float calc_bl(struct signal * data,int indiv,int start,int end)//start is included, end is not
{
  /* four entries each: the reset loop below runs over all four genotype
     codes; with three entries it wrote past the end of mu and var */
  float mu[4],var[4];
  int c[4];
  float ll=0.0;
  int i,j,k,tot;
  int g;

  for(j=start;j<end;j++)
    {
      for(k=0;k<4;k++)
	{
	  mu[k]=0.0;
	  var[k]=0.0;
	  c[k]=0;
	}
      tot=0;
      for(i=0;i<indiv;++i)
	if(data[i].kn>=0)
	  {
	    g=data[i].gt[j];
	    ++tot;
	    c[g]++;
	    if(g==0||g==2)
	      {
		mu[g]+=data[i].maxsig[j];
		var[g]+=pow(data[i].maxsig[j],2);
	      }
	  }

      for(k=0;k<3;k+=2)
	if(c[k]>0)
	  {
	    mu[k]=mu[k]/c[k];
	    if(var[k]/c[k]-pow(mu[k],2)>=0)
	      var[k]=sqrt(var[k]/c[k]-pow(mu[k],2));
	    else{
	      printf("variance error: %f %f\n",var[k]/c[k],pow(mu[k],2));
	      var[k]=0.0;
	    }
	    if(var[k]==0.0 && c[k]>1)
	      {
		printf("Variance =0; why?\n");

		for(i=0;i<indiv;++i)
		  if(data[i].kn>=0)
		    if(data[i].gt[j]==k)
		      printf("%d %f\n",i,data[i].maxsig[j]);
	      }
	    if(V==1 ||Op==1)
	      printf("Baseline calculations SNP %d: gt %d count %d mu %e sigma %e \n",j,k,c[k],mu[k],var[k]);
	  }

      /* a spread that cannot be estimated is borrowed from the other genotype */
      if(c[0]==1 || var[0]==0.0)
	var[0]=var[2];
      if(c[2]==1 || var[2]==0.0)
	var[2]=var[0];
      if(c[2]>1 ||c[0]>1)
	for(i=0;i<indiv;++i)
	  {
	    g=data[i].gt[j];
	    if(g!=1 && g!=3)
	      {
		ll+=logprob(data[i].maxsig[j],mu[g],var[g]);
		if(!(ll>-1e50))
		  {
		    printf("Error in calculating likelihood for boundary configuration\n");
		    printf("marker %d individual %d %e\n",j+1,i+1,ll);
		    printf("Counts: Gt0 %d Gt1 %d Gt2 %d Gt3 %d =%d \n",c[0],c[1],c[2],c[3],tot);
		    printf("Genotype %d\tsignal %e\tmu %e\tsigma %e\tlik %e\n",g,data[i].maxsig[j],mu[g],
			   var[g],prob(data[i].maxsig[j],mu[g],var[g]));
		    exit(8);
		  }
	      }
	  }
    }
  return (ll);
}
/***********************************************************************/
/*                    Memory handling                                  */
/***********************************************************************/

/*
 * Allocates a table of d1 rows with d2 floats each, as an array of row
 * pointers. The contents are not initialised. Prints a message and terminates
 * the program with exit code 8 when an allocation returns NULL.
 */
float ** make2arr(int d1,int d2)
{
  float **rows=malloc(d1*sizeof *rows);
  int r;

  if(!rows)
    {
      printf("Out of memory in make2arr\n");
      exit(8);
    }

  for(r=0;r<d1;r++)
    if(!(rows[r]=malloc(d2*sizeof **rows)))
      {
	printf("Out of memory in make2arr\n");
	exit(8);
      }

  return rows;
}
/*************************************************************************/
/*
 * Allocates d1 character buffers of d2 bytes each, as an array of pointers.
 * The contents are not initialised. Prints a message and terminates the
 * program with exit code 8 when an allocation returns NULL.
 */
char ** make2carr(int d1,int d2)
{
  char **rows=malloc(d1*sizeof *rows);
  int r;

  if(!rows)
    {
      printf("Out of memory in make2carr\n");
      exit(8);
    }

  for(r=0;r<d1;r++)
    if(!(rows[r]=malloc(d2*sizeof **rows)))
      {
	printf("Out of memory in make2carr\n");
	exit(8);
      }

  return rows;
}
/*************************************************************************/
/* Frees the d1 rows of a float table and then the array of row pointers itself. */
void rem2arr(float **arr,int d1)
{
  int row=0;

  while(row<d1)
    {
      free(arr[row]);
      ++row;
    }
  free(arr);
}
/*************************************************************************/
/* Frees the d1 character buffers of arr and then the pointer array itself. */
void rem2carr(char **arr,int d1)
{
  int row=0;

  while(row<d1)
    {
      free(arr[row]);
      ++row;
    }
  free(arr);
}
/***********************************************************************/
/*
 * Allocates d1 individual records, each with per-marker arrays (maxsig, gt,
 * sigA, sigB, lrr) of d2 entries, a 50-byte name buffer and the two-element
 * inf array. kn is set to 0; everything else is left uninitialised.
 * Prints a message and terminates the program with exit code 8 when an
 * allocation returns NULL. Release with freedarr.
 */
struct signal *makedarr(int d1,int d2)
{
  int i;
  struct signal * ret;

  ret=malloc(d1*sizeof(struct signal));

  if(ret==NULL)
    {
      printf("Out of memory in makedarr\n");
      exit(8);
    }

  for(i=0;i<d1;++i)
    {
      ret[i].maxsig=malloc(d2*sizeof(float));
      ret[i].gt=malloc(d2*sizeof(int));
      ret[i].sigA=malloc(d2*sizeof(float));
      ret[i].sigB=malloc(d2*sizeof(float));
      ret[i].lrr=malloc(d2*sizeof(float));
      ret[i].name=malloc(50*sizeof(char));
      ret[i].inf=malloc(2*sizeof(float));

      /* lrr used to be missing from this check */
      if(ret[i].maxsig==NULL||ret[i].gt==NULL||ret[i].sigA==NULL || ret[i].sigB==NULL
	 ||ret[i].lrr==NULL ||ret[i].name==NULL || ret[i].inf==NULL)
	{
	  printf("Out of memory in makedarr\n");
	  exit(8);
	}
      ret[i].kn=0;
    }

  return(ret);
}
/*****************************************************************************/
/*
 * Allocates d1 CNV records, each with a cprobs array of d2 floats and a
 * 30-byte name buffer; pstart, pend and po are set to 0, the other fields are
 * left uninitialised. Prints a message and terminates the program with exit
 * code 8 when an allocation returns NULL.
 */
struct cnv * makecnvarr(int d1,int d2)
{
  struct cnv *list=malloc(d1*sizeof(struct cnv));
  struct cnv *entry;
  int n;

  if(!list)
    {
      printf("Out of memory in makecnvarr\n");
      exit(8);
    }

  for(n=0;n<d1;n++)
    {
      entry=&list[n];
      entry->cprobs=malloc(d2*sizeof(float));
      entry->name=malloc(30*sizeof(char));
      if(!entry->cprobs || !entry->name)
	{
	  printf("Out of memory in makecnvarr\n");
	  exit(8);
	}
      entry->pstart=0;
      entry->pend=0;
      entry->po=0;
    }

  return list;
}
/***************************************************************************/
/*
 * Release an array of d1 signal records created by makedarr(): the
 * per-record buffers first, then the array itself.
 * The hap and md members are not touched; makedarr() does not allocate
 * them.
 */
void freedarr(struct signal * data, int d1)
{
  int i;

  for(i=0;i<d1;++i)
    {
      free(data[i].name);
      free(data[i].gt);
      free(data[i].maxsig);
      /* sigA and sigB are allocated by makedarr() but were never
         released here, leaking two arrays per individual */
      free(data[i].sigA);
      free(data[i].sigB);
      free(data[i].inf);
      free(data[i].lrr);
    }
  free(data);

  return;
}
/**********************************************************************/
/*
 * Release an array of d1 cnv records created by makecnvarr(): the
 * per-record buffers first, then the array itself.
 */
void freecnvarr(struct cnv * data, int d1)
{
  int i;

  for(i=0;i<d1;++i)
    {
      free(data[i].cprobs);
      /* name is allocated by makecnvarr() and was previously leaked */
      free(data[i].name);
    }

  free(data);

  return;
}
/**************************************************************************/
/* Draw the next rand() value and scale it into the half-open
   interval [0,1). */
double RandDbl(void)
{
  const double span=(double)RAND_MAX +1.0;

  return rand()/span;
}
/***************************************************************************/
/* Evaluate exp(-((signal-mu)/sig)^2/2)/(R2p*sig), i.e. a bell curve
   centred on mu with spread sig, scaled by the constant R2p. */
float prob(float signal,float mu, float sig)
{
  double kernel=exp(-pow((signal-mu)/sig,2)/2);
  double scale=R2p*sig;

  return (kernel/scale);
}
/***************************************************************************/
/* Logarithm of prob(): -((signal-mu)/sig)^2/2 - log(R2p*sig). */
float logprob(float signal,float mu, float sig)
{
  double quad=-pow((signal-mu)/sig,2)/2;
  double lognorm=log(R2p*sig);

  return (quad-lognorm);
}
/***************************************************************************/
/*********************************I/O***************************************/
/***************************************************************************/
/*
 * Print the run configuration to stdout: seed, sample size, marker
 * count and number of EM steps, followed by one line for each optional
 * feature that is switched on and, when subs is positive, the
 * resampling settings.
 */
void initial_out(int seed, int indiv,int snps,int steps, int lrr_flag, float thresh, int subs,int reps)
{
  printf("Seed: \t\t\t%d\n"
	 "number of individuals\t%d\n"
	 "number of markers\t%d\n"
	 "number of EM-steps\t%d\n\n",
	 seed,indiv,snps,steps);

  if(lrr_flag<0)
    {
      printf("analyzing LRR as well\n");
    }
  if(Gte_flag>0)
    {
      printf("Modelling genotyping error\n");
    }
  if(thresh>0.0)
    {
      printf("Calling threshold %.3f\n",thresh);
    }

  if(subs<=0)
    return;

  printf("resampling:\n"
	 "number of resampled SNPs\t%d\n"
	 "number of subsamples taken\t%d\n",
	 subs,reps);
}
/*************************************************************************/
/*
 * Interpret the command line.  Leading arguments that start with '-'
 * are consumed one at a time; the character after the dash selects the
 * option and whatever follows it is the option's value.  Results are
 * stored through the pointer parameters or in the file-level option
 * variables.  An unknown option prints the help text and exits with
 * status 8.  After parsing, *subs is capped at *snps and *last falls
 * back to *snps when it is below 1 or above *snps.
 */
void icl(int noa, char *argum[],int *indiv, int *snps, int *subs,int *steps, int *reps,int * sind,
	char * infile, float *cnvfreq, int *lrr_flag, float *thresh,int *first, int *last, char* posfile,char*mapfile,int *minl)
{
  void help_output(void);
  char *arg;	/* option currently examined, e.g. "-s100" */
  char *val;	/* text that follows the option letter */

  for(;(noa>1) && argum[1][0]=='-';--noa,++argum)
    {
      arg=argum[1];
      val=&arg[2];

      switch (arg[1])
	{
	case 'F':	/* first SNP */
	  *first=atoi(val);
	  break;
	case 'L':	/* last SNP */
	  *last=atoi(val);
	  break;
	case 'b':	/* file with CNV borders */
	  strcpy(posfile,val);
	  break;
	case 'm':	/* map file */
	  strcpy(mapfile,val);
	  break;
	case 'e':	/* rounds of EM */
	  *steps=atoi(val);
	  break;
	case 's':	/* sample size */
	  *indiv=atoi(val);
	  break;
	case 'p':	/* number of SNPs */
	  *snps=atoi(val);
	  break;
	case 'u':	/* number of subsampled SNPs */
	  *subs=atoi(val);
	  break;
	case 'r':	/* subsampled sample size */
	  *sind=atoi(val);
	  break;
	case 'n':	/* number of subsamples taken */
	  *reps=atoi(val);
	  break;
	case 'S':	/* silent EM off */
	  Op=1;
	  break;
	case 'i':	/* input file */
	  strcpy(infile,val);
	  break;
	case 'f':	/* frequency of the CNV in resampled datasets */
	  *cnvfreq=atof(val);
	  break;
	case 'B':	/* determine best boundaries; value is the minimum length */
	  Bknown=1;
	  *minl=atoi(val);
	  break;
	case 'R':	/* use LRR for EM */
	  *lrr_flag=1;
	  Lrr=-1;
	  break;
	case 'G':	/* model genotyping error */
	  Gte_flag=1;
	  break;
	case 'P':	/* prior */
	  Prior=atof(val);
	  break;
	case 'T':	/* calling threshold */
	  *thresh=atof(val);
	  break;
	case 'A':	/* calculate distributions of hybridization intensity */
	  AddAn_flag=1;
	  break;
	case 'O':	/* only a single hybridization value is available */
	  Lrr=1;
	  break;
	case 'c':	/* skip this many columns at the start of each input line */
	  Colsk=atoi(val);
	  break;
	case 'D':	/* remove potential outliers and reanalyze */
	  Outlieflag=1;
	  break;
	case 'Q':	/* QC limit for missing data per SNP */
	  Qc=atof(val);
	  break;
	case 'h':	/* help */
	  help_output();
	  break;
	default:
	  printf("Unknown option %s\n",arg);
	  help_output();
	  exit(8);
	}
    }

  if(*subs>*snps)
    *subs=*snps;
  if(*last<1 || *last>*snps)
    *last=*snps;
}
/*********************************************************/
/* Write the option summary to stdout and terminate the program with
   exit status 8. */
void help_output(void)
{
  static const char *const text[]={
    "\nAnalysis options:\n",
    "  ex:\tRounds of EM \n",
    "  Bx:\tdetermine best boundaries; x is minimum length;	\n",
    "  R:\tuse LogR Ratio for EM\n",
    "  G:\tmodel genotyping error\n",
    "  Px:\tprior \n",
    "  Tx:\tthreshold x; individuals with prob >x are called as carriers\n",
    "  F:\tFist SNP to be considered\n",
    "  L:\tLast SNP to be considered\n",
    "  Qx:\tset maximim percentage of missing data per SNP\n",
    "\nThe Dataset:\n",
    "  ix:\tinputfile \n",
    "  sx:\tSample Size \n",
    "  px:\tNumber of SNPs\n",
    "  b:\tInputfile for multiple CNV-borders\n",
    "  m:\tMapfile for SNPs\n",
    "  O:\tOnly one hybridization intensity is in the datafile\n",
    "  cx:\tNumber of columns with irrelevant info\n",
    "\nOutput\n",
    "  S:\tsilent EM off\n",
    "  A:\tCalculate distributions of hybridization intensity\n",
    "  h:\tHelp-this output\n",
    "\nSubsampling\n",
    "  ux:\tnumber of subsampled snps  \n",
    "  rx:\tsubsampled sample size \n",
    "  nx:\tnumber of subsamples taken \n",
    "  fx:\tfrequency of  the CNV in resampled datasets \n",
    "\n"
  };
  const int lines=(int)(sizeof text/sizeof text[0]);
  int n;

  /* none of the lines contains a conversion, so fputs emits exactly
     what printf would */
  for(n=0;n<lines;++n)
    fputs(text[n],stdout);

  exit(8);
}
/****************************************************************/
/*
 * Read marker positions from mapfile.
 *
 * Lines starting with '#' are ignored.  A line "P<n>" announces the
 * number of markers and must come before any data line.  Every other
 * line is converted with atoi() and stored as the next position.
 *
 * On return *nom holds the announced marker count and the result is a
 * malloc'ed array with that many positions, owned by the caller.  The
 * result is NULL when the file contains neither a P-line nor data.
 *
 * Exits with status 8 when the file cannot be opened, memory runs out,
 * the P-line is missing, the number of data lines differs from the
 * P-line, or the positions are not in ascending order.
 */
int * readmap(char *mapfile, int *nom)
{
	FILE *inf=fopen(mapfile,"r");
	char temp[200];
	int ts=200; //maximum length of a line
	int *ret=NULL;
	int i=0;

	if(inf==NULL)
	{
		printf("Error opening map file %s\n",mapfile);
		exit(8);
	}
	printf("map positions taken from %s\n",mapfile);
	*nom=0;

	while(fgets(temp,ts,inf)!=NULL)
	{
		switch(temp[0]){
		case '#'://comments, ignore;
			break;
		case 'P':
			*nom=atoi(&temp[1]);
			free(ret);	//a repeated P-line must not leak the earlier array
			//element type is int; allocate at least one element so a zero count does not look like a failure
			ret=malloc((*nom>0?*nom:1)*sizeof *ret);
			if(ret==NULL)
			{
				printf("Out of memory in readmap\n");
				exit(8);
			}
			printf("%d Markers in map file\n",*nom);
			break;
		default:
			if(*nom==0)
			{
				printf("Error in %s; P-line missing\n",mapfile);
				exit(8);
			}
			if(i>=*nom)
			{
				//more data lines than announced: stop before writing past the array
				printf("Error: More markers in %s than announced on the P-line (%d)\n",mapfile,*nom);
				exit(8);
			}
			ret[i]=atoi(temp);
			++i;
			break;
		}
	}
	fclose(inf);

	if(i!=*nom)
	{
		printf("Error: Wrong number of markers in %s (%d!=%d)\n",mapfile,i,*nom);
		exit(8);
	}

	//testing if map is ordered
	for(i=0;i<*nom-1;++i)
		if(ret[i+1]<ret[i])
		{
			printf("Error: map file %s not ordered (Line %d (%d<%d))\n",mapfile,i+2,ret[i+1],ret[i]);
			exit(8);
		}
	return(ret);
}
/********************************************************************/
/*
 * Read the CNV regions to analyse from posfile.
 *
 * Line types, selected by the first character:
 *   '#'              comment, ignored
 *   "P<n>"           number of regions; must precede any S or L line
 *   "S<t>,<a>,<b>"   region of type t given as first and last SNP index
 *   "L<t>,<a>,<b>"   region of type t given as start and end position;
 *                    converted to SNP indices with the map read from
 *                    mapfile
 * Any other line is ignored.
 *
 * mapfile equal to "nofile" means that no map is available; L lines are
 * then an error.  indiv is handed to makecnvarr() as the length of each
 * region's cprobs array.
 *
 * On return *nocnv holds the announced number of regions and the result
 * is the array built by makecnvarr() (NULL when the file contained
 * neither a P-line nor regions).  Exits with status 8 on any
 * inconsistency.
 */
struct cnv * readpos(char *posfile,int *nocnv,int indiv,char *mapfile)
{
  FILE *inf=fopen(posfile,"r");
  char temp[200];
  int ts=200; //maximum length of a line
  struct cnv *ret=NULL;
  int i=0,j;
  int last;
  int *map=NULL;
  int nom=0;

  if(inf==NULL)
    {
      printf("Error opening CNV file %s\n",posfile);
      exit(8);
    }
  if(strcmp(mapfile,"nofile")!=0)
    map=readmap(mapfile, &nom);

  printf("CNV boundaries taken from %s\n",posfile);
  *nocnv=0;

  while(fgets(temp,ts,inf)!=NULL)
    {
      switch(temp[0]){
      case '#'://comments, ignore;
	break;
      case 'P':
	*nocnv=atoi(&temp[1]);
	ret=makecnvarr(*nocnv,indiv);
	printf("%d CNVs in input file\n",*nocnv);
	break;
      case 'S':
	if(*nocnv==0)
	  {
	    printf("Error in %s; P-line missing\n",posfile);
	    exit(8);
	  }
	if(i>=*nocnv)
	  {
	    //more regions than announced: stop before writing past ret
	    printf("Wrong number of regions in %s\n",posfile);
	    exit(8);
	  }
	if(sscanf(&temp[1],"%d,%d,%d",&ret[i].type,&ret[i].sstart,&last)!=3)
	  {
	    printf("Format error in %s: %s",posfile,temp);
	    exit(8);
	  }
	ret[i].snps=last-ret[i].sstart+1;
	++i;
	break;
      case 'L':
	if(*nocnv==0)
	  {
	    printf("Error in %s; P-line missing\n",posfile);
	    exit(8);
	  }
	if(map==NULL)
	  {
	    printf("Error: No map file provided\n");
	    exit(8);
	  }
	if(i>=*nocnv)
	  {
	    printf("Wrong number of regions in %s\n",posfile);
	    exit(8);
	  }
	if(sscanf(&temp[1],"%d,%d,%d",&ret[i].type,&ret[i].pstart,&ret[i].pend)!=3)
	  {
	    printf("Format error in %s: %s",posfile,temp);
	    exit(8);
	  }

	//first marker at or after the start position; the index is
	//tested before map[j] is read so the scan never leaves the map
	j=0;
	while(j<nom && map[j]<ret[i].pstart)
	  ++j;

	if(j==nom)
	  {
	    printf("WARNING; CNV %d not covered by SNPs\n",i+1);
	    ret[i].sstart=0;
	    ret[i].snps=0;
	  }
	else
	  {
	    ret[i].sstart=j+1;	//SNP indices are 1-based here
	    while(j<nom && map[j]<=ret[i].pend)
	      ++j;

	    if(j+1==ret[i].sstart)
	      {
		printf("WARNING; CNV %d not covered by SNPs\n",i+1);
		ret[i].snps=0;
	      }
	    else
	      ret[i].snps=j-ret[i].sstart+1;
	  }
	printf(" %d-%d\n",ret[i].sstart,ret[i].snps);
	++i;
	break;
      default:
	break;
      }
    }
  fclose(inf);

  if(i!=*nocnv)
    {
      printf("Wrong number of regions in %s\n",posfile);
      exit(8);
    }

  free(map);
  return(ret);
}
/********************************************************************/
/* Turn the inferred probabilities into hard calls: an individual whose
   inf[1] exceeds thresh becomes (inf[0],inf[1])=(0,1), everybody else
   becomes (1,0). */
void threshold(int indiv, struct signal * data, float thresh)
{
  int who;
  int carrier;

  for(who=0;who<indiv;++who)
    {
      carrier=(data[who].inf[1]>thresh);
      data[who].inf[1]=carrier?1.0:0.0;
      data[who].inf[0]=carrier?0.0:1.0;
    }
}
/********************************************************/
/*
 * Print one line per individual: the name followed by inf[0] and inf[1],
 * or by two columns of asterisks when kn is negative.  Individuals with
 * kn>0 and inf[1] above 0.01 additionally get their nos genotype codes
 * appended.  For individuals with negative kn, inf[1] is overwritten
 * with -1.  Returns the sum of inf[1] over all other individuals, which
 * is also printed as the expected number of deletions.
 */
float results(int indiv, struct signal * data,int nos)
{
  struct signal *cur;
  float total=0.0;
  int who,s;

  for(who=0;who<indiv;++who)
    {
      cur=&data[who];

      if(cur->kn<0)
	{
	  /* flagged individual: no probabilities to report */
	  printf("%s\t******\t******",cur->name);
	  printf("\n");
	  cur->inf[1]=-1;
	  continue;
	}

      printf("%s\t%.4f\t%.4f",cur->name,cur->inf[0],cur->inf[1]);
      if(cur->kn>0 && cur->inf[1]>0.01)
	{
	  printf("\t");
	  for(s=0;s<nos;++s)
	    printf("%d",cur->gt[s]);
	}
      printf("\n");
      total+=cur->inf[1];
    }
  printf("Expected number of deletions %f\n",total);

  return(total);
}
/*******************************************************************************/

/*
 * Read genotypes and hybridization intensities from infile into data.
 *
 * Lines starting with '#' are skipped before the ID line and while
 * skipping to the first marker.  The first remaining line holds the
 * individual IDs: after Colsk leading columns each individual occupies
 * 3-Lrr columns and the ID is the text up to the first ',' or '.'.
 * Then `first` lines are read, the last of which becomes marker 0, and
 * nsnps marker lines are parsed with the reader selected by Lrr
 * (0: two intensities, -1: two intensities plus LRR, 1: one intensity).
 *
 * Per marker the genotype counts are printed; if more than Qc*indiv
 * calls are missing (code 3) the marker fails QC and all its genotypes
 * are set to 3.
 *
 * Exits with status 8 on any I/O or format problem.
 */
void readdata(int indiv,struct signal *data,int nsnps,char * infile,int first)
{
  int read_1hsignal(char * temp, int k, struct signal *data, int a,int c,int sl);
  int read_2hsignal(char * temp, int k, struct signal *data, int a,int c,int sl);
  int read_Lrrsignal(char * temp, int k, struct signal *data, int a,int c,int sl);
  int jump_comma(int k,char * temp,int sl,int nos);

  FILE *inf=fopen(infile,"r");
  char *temp=NULL;
  int c,a;
  int ts=indiv*(4*40)+50,sl;
  int j,k,i,len;
  char *st_ret;
  char name[50];
  int noc=3-Lrr; //Number of columns per individual in the input file
  int gt[4];

  printf("Opening %s\n",infile);

  if(inf==NULL)
    {
      printf("Error opening data file %s\n",infile);
      exit(8);
    }
  temp=(char *)malloc(ts);

  if(temp==NULL)
    {
      printf("Out of memory in read_data\n");
      exit(8);
    }

  //Read first line; stop at end of file instead of re-testing a stale buffer forever
  do
    st_ret=fgets(temp,ts,inf);
  while (st_ret!=NULL && temp[0]=='#');
  if(st_ret==NULL)
    {
      printf("Input error, no ID line found in %s\n",infile);
      exit(8);
    }
  sl=strlen(temp);
  //first line is IDs
  a=0;
  k=jump_comma(0,temp,sl,Colsk);//leading columns are uninteresting
  while(k<sl)
    {
      j=k;
      //copy the ID, never past the end of the line or of name[]
      while(k<sl && temp[k]!=',' && temp[k]!='.')
	{
	  if(k-j<(int)sizeof(name)-1)
	    name[k-j]=temp[k];
	  ++k;
	}
      len=k-j;
      if(len>(int)sizeof(name)-1)
	len=(int)sizeof(name)-1;
      name[len]='\0';
      //surplus IDs are only counted so that the check below reports them
      if(a<indiv)
	strcpy(data[a].name,name);
      k=jump_comma(k,temp,sl,noc);
      ++a;
    }
  if(a!=indiv)
    {
      printf("Input error, wrong number of individuals (observed: %d expected %d) in first line \n",a,indiv);
      printf("There may also be a format error in the input file in the first line\n");
      exit(8);
    }
  // other lines contain hybridization
  for(i=0;i<first;++i) //skip all snps until the first
    {
      do  st_ret=fgets(temp,ts,inf);
      while (st_ret!=NULL && temp[0]=='#');
    }
  c=0;

  while(c<nsnps)
    {
      if(st_ret==NULL)
	{
	  printf("Input error, wrong number of markers\nMaybe boundaries are set wrong\n");
	  exit(8);
	}
      sl=strlen(temp);
      a=0;

      k=jump_comma(0,temp,sl,Colsk);
      for(i=0;i<4;++i)
	gt[i]=0;

      while(k<sl)
	{
	  if(a>=indiv)
	    {
	      //more fields than individuals: count it, but do not write past data[]
	      ++a;
	      break;
	    }
	  switch(Lrr)
	    {
	    case 0:
	      k=read_2hsignal(temp,k,data,a,c,sl);
	      break;
	    case -1:
	      k=read_Lrrsignal(temp,k,data,a,c,sl);
	      break;
	    case 1:
	      k=read_1hsignal(temp,k,data,a,c,sl);
	      break;
	    default:
	      printf("We should never be here!!!\n");
	      exit(8);
	      break;
	    }
	  //the readers return a position beyond sl when the field could not be parsed;
	  //nothing was stored then, so the individual must not be counted
	  if(k>sl)
	    break;
	  ++gt[data[a].gt[c]];

	  ++a;
	}

      if(a!=indiv)
	{
	  printf("Input error, wrong number of individuals (%d) in line %d\n",a,c+2);
	  printf("There may also be a format error in the input file at marker (%d) in line %d\n",a,c+2);
	  exit(8);
	}
      printf("Marker %d: ",c);
      for(i=0;i<4;++i)
	printf("%d ",gt[i]);
      printf("\n");
      if(Qc*indiv<gt[3])
	{
	  printf("Marker %d failed QC\n",c);
	  for(i=0;i<indiv;++i)
	    data[i].gt[c]=3;
	}
      ++c;
      st_ret=fgets(temp,ts,inf);
    }

  free(temp);
  fclose(inf);

  return;
}
/******************************************************************************/
/*
 * Parse one "<g1><g2>,<intensity>" field starting at temp[k] for
 * individual a at marker c.
 * Genotype code: two equal letters give 0 when the letter is 'A' and 2
 * otherwise; two different letters give 3 when the first is 'N' and 1
 * otherwise.  maxsig keeps the intensity for codes 0 and 2 and is set
 * to 0 for codes 1 and 3.
 * Returns the position after the field, or sl+3 when the field could
 * not be parsed (nothing is stored in that case).
 */
int read_1hsignal(char * temp, int k, struct signal *data, int a,int c,int sl)
{
  char first,second;
  float level;
  struct signal *who=&data[a];

  if(sscanf(&temp[k],"%c%c,%f",&first,&second,&level)!=3)
    return (sl+3);

  who->maxsig[c]=level;

  if(first==second)
    {
      who->gt[c]=(first=='A')?0:2;
    }
  else
    {
      who->gt[c]=(first=='N')?3:1;
      who->maxsig[c]=0.0;
    }

  return (jump_comma(k,temp,sl,2));
}
/***********************************************************************************************/
/*
 * Parse one "<g1><g2>,<A>,<B>" field starting at temp[k] for
 * individual a at marker c.
 * Genotype code: two equal letters give 0 when the letter is 'A' and 2
 * otherwise; two different letters give 3 when the first is 'N' and 1
 * otherwise.  maxsig is the A intensity for code 0, the B intensity for
 * code 2 and 0 for codes 1 and 3.  Both intensities are always kept in
 * sigA and sigB.
 * Returns the position after the field, or sl+3 when the field could
 * not be parsed (nothing is stored in that case).
 */
int read_2hsignal(char * temp, int k, struct signal *data, int a,int c,int sl)
{
  char first,second;
  float chanA,chanB;
  struct signal *who=&data[a];

  if(sscanf(&temp[k],"%c%c,%f,%f",&first,&second,&chanA,&chanB)!=4)
    return (sl+3);

  if(first!=second)
    {
      who->gt[c]=(first=='N')?3:1;
      who->maxsig[c]=0.0;
    }
  else if(first=='A')
    {
      who->gt[c]=0;
      who->maxsig[c]=chanA;
    }
  else
    {
      who->gt[c]=2;
      who->maxsig[c]=chanB;
    }

  who->sigA[c]=chanA;
  who->sigB[c]=chanB;

  return (jump_comma(k,temp,sl,3));
}
/***********************************************************************************************/
/*
 * Parse one "<g1><g2>,<A>,<B>,<LRR>" field starting at temp[k] for
 * individual a at marker c.
 * Genotype code and maxsig follow the same rules as in the two-intensity
 * reader: equal letters give 0 ('A', maxsig=A) or 2 (otherwise,
 * maxsig=B); different letters give 3 (first letter 'N') or 1, both
 * with maxsig=0.  sigA, sigB and lrr always receive the three numbers.
 * Returns the position after the field, or sl+3 when the field could
 * not be parsed (nothing is stored in that case).
 */
int read_Lrrsignal(char * temp, int k, struct signal *data, int a,int c,int sl)
{
  char first,second;
  float chanA,chanB,ratio;
  struct signal *who=&data[a];

  if(sscanf(&temp[k],"%c%c,%f,%f,%f",&first,&second,&chanA,&chanB,&ratio)!=5)
    return sl+3;

  if(first!=second)
    {
      who->gt[c]=(first=='N')?3:1;
      who->maxsig[c]=0.0;
    }
  else if(first=='A')
    {
      who->gt[c]=0;
      who->maxsig[c]=chanA;
    }
  else
    {
      who->gt[c]=2;
      who->maxsig[c]=chanB;
    }

  who->sigA[c]=chanA;
  who->sigB[c]=chanB;
  who->lrr[c]=ratio;

  return jump_comma(k,temp,sl,4);
}
/*******************************************************************************************************/
/*
 * Advance from position k in temp past the next nos commas and return
 * the position right behind the last of them.  The scan stops early at
 * sl, the length of the line; k is returned unchanged when nos is not
 * positive or k already lies at or beyond sl.
 */
int jump_comma(int k,char * temp,int sl,int nos)
{
	int seen=0;

	while(seen<nos && k<sl)
	{
		if(temp[k]==',')
			++seen;
		++k;
	}
	return k;
}
/***************************************************************************************************/
/*
 * Write the per-CNV summary table to sumfile (tab separated): a header
 * line with the individual names, then one line per CNV with its type,
 * position, SNP range, estimated range, expected carriers, best LR,
 * initial frequency, an outlier column and, for CNVs covering at least
 * one SNP, the carrier probability of each individual ("****" for
 * negative values).
 * Outlier column: "OL" when Outlieflag is 0 and po is positive, the
 * value of po when Outlieflag is 1, empty otherwise.
 * Exits with status 8 when sumfile cannot be opened for writing.
 */
void summout(char * sumfile,struct cnv *cnvs,int indiv,int nocnv,char ** names)
{
	FILE *sout=fopen(sumfile,"w");
	int i,j;

	//every fprintf below would dereference a NULL stream otherwise
	if(sout==NULL)
	{
		printf("Error opening summary file %s\n",sumfile);
		exit(8);
	}

	fprintf(sout,"No\tType\tStart\tEnd\tFirstSNP\tlength\testFS\testlength\tExp car\tmax LR\t\t");
	for(i=0;i<indiv;++i)
		fprintf(sout,"\t%s",names[i]);
	fprintf(sout,"\n");
	for(j=0;j<nocnv;++j)
	{
		fprintf(sout,"%d\t%d\t%d\t%d",j+1,cnvs[j].type,cnvs[j].pstart,cnvs[j].pend);
		fprintf(sout,"\t%d\t%d\t+%d\t%d\t%.1f\t%.1e",cnvs[j].sstart,cnvs[j].snps,cnvs[j].estart,cnvs[j].esnps,cnvs[j].carr,cnvs[j].blr);
		fprintf(sout,"\t%.4f",cnvs[j].initfr);

		if(Outlieflag==0 && cnvs[j].po>0)
			fprintf (sout,"\tOL");
		else if(Outlieflag==1)
			fprintf(sout,"\t%d",cnvs[j].po);
		else
			fprintf(sout,"\t");

		if(cnvs[j].snps>0)
		{
			for(i=0;i<indiv;++i)
			{
				if(cnvs[j].cprobs[i]>=0)
					fprintf(sout,"\t%.3f",cnvs[j].cprobs[i]);
				else
					fprintf(sout,"\t****");
			}
		}

		fprintf(sout,"\n");
	}
	fclose(sout);
}
/**************************************************************************************/
/*************** Additional analyses***************************************************/
/**************************************************************************************/
/* For every SNP print the signal statistics of each of the four
   genotype codes (0-3) by calling calc_mv(), with a separator line
   after each SNP. */
void calc_stuff(struct signal * res, int snps, int indiv)
{
	void calc_mv(struct signal * data, int snp, int gt, int indiv);

	int marker,code;

	printf("\n------------------------------------------\n");
	printf("Analysis of signal distribution\n");

	marker=0;
	while(marker<snps)
	{
		for(code=0;code<4;++code)
		{
			printf("GT %d",code);
			calc_mv(res,marker,code,indiv);
		}
		printf("\n------------------------------------------\n");
		++marker;
	}
}
/*******************************************************************/
/*
 * Print, for one SNP and one genotype code, the weighted mean and
 * standard deviation of the A and B signals (and of the LRR when
 * Lrr==-1), once weighted by inf[0] and once by inf[1], each followed
 * by the sum of the weights.
 * When no individual carries the genotype the weight sum is 0 and the
 * printed statistics are not meaningful (0/0).
 */
void calc_mv(struct signal * data, int snp, int gt, int indiv)
{
  int k,i;
  float muA,varA,muB,varB,muR,varR;
  float c;
  double spread;

  for(k=0;k<2;++k)
    {
      muA=0.0;
      varA=0.0;
      muB=0.0;
      varB=0.0;
      muR=0.0;
      varR=0.0;
      c=0.0;

      for(i=0;i<indiv;++i)
	if(data[i].gt[snp]==gt)
	  {
	    muA+=data[i].sigA[snp]*data[i].inf[k];
	    muB+=data[i].sigB[snp]*data[i].inf[k];
	    if(Lrr==-1)
	      {
		muR+=data[i].lrr[snp]*data[i].inf[k];
		/* weighted second moment: the weight enters once, not
		   squared, matching how the A and B channels are weighted */
		varR+=pow(data[i].lrr[snp],2)*data[i].inf[k];
	      }
	    c+=data[i].inf[k];
	  }

      muA=muA/c;
      muB=muB/c;
      muR=muR/c;
      /* rounding can push the variance slightly below zero */
      spread=varR/c-pow(muR,2);
      varR=(spread<0.0)?0.0:sqrt(spread);

      for(i=0;i<indiv;++i)
	if(data[i].gt[snp]==gt)
	  {
	    varA+=pow(data[i].sigA[snp]- muA,2)*data[i].inf[k];
	    varB+=pow(data[i].sigB[snp]- muB,2)*data[i].inf[k];
	  }

      varA=sqrt(varA/c);
      varB=sqrt(varB/c);

      printf("\tA %.3f %.3f B %.3f %.3f",muA,varA,muB,varB);
      if(Lrr==-1)
	printf(" LRR %.3f %.3f",muR,varR);
      printf(" %.2f\n",c);

    }
  return;
}
/**************************************************************************************/
/*
 * Build a dataset sdat holding all indiv individuals but only subs SNPs
 * drawn at random, without replacement, from the snps SNPs of data.
 * Names and inf values are copied; kn is recomputed as the number of
 * heterozygous calls (code 1) among the drawn SNPs.  The drawn SNP
 * indices are printed on one line.
 * subs must not exceed snps, otherwise the draw cannot terminate.
 * NOTE(review): lrr is not copied into sdat - confirm the subsampled
 * data is never analysed with LRR.
 * Exits with status 8 when memory runs out.
 */
void subsample(struct signal *data,struct signal *sdat,int indiv,int snps,int subs)
{

  int i,j,k;
  int*samp;

  samp=malloc(subs*sizeof(int));
  /* a positive request that fails would otherwise be written through below */
  if(samp==NULL && subs>0)
    {
      printf("Out of memory in subsample\n");
      exit(8);
    }

  for(k=0;k<indiv;++k)
    {
      strcpy(sdat[k].name,data[k].name);
      sdat[k].kn=0;
      sdat[k].inf[0]=data[k].inf[0];
      sdat[k].inf[1]=data[k].inf[1];
    }

  for(i=0;i<subs;++i)
    {
      /* redraw until the index differs from all earlier draws */
      do{
	j=snps*RandDbl();
	for(k=0;k<i;++k)
	  if(j==samp[k])
	    j=-1;
      }while (j==-1);
      samp[i]=j;
      printf("%d ",j);

      for(k=0;k<indiv;++k)
	{
	  sdat[k].maxsig[i]=data[k].maxsig[j];
	  sdat[k].sigA[i]=data[k].sigA[j];
	  sdat[k].sigB[i]=data[k].sigB[j];
	  sdat[k].gt[i]=data[k].gt[j];
	}

    }

  for(k=0;k<indiv;++k)
    for(i=0;i<subs;++i)
      if(sdat[k].gt[i]==1)
	sdat[k].kn++;

  printf("\n");
  free (samp);
  return;
}

/**********************************************************/
/*
 * Fill sdat by drawing individuals at random (with replacement) from
 * data.  hmz = cnvfreq*sind rounded is the number of slots reserved for
 * individuals with inf[0] not above 0.5; the slots before them receive
 * individuals with inf[0] not below 0.5.  For every slot a candidate is
 * redrawn until it belongs to the wanted group, then its name, kn, inf
 * and the per-SNP maxsig, sigA, sigB and gt values are copied.
 */
void subsample2(struct signal *data,struct signal *sdat,int indiv,int sind, int snps, float cnvfreq)
{
  int hmz =cnvfreq*sind+0.5;
  int base=sind-hmz;			/* first slot of the second group */
  int total=(base>sind)?base:sind;	/* slots filled altogether */
  int slot,src,s;
  int second;

  printf("%d hemizygous (%f, %d)\n",hmz,cnvfreq,sind);

  for(slot=0;slot<total;++slot)
    {
      second=(slot>=base);

      do
	src=indiv*RandDbl();
      while (second ? data[src].inf[0]>0.5 : data[src].inf[0]<0.5);

      strcpy(sdat[slot].name,data[src].name);
      sdat[slot].kn=data[src].kn;
      sdat[slot].inf[0]=data[src].inf[0];
      sdat[slot].inf[1]=data[src].inf[1];

      for(s=0;s<snps;++s)
	{
	  sdat[slot].maxsig[s]=data[src].maxsig[s];
	  sdat[slot].sigA[s]=data[src].sigA[s];
	  sdat[slot].sigB[s]=data[src].sigB[s];
	  sdat[slot].gt[s]=data[src].gt[s];
	}
    }
}

/**********************************************************/
/* Absolute value of a float: negative inputs are negated, everything
   else is returned as is. */
float abso(float in)
{
	return (in<0) ? -in : in;
}
/*********************************************************************/
/* Sum over the first indiv individuals of the absolute difference
   between inf[0] in data and inf[0] in sdat (same index in both). */
float compare(struct signal *data,struct signal *sdat, int indiv)
{
	float stat=0.0;
	int who=0;

	while(who<indiv)
	{
		stat+=fabs(data[who].inf[0]-sdat[who].inf[0]);
		++who;
	}

	return(stat);
}
/**********************************************************/
float compare2(struct signal *data,struct signal *sdat, int indiv, int sind)
{
	int strcomp(char * str1, char *str2);
	int i,j;
	float stat=0.0;
	float tot=0;


	for(i=0;i<sind;++i)
	{
		j=0;
	while(strcomp(data[j].name,sdat[i].name)==0)
	++j;
//printf("%s %s\n",data[j].name,sdat[i].name);
		stat+=abso(data[j].inf[0]-sdat[i].inf[0]);
	tot+=data[j].inf[0];
	}
	printf("%f %f \n",stat,tot);
	if(tot>sind)
		exit(8);
	//if(tot>0) stat=stat/tot;
	return(stat);
}
/*****************************************************************/
void calcss(float * results, int noe)
{
	int i;
	float av=0.0;
	float tvar=0.0;
	float min=results[0];
	float max=results[0];
	int pcount=0;

	for(i=0;i<noe;++i)
	{
		av+=results[i];
		tvar+=results[i]*results[i];
		if(results[i]>max) max=results[i];
		if(results[i]<min) min=results[i];
		if(results[i]<0.0001)
			++pcount;
	}
	av=av/noe;
	tvar=sqrt(tvar/noe-pow(av,2));

	printf("average %f standard deviation %f minimum %f maximum %f perfect sets %d\n",av,tvar,min,max,pcount);

	return;
}
/*****************************************************************/
/* String equality test: returns 1 when str1 and str2 have the same
   length and the same characters, 0 otherwise. */
int strcomp(char * str1, char *str2)
{
	if(strcmp(str1,str2)==0)
		return (1);

	return (0);
}
/*******************************************************************/
/*
 * Drop every marker for which all indiv individuals have genotype
 * code 3.  A dropped marker is overwritten by shifting the maxsig, lrr,
 * sigA, sigB and gt values of all later markers one position down.
 * Prints a line for each removal and returns the number of markers that
 * remain.
 */
int remove_empty_markers(struct signal *data, int snps,int indiv)
{
  int left=snps;	/* markers still in the dataset */
  int pos=0;		/* marker under inspection */
  int who,dst;
  int missing;

  while(pos<left)
    {
      missing=0;
      for(who=0;who<indiv;++who)
	missing+=(data[who].gt[pos]==3);

      if(missing!=indiv)
	{
	  ++pos;
	  continue;
	}

      printf("Removing Marker %d\n",pos);
      for(dst=pos;dst<snps-1;++dst)
	for(who=0;who<indiv;++who)
	  {
	    data[who].maxsig[dst]=data[who].maxsig[dst+1];
	    data[who].lrr[dst]=data[who].lrr[dst+1];
	    data[who].sigA[dst]=data[who].sigA[dst+1];
	    data[who].sigB[dst]=data[who].sigB[dst+1];
	    data[who].gt[dst]=data[who].gt[dst+1];
	  }
      /* the marker that moved into pos is inspected next */
      --left;
    }
  return (left);
}
/******************************************************************************/
/*******************Duplications*************************************************/
/******************************************************************************/

/* Allocate a zero-filled table of d1 x 4 x 2 floats, addressed as
   t[snp][genotype slot][channel].  Exits with status 8 when memory runs
   out. */
static float ***d2_alloc3(int d1)
{
  float ***t=malloc(d1*sizeof *t);
  int j,k;

  if(t==NULL)
    {
      printf("Out of memory in d2_em\n");
      exit(8);
    }
  for(j=0;j<d1;++j)
    {
      t[j]=malloc(4*sizeof *t[j]);
      if(t[j]==NULL)
	{
	  printf("Out of memory in d2_em\n");
	  exit(8);
	}
      for(k=0;k<4;++k)
	{
	  t[j][k]=calloc(2,sizeof *t[j][k]);
	  if(t[j][k]==NULL)
	    {
	      printf("Out of memory in d2_em\n");
	      exit(8);
	    }
	}
    }
  return(t);
}

/* Release a table created by d2_alloc3(). */
static void d2_free3(float ***t,int d1)
{
  int j,k;

  for(j=0;j<d1;++j)
    {
      for(k=0;k<4;++k)
	free(t[j][k]);
      free(t[j]);
    }
  free(t);
}

/*
 * Run `steps` rounds of the two-channel EM on SNPs start..end-1:
 * est_two_dist() re-estimates the signal distributions, est_two_status()
 * updates every individual's inf values and its return value is stored
 * in *noc.  Always returns 0.
 *
 * The parameter tables were previously passed as uninitialised
 * pointers; they are now allocated here (one row per SNP, four genotype
 * slots, two channels) and released before returning.
 */
float d2_em(int steps,struct signal * data,int indiv, int snps,int start, int end, float * noc)
{
  void est_two_dist(int indiv,float *** mu,float *** var,struct signal * data,int start,int end);
  int est_two_status(int indiv,float *** mu,float *** var,struct signal *data,int start,int end,float *** dmu,float *** dvar);
  float ***mu,*** dmu;
  float ***var,*** dvar;
  int rows;
  int i;

  /* the tables are indexed by SNP number up to end-1 */
  rows=(snps>end)?snps:end;
  if(rows<1)
    rows=1;

  mu=d2_alloc3(rows);
  var=d2_alloc3(rows);
  dmu=d2_alloc3(rows);
  dvar=d2_alloc3(rows);

  for(i=0;i<steps;++i)
    {
      est_two_dist(indiv,mu,var,data,start,end);
      *noc=est_two_status(indiv,mu,var,data,start,end,dmu,dvar);
    }

  d2_free3(mu,rows);
  d2_free3(var,rows);
  d2_free3(dmu,rows);
  d2_free3(dvar,rows);

  return(0);
}
/*************************************************************************************/
/*
 * Estimate, for every SNP j in start..end-1, the distribution of the A
 * (channel 0) and B (channel 1) signals per genotype code 0-2.
 *
 *   mu[j][g][m], var[j][g][m]   mean and standard deviation weighted by
 *                               inf[0]
 *   mu[j][3][m], var[j][3][m]   shift between the inf[1]-weighted and
 *                               the inf[0]-weighted estimates, averaged
 *                               over the genotypes with the inf[1]
 *                               weight sums as weights
 *
 * Individuals with kn<0 and calls with genotype code 3 are ignored.
 * mu and var need four genotype slots per SNP.
 *
 * Fixes over the previous version: the SNP loop now runs on j (it ran
 * on i, which the individual loop overwrote, while j was never set),
 * the weight sums c and the local dmu/dvar are cleared per SNP, and the
 * total weight is accumulated as a float from c[k][1].
 * NOTE(review): the intent was reconstructed from the surrounding
 * arithmetic - confirm against the model description.
 */
void est_two_dist(int indiv,float *** mu,float *** var,struct signal * data,int start,int end)
{
  float dmu[3][2];
  float dvar[3][2];
  float c[3][2];
  float wsum;
  double v;
  int i,j,k,m,g;

  for(j=start;j<end;++j)
    {
      for(k=0;k<3;++k)
	for(m=0;m<2;++m)
	  {
	    mu[j][k][m]=0.0;
	    var[j][k][m]=0.0;
	    dmu[k][m]=0.0;
	    dvar[k][m]=0.0;
	    c[k][m]=0.0;
	  }

      /* weighted first and second moments per genotype */
      for(i=0;i<indiv;++i)
	{
	  g=data[i].gt[j];
	  if(g<3 && data[i].kn>=0)
	    {
	      mu[j][g][0]+=data[i].sigA[j]*data[i].inf[0];
	      mu[j][g][1]+=data[i].sigB[j]*data[i].inf[0];
	      dmu[g][0]+=data[i].sigA[j]*data[i].inf[1];
	      dmu[g][1]+=data[i].sigB[j]*data[i].inf[1];

	      var[j][g][0]+=pow(data[i].sigA[j],2)*data[i].inf[0];
	      var[j][g][1]+=pow(data[i].sigB[j],2)*data[i].inf[0];
	      dvar[g][0]+=pow(data[i].sigA[j],2)*data[i].inf[1];
	      dvar[g][1]+=pow(data[i].sigB[j],2)*data[i].inf[1];

	      for(m=0;m<2;++m)
		c[g][m]+=data[i].inf[m];
	    }
	}

      /* moments -> mean and standard deviation */
      for(k=0;k<3;++k)
	{
	  if(c[k][0]>0)
	    for(m=0;m<2;++m)
	      {
		mu[j][k][m]=mu[j][k][m]/c[k][0];
		v=var[j][k][m]/c[k][0]-pow(mu[j][k][m],2);
		var[j][k][m]=(v<0.0)?0.0:sqrt(v);	/* guard against rounding below zero */
	      }
	  if(c[k][1]>0)
	    for(m=0;m<2;++m)
	      {
		dmu[k][m]=dmu[k][m]/c[k][1];
		v=dvar[k][m]/c[k][1]-pow(dmu[k][m],2);
		dvar[k][m]=(v<0.0)?0.0:sqrt(v);
	      }
	}

      /* a homozygote class without spread borrows it from the other one */
      if(var[j][0][0]==0)
	var[j][0][0]=var[j][2][1];
      if(var[j][2][1]==0)
	var[j][2][1]=var[j][0][0];

      for(m=0;m<2;++m)
	{
	  mu[j][3][m]=0.0;
	  var[j][3][m]=0.0;
	  wsum=0.0;
	  for(k=0;k<3;++k)
	    {
	      mu[j][3][m]+=c[k][1]*(dmu[k][m]-mu[j][k][m]);
	      var[j][3][m]+=c[k][1]*(pow(dvar[k][m],2)-pow(var[j][k][m],2));
	      wsum+=c[k][1];
	    }
	  if(wsum>0)
	    {
	      mu[j][3][m]=mu[j][3][m]/wsum;
	      if(var[j][3][m]>0)
		var[j][3][m]=sqrt(var[j][3][m]/wsum);
	      else
		var[j][3][m]=var[j][1][m];
	    }
	}
    }
  return;
}
/*********************************************************************************************/
/*
 * Update every individual's inf values from the current distribution
 * estimates for SNPs start..end-1.
 *
 * First the second-state parameters are derived per SNP and genotype:
 * dmu = mu + mu[j][3], dvar = sqrt(var^2 + var[j][3]^2).  Then, per
 * individual, the log densities of the observed signals are summed
 * under both parameter sets (A channel for code 0, B channel for
 * code 2, both for code 1, nothing for code 3) and normalised:
 * inf[0] = p0/(p0+p1), inf[1] = 1-inf[0].  If both likelihoods
 * underflow to 0 a message is printed and inf[0] is set to 0.5.
 *
 * Returns the sum of inf[1] over all individuals, rounded to the
 * nearest integer.  (The sum used to be accumulated in an int, which
 * dropped the fractional part at every addition.)
 * Exits with status 8 when a likelihood is negative or not a number.
 */
int est_two_status(int indiv,float *** mu,float *** var,struct signal *data,int start,int end,float *** dmu,float *** dvar)
{
  int i,j,k,m,g;
  double p[2];
  double carriers=0.0;

  for(j=start;j<end;++j)
    for(k=0;k<3;++k)
      for(m=0;m<2;++m)
	{
	  dmu[j][k][m]=mu[j][k][m]+mu[j][3][m];
	  dvar[j][k][m]=sqrt(pow(var[j][k][m],2)+pow(var[j][3][m],2));
	}


  for(i=0;i<indiv;++i)
    {
      p[1]=0.0;p[0]=0.0;
      for(j=start;j<end;++j)
	{
	  g=data[i].gt[j];
	  switch(g)
	    {
	    case 0:
	      p[0]+=logprob(data[i].sigA[j],mu[j][g][0],var[j][g][0]);
	      p[1]+=logprob(data[i].sigA[j],dmu[j][g][0],dvar[j][g][0]);
	      break;
	    case 1:
	      p[0]+=logprob(data[i].sigA[j],mu[j][g][0],var[j][g][0]);
	      p[0]+=logprob(data[i].sigB[j],mu[j][g][1],var[j][g][1]);

	      p[1]+=logprob(data[i].sigA[j],dmu[j][g][0],dvar[j][g][0]);
	      p[1]+=logprob(data[i].sigB[j],dmu[j][g][1],dvar[j][g][1]);
	      break;
	    case 2:
	      p[0]+=logprob(data[i].sigB[j],mu[j][g][1],var[j][g][1]);
	      p[1]+=logprob(data[i].sigB[j],dmu[j][g][1],dvar[j][g][1]);
	      break;
	    default:
	      break;
	    }
	}
      p[0]=exp(p[0]);
      p[1]=exp(p[1]);

      /* both likelihoods must be valid; the old test checked p[0] twice */
      if(p[0]>=0.0 && p[1]>=0.0)
	{
	  if(p[1]>0.0 || p[0]>0.0)
	    p[0]=p[0]/(p[0]+p[1]);
	  else
	    {
	      printf("Underflow error in est_two_status\n");
	      p[0]=0.5;
	    }
	}
      else
	{
	  printf("ERROR:p1 p2 %f %f\n",p[0],p[1]);
	  for(j=start;j<end;++j)
	    {
	      g=data[i].gt[j];
	      if(g<0 || g>2)
		continue;	/* dmu and dvar are only filled for codes 0-2 */
	      for(k=0;k<2;++k)
		{
		  printf("SNP Baseline %d Channel %d mu %f sigma %f\n",j,k,mu[j][g][k],var[j][g][k]);
		  printf("SNP Duplication %d Channel %d mu %f sigma %f\n",j,k,dmu[j][g][k],dvar[j][g][k]);
		}
	    }
	  exit(8);
	}

      data[i].inf[0]=p[0];
      data[i].inf[1]=1-p[0];
      carriers+=data[i].inf[1];
    }

  return((int)(carriers+0.5));
}
