/*  
	LASER: Locating Ancestry from SEquence Reads
    Copyright (C) 2013  Chaolong Wang

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <iostream>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <cmath>
#include <algorithm>
#include <time.h>
#include <stdlib.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_statistics_double.h>
#include "armadillo"
using namespace arma;
using namespace std;

// ---- Command-line option flags recognized by parse_cmd_line() ----
// Each flag is followed on the command line by exactly one value.
const string ARG_PARAM_FILE = "-p";
const string ARG_GENO_FILE = "-g";
const string ARG_COORD_FILE = "-c";
const string ARG_SEQ_FILE = "-s";
const string ARG_OUT_PREFIX = "-o";
const string ARG_MIN_LOCI = "-l";
const string ARG_DIM = "-k";
const string ARG_SEQ_ERR = "-e";
const string ARG_FIRST_IND = "-x";
const string ARG_LAST_IND = "-y";
const string ARG_REPS = "-r";
const string ARG_OUTPUT_REPS = "-R";
const string ARG_CHECK_COVERAGE = "-cov";
const string ARG_CHECK_FORMAT = "-fmt";
const string ARG_PCA_MODE = "-pca";
const string ARG_DIM_SEQ = "-ks";

// ---- Sentinel values meaning "this parameter has not been set yet" ----
const string default_str = "---this-is-a-default-string---";
const int default_int = -999999998;
const double default_double = -9.99999999;
char tmpstr[2000];                  // General-purpose scratch character buffer (2000 bytes)

string LOG_FILE;                    // Final log file name (OUT_PREFIX + ".log"); set in main()
string TMP_LOG_FILE = "Temporary."; // Prefix of the temporary log file; main() appends "<pid>.log"

string PARAM_FILE = default_str;    // Parameter file name
string GENO_FILE = default_str;     // Reference genotype data file name
string COORD_FILE = default_str;    // Reference coordinates data file name
string SEQ_FILE = default_str;      // Sequence data file name
string OUT_PREFIX = default_str;    // Prefix for output files

double SEQ_ERR = default_double;    // Sequencing error rate per read;
int DIM = default_int;              // Number of PCs to match;
int MIN_LOCI = default_int;         // Minimum number of loci that have at least one read;

int FIRST_IND = default_int;	    // First individual (1-based) in the list of samples to be tested;
int LAST_IND = default_int;	    // Last individual (1-based) in the list of samples to be tested;

int REPS = default_int;             // Number of replicates to run for each sample;
int OUTPUT_REPS = default_int;      // 0: Only output the mean and sd; 
                                    // 1: Output results from all replicates;

int CHECK_COVERAGE = default_int;   // 0: do not check coverage, proceed to major computation;
                                    // 1: check coverage first, then proceed to major computation;
                                    // 2: check coverage only.

int CHECK_FORMAT = default_int;     // 0: Do not check format of the input data files, and proceed to major computation;
                                    // 1: Check format of all files and stop;  10: Proceed after checking all files
                                    // 2: Check format of GENO_FILE and stop;  20: Proceed after checking GENO_FILE
                                    // 3: Check format of SEQ_FILE and stop;   30: Proceed after checking SEQ_FILE
                                    // 4: Check format of COORD_FILE and stop; 40: Proceed after checking COORD_FILE

int PCA_MODE = default_int;         // 0: LASER;
                                    // 1: PCA on genotypes;
									
// The following options are under development
int DIM_SEQ = default_int;               // Number of sequence-based PCs to match (main() defaults it to DIM);

// The following parameters will be determined from the input data files					 
int REF_INDS = default_int;         // Number of reference individuals
int SEQ_INDS = default_int;         // Number of sequence samples
int LOCI = default_int;             // Number of loci
int NUM_PCS = default_int;          // Number of PCs in the COORD_FILE;

// Layout of the input tables: leading rows/columns that hold headers or IDs
// rather than data. main() subtracts these from the detected table dimensions.
int GENO_NON_DATA_ROWS = 0;    // Number of non-data rows in the GENO_FILE;
int GENO_NON_DATA_COLS = 2;    // Number of non-data columns in the GENO_FILE;
int COORD_NON_DATA_ROWS = 1;   // Number of non-data rows in the COORD_FILE;
int COORD_NON_DATA_COLS = 2;   // Number of non-data columns in the COORD_FILE;
int SEQ_NON_DATA_ROWS = 0;     // Number of non-data rows in the SEQ_FILE;
int SEQ_NON_DATA_COLS = 2;     // Number of non-data columns in the SEQ_FILE;

//=======================================================================================
// Forward declarations. Except for parse_cmd_line() and is_int(), the definitions
// are not in this part of the file.

// String validation and parameter handling
bool is_int(string str);
bool is_numeric(string str);
bool parse_cmd_line(int argc, char* argv[], map<string,string> &args, map<string,int> &argi, map<string,double> &argd);
int read_paramfile();
int create_paramfile();
int check_parameters();
void print_configuration();

// Numerical routines (PCA, Procrustes analysis, simulation of sequence reads)
int pca(fmat G, int nPCs, mat &PC, rowvec &PCvar);
int pca_only(fmat &G, int nPCs, mat &PC, rowvec &PCvar);
int procrustes(mat &X, mat &Y, mat &Xnew, double &t, double &rho, mat &A, rowvec &b);
int simuseq(fmat &G, frowvec &C, urowvec &Loc, double e, fmat &S, int random_seed);

// Input checking; main() treats a return value of 0 from these as failure
int check_coverage();
int check_format_geno();
int check_format_seq();
int check_format_coord();
bool get_table_dim(int &nrow, int &ncol, string filename, char separator);

ofstream foutLog;              // Log file stream shared by all functions

//=========================================================================================================
int main(int argc, char* argv[]){
	time_t t1,t2;
	float runningtime;
	t1 = clock();   // Program starting time
	
	time_t rawtime;
  	struct tm * timeinfo;
 	time ( &rawtime );
  	timeinfo = localtime ( &rawtime );

	stringstream ss;
	ss << getpid();
	string strpid = ss.str();
	TMP_LOG_FILE.append(strpid);
	TMP_LOG_FILE.append(".log");
	
	foutLog.open(TMP_LOG_FILE.c_str());
	if(foutLog.fail()){
		cerr << "Error: cannot create a temporary log file." << endl;
		return 0;
	}

	cout << endl;
	cout << "====================================================================" <<endl;
	cout << "====        LASER: Locating Ancestry from SEquence Reads        ====" <<endl; 
	cout << "====          Version 1.03, Last updated on Aug/08/2013         ====" <<endl;	
	cout << "====  (C) 2013 Chaolong Wang, GNU General Public License v3.0   ====" <<endl;
	cout << "====================================================================" <<endl;
  	cout << "Started at: " << asctime (timeinfo) << endl;

	foutLog << "====================================================================" <<endl;
	foutLog << "====        LASER: Locating Ancestry from SEquence Reads        ====" <<endl; 
	foutLog << "====          Version 1.03, Last updated on Aug/08/2013         ====" <<endl;
	foutLog << "====  (C) 2013 Chaolong Wang, GNU General Public License v3.0   ====" <<endl;
	foutLog << "====================================================================" <<endl;
  	foutLog << "Started at: " << asctime (timeinfo) << endl;
		

	// ################ Read in command line ##########################
	map<string,string> args;
	map<string,int> argi;
	map<string,double> argd;	
	bool cmd_flag = parse_cmd_line(argc, argv, args, argi, argd);
	if(args[ARG_PARAM_FILE].compare(default_str)!=0){PARAM_FILE = args[ARG_PARAM_FILE];}
	if(args[ARG_GENO_FILE].compare(default_str)!=0){GENO_FILE = args[ARG_GENO_FILE];}
	if(args[ARG_COORD_FILE].compare(default_str)!=0){COORD_FILE = args[ARG_COORD_FILE];}
	if(args[ARG_SEQ_FILE].compare(default_str)!=0){SEQ_FILE = args[ARG_SEQ_FILE];}
	if(args[ARG_OUT_PREFIX].compare(default_str)!=0){OUT_PREFIX = args[ARG_OUT_PREFIX];}
	if(argi[ARG_MIN_LOCI]!=default_int){MIN_LOCI = argi[ARG_MIN_LOCI];}
	if(argi[ARG_DIM]!=default_int){DIM = argi[ARG_DIM];}
	if(argd[ARG_SEQ_ERR]!=default_double){SEQ_ERR = argd[ARG_SEQ_ERR];}
	if(argi[ARG_FIRST_IND]!=default_int){FIRST_IND = argi[ARG_FIRST_IND];}
	if(argi[ARG_LAST_IND]!=default_int){LAST_IND = argi[ARG_LAST_IND];}
	if(argi[ARG_REPS]!=default_int){REPS = argi[ARG_REPS];}
	if(argi[ARG_OUTPUT_REPS]!=default_int){OUTPUT_REPS = argi[ARG_OUTPUT_REPS];}
	if(argi[ARG_CHECK_COVERAGE]!=default_int){CHECK_COVERAGE = argi[ARG_CHECK_COVERAGE];}
	if(argi[ARG_CHECK_FORMAT]!=default_int){CHECK_FORMAT = argi[ARG_CHECK_FORMAT];}
	if(argi[ARG_PCA_MODE]!=default_int){PCA_MODE = argi[ARG_PCA_MODE];}	
	if(argi[ARG_DIM_SEQ]!=default_int){DIM_SEQ = argi[ARG_DIM_SEQ];}
	//##################     Read in and check parameter values #######################
	if(PARAM_FILE.compare(default_str)==0){ PARAM_FILE = "laser.conf"; }
	int flag = read_paramfile();
	if(flag==0 || cmd_flag==0){
		foutLog.close();
		if(OUT_PREFIX.compare(default_str)==0){
			LOG_FILE = "laser.log";
		}else{
			LOG_FILE = OUT_PREFIX;
			LOG_FILE.append(".log");
		}
		sprintf(tmpstr, "%s%s%s%s", "mv ", TMP_LOG_FILE.c_str()," ", LOG_FILE.c_str());
		int sys_msg = system(tmpstr);
		return 0;
	}
	//###############################################################################
	if(OUT_PREFIX.compare(default_str)==0){ OUT_PREFIX = "laser"; }
	foutLog.close();
	LOG_FILE = OUT_PREFIX;
	LOG_FILE.append(".log");
	sprintf(tmpstr, "%s%s%s%s", "mv ", TMP_LOG_FILE.c_str()," ", LOG_FILE.c_str());
	int sys_msg = system(tmpstr);
	foutLog.open(LOG_FILE.c_str(), ios::app);
	if(foutLog.fail()){
		cerr << "Error: cannot create the log file." << endl;
		return 0;
	}
	//############## Get values for REF_INDS, LOCI, SEQ_INDS, NUM_PCs ################
	int nrow = 0;
	int ncol = 0;
	flag = 1;
	if(GENO_FILE.compare(default_str) != 0){
		if(!get_table_dim(nrow, ncol, GENO_FILE, '\t')){
			cerr << "Error: cannot open the GENO_FILE '" << GENO_FILE << "'." << endl;    
			foutLog << "Error: cannot open the GENO_FILE '" << GENO_FILE << "'." << endl;   
			foutLog.close();
			return 0;
		}	
		REF_INDS = nrow - GENO_NON_DATA_ROWS;
		LOCI = ncol - GENO_NON_DATA_COLS;
		cout << REF_INDS << " individuals are detected in the GENO_FILE." << endl;
		cout << LOCI << " loci are detected in the GENO_FILE." << endl;
		foutLog << REF_INDS << " individuals are detected in the GENO_FILE." << endl;
		foutLog << LOCI << " loci are detected in the GENO_FILE." << endl;		
		if(REF_INDS < 0){
			cerr << "Error: Invalid number of rows in the GENO_FILE '" << GENO_FILE << "'." << endl;
			foutLog << "Error: Invalid number of rows in the GENO_FILE '" << GENO_FILE << "'." << endl;
			flag = 0;
		}
		if(LOCI < 0){
			cerr << "Error: Invalid number of columns in the GENO_FILE '" << GENO_FILE << "'." << endl;
			foutLog << "Error: Invalid number of columns in the GENO_FILE '" << GENO_FILE << "'." << endl;
			flag = 0;
		}
	}else{
		cerr << "Error: GENO_FILE (-g) is not specified." << endl;
		foutLog << "Error: GENO_FILE (-g) is not specified." << endl;
		foutLog.close();
		return 0;
	}
	if(SEQ_FILE.compare(default_str) != 0){
		if(!get_table_dim(nrow, ncol, SEQ_FILE, '\t')){
			cerr << "Error: cannot open the SEQ_FILE '" << SEQ_FILE << "'." << endl;    
			foutLog << "Error: cannot open the SEQ_FILE '" << SEQ_FILE << "'." << endl; 
			foutLog.close();
			return 0;
		}	
		SEQ_INDS = nrow - SEQ_NON_DATA_ROWS;
		int tmpLOCI = ncol - SEQ_NON_DATA_COLS;
		cout << SEQ_INDS << " individuals are detected in the SEQ_FILE." << endl;
		cout << tmpLOCI << " loci are detected in the SEQ_FILE." << endl;
		foutLog << SEQ_INDS << " individuals are detected in the SEQ_FILE." << endl;
		foutLog << tmpLOCI << " loci are detected in the SEQ_FILE." << endl;
		if(SEQ_INDS < 0){
			cerr << "Error: Invalid number of rows in the SEQ_FILE '" << SEQ_FILE << "'." << endl;
			foutLog << "Error: Invalid number of rows in the SEQ_FILE '" << SEQ_FILE << "'." << endl;
			flag = 0;
		}
		if(tmpLOCI < 0){
			cerr << "Error: Invalid number of columns in the SEQ_FILE '" << SEQ_FILE << "'." << endl;
			foutLog << "Error: Invalid number of columns in the SEQ_FILE '" << SEQ_FILE << "'." << endl;
			flag = 0;
		}else if(tmpLOCI != LOCI && LOCI >= 0){
			cerr << "Error: Number of loci in the SEQ_FILE is not the same as in the GENO_FILE." << endl;
			foutLog << "Error: Number of loci in the SEQ_FILE is not the same as in the GENO_FILE." << endl;
			flag = 0;
		}
	}	
	if(COORD_FILE.compare(default_str) != 0){
		if(!get_table_dim(nrow, ncol, COORD_FILE, '\t')){
			cerr << "Error: cannot open the COORD_FILE '" << COORD_FILE << "'." << endl;    
			foutLog << "Error: cannot open the COORD_FILE '" << COORD_FILE << "'." << endl; 
			foutLog.close();
			return 0;
		}	
		int tmpINDS = nrow - COORD_NON_DATA_ROWS;
		NUM_PCS = ncol - COORD_NON_DATA_COLS;
		cout << tmpINDS << " individuals are detected in the COORD_FILE." << endl;
		cout << NUM_PCS << " PCs are detected in the COORD_FILE." << endl;
		foutLog << tmpINDS << " individuals are detected in the COORD_FILE." << endl;
		foutLog << NUM_PCS << " PCs are detected in the COORD_FILE." << endl;
		
		if(tmpINDS < 0){
			cerr << "Error: Invalid number of rows in the COORD_FILE " << COORD_FILE << "." << endl;
			foutLog << "Error: Invalid number of rows in the COORD_FILE " << COORD_FILE << "." << endl;
			flag = 0;
		}else if(tmpINDS != REF_INDS && REF_INDS >= 0){
			cout << tmpINDS << "\t" << REF_INDS << endl;
			cerr << "Error: Number of individuals in the COORD_FILE is not the same as in the GENO_FILE." << endl;
			foutLog << "Error: Number of individuals in the COORD_FILE is not the same as in the GENO_FILE." << endl;
			flag = 0;
		}
		if(NUM_PCS < 0){
			cerr << "Error: Invalid number of columns in the COORD_FILE " << COORD_FILE << "." << endl;
			foutLog << "Error: Invalid number of columns in the COORD_FILE " << COORD_FILE << "." << endl;
			flag = 0;
		}
	}
	if(flag == 0){
		foutLog.close();
		return 0;
	}	
	//################  Set default values to some parameters #######################
	if(SEQ_ERR==default_double){ SEQ_ERR = 0.01; }
	if(MIN_LOCI==default_int){ MIN_LOCI = 100; }
	if(DIM==default_int){ DIM = 2; }
	if(FIRST_IND==default_int){ FIRST_IND = 1; }
	if(LAST_IND==default_int){ LAST_IND = SEQ_INDS; }
	if(REPS == default_int){ REPS = 1; }
	if(OUTPUT_REPS == default_int) { OUTPUT_REPS = 0; }
	if(CHECK_COVERAGE==default_int){ CHECK_COVERAGE = 0; }
	if(CHECK_FORMAT==default_int){ CHECK_FORMAT = 10; }
	if(PCA_MODE==default_int){ PCA_MODE = 0; }
	//======================================================================
	if(DIM_SEQ==default_int){ DIM_SEQ = DIM; }

	// ################### Check Parameters #############################
	flag = check_parameters();
	if(flag==0){
		foutLog.close();
		return 0;
	}
	if(MIN_LOCI <= DIM && PCA_MODE!=1){
		cerr << "Warning: DIM>=MIN_LOCI is found; DIM=" << DIM << ", MIN_LOCI=" << MIN_LOCI << "." << endl; 
		cerr << "Reset MIN_LOCI to DIM+1: MIN_LOCI=" << DIM+1 << "." << endl;
		foutLog << "Warning: DIM>=MIN_LOCI is found; DIM=" << DIM << ", MIN_LOCI=" << MIN_LOCI << "." << endl; 
		foutLog << "Reset MIN_LOCI to DIM+1: MIN_LOCI=" << DIM+1 << "." << endl;
		MIN_LOCI = DIM+1;
	}
	if(REPS==1){
		OUTPUT_REPS = 0;
	}
	print_configuration();
	// #####################  Check data format ############################
	if(CHECK_FORMAT != 0){
		int flag1 = 1;
		int flag2 = 1;
		int flag3 = 1;
 		time ( &rawtime );
  		timeinfo = localtime ( &rawtime );
		cout << endl << asctime (timeinfo);
		cout << "Checking data format ..." << endl;
		foutLog << endl << asctime (timeinfo);
		foutLog << "Checking data format ..." << endl;
		if(CHECK_FORMAT==1 || CHECK_FORMAT==2 || CHECK_FORMAT==10 || CHECK_FORMAT==20){
			if(GENO_FILE.compare(default_str)!=0){
				flag1 = check_format_geno();
				if(flag1==1){
					cout << "GENO_FILE: OK." << endl;
					foutLog << "GENO_FILE: OK." << endl;				
				}
			}else{
					cout << "GENO_FILE: not specified." << endl;
					foutLog << "GENO_FILE: not specified." << endl;
			}
		}
		if(CHECK_FORMAT==1 || CHECK_FORMAT==3 || CHECK_FORMAT==10 || CHECK_FORMAT==30){
			if(SEQ_FILE.compare(default_str)!=0){
				flag2 = check_format_seq();
				if(flag2==1){
					cout << "SEQ_FILE: OK." << endl;
					foutLog << "SEQ_FILE: OK." << endl;
				}
			}else{
					cout << "SEQ_FILE: not specified." << endl;
					foutLog << "SEQ_FILE: not specified." << endl;
			}
		}
		if(CHECK_FORMAT==1 || CHECK_FORMAT==4 || CHECK_FORMAT==10 || CHECK_FORMAT==40){
			if(COORD_FILE.compare(default_str)!=0){
				flag3 = check_format_coord();
				if(flag3==1){
					cout << "COORD_FILE: OK." << endl;
					foutLog << "COORD_FILE: OK." << endl;
				}
			}else{
					cout << "COORD_FILE: not specified." << endl;
					foutLog << "COORD_FILE: not specified." << endl;
			}
		}
		if(flag1==0 || flag2==0 || flag3==0){
			foutLog.close();
			return 0;
		}
		if(CHECK_FORMAT < 5){
			time ( &rawtime );
 		 	timeinfo = localtime ( &rawtime );			
			t2 = clock();
			runningtime = (t2-t1)/CLOCKS_PER_SEC;
			cout << endl << "Finished at: " << asctime (timeinfo);
			cout << "Total running time: " << runningtime << " seconds." << endl;
			cout << "====================================================================" <<endl;
			foutLog << endl << "Finished at: " << asctime (timeinfo);
			foutLog << "Total running time: " << runningtime << " seconds." << endl;
			foutLog << "====================================================================" <<endl;
			foutLog.close(); 
			return 1;
		}	
	}
	// #####################  Check coverage ############################
	if(CHECK_COVERAGE != 0 && PCA_MODE == 0){
		time ( &rawtime );
  		timeinfo = localtime ( &rawtime );
  		cout << endl << asctime (timeinfo);
		cout << "Checking coverage in the sequence data ..." << endl;
  		foutLog << endl << asctime (timeinfo);
		foutLog << endl << "Checking coverage in the sequence data ..." << endl;
		int flag = check_coverage();
		if(flag == 0){
			foutLog.close();
			return 0;
		}
		if(CHECK_COVERAGE == 2){
			time ( &rawtime );
 		 	timeinfo = localtime ( &rawtime );
			t2 = clock();
			runningtime = (t2-t1)/CLOCKS_PER_SEC;
			cout << endl << "Finished at: " << asctime (timeinfo);
			cout << "Total running time: " << runningtime << " seconds." << endl;
			cout << "====================================================================" <<endl;
			foutLog << endl << "Finished at: " << asctime (timeinfo);
			foutLog << "Total running time: " << runningtime << " seconds." << endl;
			foutLog << "====================================================================" <<endl;
			foutLog.close();
			return 1;			
		}
	}
	// ##################################################################
	int i=0;
	int j=0;
	int k=0;
	string str;
	ifstream fin;
	string outfile;
	ofstream fout;
	string outfile2;
	ofstream fout2;
	string outfile3;
	ofstream fout3;
	// == Variables to be saved for reused ==
	string *RefInfo1 = new string [REF_INDS];
	string *RefInfo2 = new string [REF_INDS];
	fmat RefG(REF_INDS, LOCI);
	mat refPC = zeros<mat>(REF_INDS,DIM);
	//========================= Read reference data ==========================
	fin.open(GENO_FILE.c_str());
	if(fin.fail()){
		cerr << "Error: cannot find the GENO_FILE '" << GENO_FILE << "'." << endl;    
		foutLog << "Error: cannot find the GENO_FILE '" << GENO_FILE << "'." << endl;   
		foutLog.close();
		return 0;
	}
 	time ( &rawtime );
  	timeinfo = localtime ( &rawtime );
  	cout << endl << asctime (timeinfo);	
	cout << "Reading reference genotypes ..." << endl;
  	foutLog << endl << asctime (timeinfo);
	foutLog << "Reading reference genotypes ..." << endl;
	for(i=0; i<GENO_NON_DATA_ROWS; i++){
		getline(fin, str);          // Read non-data rows
	}
	for(i=0; i<REF_INDS; i++){
		fin >> RefInfo1[i] >> RefInfo2[i];
		for(j=2; j<GENO_NON_DATA_COLS; j++){
			fin >> str;
		}
		for(j=0; j<LOCI; j++){
			fin >> RefG(i,j);    // Read genotype data
		}
	}
	if(!fin.good()){
		fin.close();
		cerr << "Error: ifstream error occurs when reading the GENO_FILE." << endl;
		cerr << "Run 'laser -fmt 2' to check the GENO_FILE '" << GENO_FILE << "'." << endl;
		foutLog << "Error: ifstream error occurs when reading the GENO_FILE." << endl;
		foutLog << "Run 'laser -fmt 2' to check the GENO_FILE '" << GENO_FILE << "'." << endl;
		foutLog.close();
		return 0;
	}		
	fin.close();
	//========================= Get reference coordinates  ==========================
	time ( &rawtime );
	timeinfo = localtime ( &rawtime );
	if(COORD_FILE.compare(default_str)!=0 && PCA_MODE==0){		
		fin.open(COORD_FILE.c_str());
		if(fin.fail()){
			cerr << "Error: cannot find the COORD_FILE '" << COORD_FILE << "'." << endl;
			foutLog << "Error: cannot find the COORD_FILE '" << COORD_FILE << "'." << endl;  
			foutLog.close();
			return 0;
		}else{
			cout << endl << asctime (timeinfo);
			cout << "Reading reference PCA coordinates ..." << endl;
			foutLog << endl << asctime (timeinfo);
			foutLog << "Reading reference PCA coordinates ..." << endl;
			for(i=0; i<COORD_NON_DATA_ROWS; i++){
				getline(fin, str);          // Read non-data rows
			}
			for(i=0; i<REF_INDS; i++)
			{
				string popstr;
				string indstr;
				fin >> popstr >> indstr;
				if(popstr.compare(RefInfo1[i])!=0 || indstr.compare(RefInfo2[i])!=0){
					cerr << "Error: ID of individual " << i+1 << " in the COORD_FILE differs from that in the GENO_FILE." << endl;
					foutLog << "Error: ID of individual " << i+1 << " in the COORD_FILE differs from that in the GENO_FILE." << endl;
					fin.close();
					foutLog.close();
					return 0;
				}
				for(j=2; j<COORD_NON_DATA_COLS; j++){
					fin >> str;
				}
				for(j=0; j<DIM; j++){
					fin >> refPC(i,j);    // Read reference coordiantes
				}
				getline(fin, str);  // Read the rest of the line 
			}
		}
		if(!fin.good()){
			fin.close();
			cerr << "Error: ifstream error occurs when reading the COORD_FILE." << endl;
			cerr << "Run 'laser -fmt 4' to check the COORD_FILE '" << COORD_FILE << "'." << endl;
			foutLog << "Error: ifstream error occurs when reading the COORD_FILE." << endl;
			foutLog << "Run 'laser -fmt 4' to check the COORD_FILE '" << COORD_FILE << "'." << endl;
			foutLog.close();
			return 0;
		}
		fin.close();
	}else{                
		cout << endl << asctime (timeinfo);
		cout << "Performing PCA on reference genotypes ..." << endl;
		foutLog << endl << asctime (timeinfo);
		foutLog << "Performing PCA on reference genotypes ..." << endl;	
		rowvec PCvar = zeros<rowvec>(DIM);
		if(PCA_MODE==0){
			pca(RefG, DIM, refPC, PCvar);   // Perform PCA
		}else{
			pca_only(RefG, DIM, refPC, PCvar); 
		}
		//==================== Output reference PCs ==========================
		outfile = OUT_PREFIX;
		outfile.append(".RefPC.coord");
		fout.open(outfile.c_str());
		if(fout.fail()){
			cerr << "Error: cannot create a file named " << outfile << "." << endl;
			foutLog << "Error: cannot create a file named " << outfile << "." << endl;  
			foutLog.close();
			return 0;
		}		
		fout << "popID\tindivID\t";
		for(j=0; j<DIM; j++){ fout << "PC" << j+1 << "\t"; }
		fout << endl;
		for(i=0; i<REF_INDS; i++){
			fout << RefInfo1[i] << "\t" << RefInfo2[i] << "\t";
			for(j=0; j<DIM; j++){
				fout << refPC(i,j) << "\t";
			}
			fout << endl;	
		}		
		fout.close();
		cout << "Reference PCA coordinates are output to '" << outfile << "'." << endl;
		foutLog << "Reference PCA coordinates are output to '" << outfile << "'." << endl;
		//==================================================================
		outfile = OUT_PREFIX;
		outfile.append(".RefPC.var");
		fout.open(outfile.c_str());
		if(fout.fail()){
			cerr << "Error: cannot create a file named " << outfile << "." << endl;
			foutLog << "Error: cannot create a file named " << outfile << "." << endl;  
			foutLog.close();
			return 0;
		}
		fout << "PC" << "\t" << "Variance(%)" << endl;
		for(j=0; j<DIM; j++){ 
			fout << j+1 << "\t" << PCvar(j) << endl;
		}	
		fout.close();
		PCvar.clear();
		cout << "Variances explained by PCs are output to '" << outfile << "'." << endl;
		foutLog << "Variances explained by PCs are output to '" << outfile << "'." << endl;		
		//===================================================================
		if(PCA_MODE == 1){                                 // If performing PCA only
 	 		delete [] RefInfo1;
			delete [] RefInfo2;
			time ( &rawtime );
 		 	timeinfo = localtime ( &rawtime );
			t2 = clock();
			runningtime = (t2-t1)/CLOCKS_PER_SEC;
			cout << endl << "Finished at: " << asctime (timeinfo);
			cout << "Total running time: " << runningtime << " seconds." << endl;
			cout << "====================================================================" <<endl;
			foutLog << endl << "Finished at: " << asctime (timeinfo);
			foutLog << "Total running time: " << runningtime << " seconds." << endl;
			foutLog << "====================================================================" <<endl;
			foutLog.close();
			return 1;	
		}
	}
	delete [] RefInfo1;
	delete [] RefInfo2;
	
	//=============== Add columns of 0 to refPC to match DIM_SEQ ===============
	if(DIM<DIM_SEQ){
		k = DIM_SEQ-DIM;
		mat X = zeros<mat>(REF_INDS, k);
		refPC.insert_cols(DIM, X);
		X.clear();
	}				
	//=========================================================================
	//========================= Read sequence data ==========================
		
	fin.open(SEQ_FILE.c_str());
	if(fin.fail()){
		cerr << "Error: cannot find the SEQ_FILE '"<< SEQ_FILE <<"'." << endl;
		foutLog << "Error: cannot find the SEQ_FILE '"<< SEQ_FILE <<"'." << endl;
		foutLog.close();       
		return 0;
	}
	//==== Open output file ====
	outfile = OUT_PREFIX;
	outfile.append(".SeqPC.coord");
	fout.open(outfile.c_str());
	if(fout.fail()){
		cerr << "Error: cannot create a file named " << outfile << "." << endl;
		foutLog << "Error: cannot create a file named " << outfile << "." << endl;
		foutLog.close();       
		return 0;
	}
	fout << "popID\t" << "indivID\t" << "L1\t" << "Ci\t" << "t\t";
	for(j=0; j<DIM; j++){ fout << "PC" << j+1 << "\t"; }
	fout << endl;

	if(REPS>1){
		outfile2 = outfile;
		outfile2.append(".sd");
		fout2.open(outfile2.c_str());
		if(fout2.fail()){
			cerr << "Error: cannot create a file named " << outfile2 << "." << endl;
			foutLog << "Error: cannot create a file named " << outfile2 << "." << endl;
			foutLog.close(); 
			fout.close();      
			return 0;
		}	
		fout2 << "popID\t" << "indivID\t" << "t.sd ";
		for(j=0; j<DIM; j++){
			fout2 << "PC" << j+1 << ".sd\t";
		}
		fout2 << endl;
		if(OUTPUT_REPS==1){
			outfile3 = outfile;
			outfile3.append(".reps");
			fout3.open(outfile3.c_str());
			if(fout3.fail()){
				cerr << "Error: cannot create a file named " << outfile3 << "." << endl;
				foutLog << "Error: cannot create a file named " << outfile3 << "." << endl;
				foutLog.close(); 
				fout.close();
				fout2.close();      
				return 0;
			}	
			fout3 << "popID\t" << "indivID\t" << "L1\t" << "Ci\t" << "t\t";
			for(j=0; j<DIM; j++){
				fout3 << "PC" << j+1 << "\t";
			}
			fout3 << endl;
		}
	}
	//==========================
 	time ( &rawtime );
  	timeinfo = localtime ( &rawtime );
  	cout << endl << asctime (timeinfo);
	cout << "Analyzing sequence samples ..." << endl;
  	foutLog << endl << asctime (timeinfo);	
	foutLog << "Analyzing sequence samples ..." << endl;
	for(i=0; i<SEQ_NON_DATA_ROWS; i++){
		getline(fin, str);          // Read non-data rows
	}
	for(i=1; i<=LAST_IND; i++){
		if(i<FIRST_IND){
			getline(fin, str);
		}else{
			string SeqInfo1;
			string SeqInfo2;
			rowvec C(LOCI);    // Coverage of one sample
			rowvec S(LOCI);    // Sequence read of one sample
			int Lcov = 0;     // Number of loci with nonzero coverage			
			fin >> SeqInfo1 >> SeqInfo2;
			for(j=2; j<SEQ_NON_DATA_COLS; j++){
				fin >> str;
			}
			for(j=0; j<LOCI; j++){
				fin >> C(j) >> S(j);
				if(C(j)<0 || S(j)<0 || S(j)>C(j)){
					if(!fin.good()){
						fin.close();
						cerr << "Error: ifstream error occurs when reading the SEQ_FILE." << endl;
						cerr << "Run 'laser -fmt 3' to check the SEQ_FILE '" << SEQ_FILE << "'." << endl;
						foutLog << "Error: ifstream error occurs when reading the SEQ_FILE." << endl;
						foutLog << "Run 'laser -fmt 3' to check the SEQ_FILE '" << SEQ_FILE << "'." << endl;
						foutLog.close();
						return 1;
					}else{
						fin.close();
						cerr << "Error: invalid value at locus "<< j+1 << " of individual " << i << " in the SEQ_FILE." << endl;
						foutLog << "Error: invalid value at locus "<< j+1 << " of individual " << i << " in the SEQ_FILE." << endl;
						foutLog.close();       
						return 1;
					}
				}else if(C(j)>0){
					Lcov++;
				}
			}
			frowvec Cc(Lcov);
			frowvec Sc(Lcov);
			urowvec Loc(Lcov);
			k=0;
			double meanC = 0;
			for(j=0; j<LOCI; j++){
				if(C(j)>0){
					Cc(k)=C(j);
					Sc(k)=S(j);
					Loc(k)=j;
					k++;
					meanC += C(j);
				}
			}
			meanC = meanC/LOCI;
			C.clear();
			S.clear();
			if(Lcov >= MIN_LOCI){
				double t_m1 = 0;
				double t_m2 = 0;
				rowvec rotPC_m1 = zeros<rowvec>(DIM);
				rowvec rotPC_m2 = zeros<rowvec>(DIM);
				for(int rep=0; rep<REPS; rep++){
					//=================== Simulate sequence reads ======================
					int random_seed = time(NULL)+rep+i+Lcov;
					fmat SS(REF_INDS, Lcov);
					simuseq(RefG, Cc, Loc, SEQ_ERR, SS, random_seed);  
					SS.insert_rows(REF_INDS, Sc);
					//=================== Perform PCA =================================	
					rowvec PCvar = zeros<rowvec>(DIM_SEQ);
					mat simuPC(REF_INDS+1, DIM_SEQ);
					pca_only(SS, DIM_SEQ, simuPC, PCvar);
					SS.clear();
					rowvec PC_one = zeros<rowvec>(DIM_SEQ);
					for(j=0; j<DIM_SEQ; j++){
						PC_one(j) = simuPC(REF_INDS, j);
					}
					simuPC.shed_row(REF_INDS);
					//=================  Procrustes Analysis =======================
					mat simuPC_rot(REF_INDS, DIM_SEQ);
					double t;
					double rho;
					mat A(DIM_SEQ, DIM_SEQ);
					rowvec b(DIM_SEQ);
					procrustes(simuPC, refPC, simuPC_rot, t, rho, A, b);
					rowvec rotPC_one = rho*PC_one*A+b;
					simuPC.clear();
					//================= Output Procrustes Results for one repated run ===================	
					if(REPS == 1){
						fout << SeqInfo1 << "\t" << SeqInfo2 << "\t" << Lcov << "\t" << meanC << "\t" << t << "\t"; 
						for(j=0; j<DIM; j++){ fout << rotPC_one(j) << "\t"; }
						fout << endl;
					}else if(REPS > 1){
						if(OUTPUT_REPS == 1){
							fout3 << SeqInfo1 << "\t" << SeqInfo2 << "\t" << Lcov << "\t" << meanC << "\t" << t << "\t"; 
							for(j=0; j<DIM; j++){ fout3 << rotPC_one(j) << "\t"; }
							fout3 << endl;
						}	
						t_m1 += t;
						t_m2 += pow(t,2);
						rotPC_m1 = rotPC_m1 + rotPC_one;
						rotPC_m2 = rotPC_m2 + rotPC_one%rotPC_one;
					}
				}
				Cc.clear();	
				Sc.clear();
				Loc.clear();		
				//================= Output Procrustes Results ===================
				if(REPS>1){
					// calculate mean and sd
					double t_mean = t_m1/REPS;
					double t_sd = sqrt((t_m2-REPS*pow(t_mean,2))/(REPS-1));
					rowvec rotPC_mean = rotPC_m1/REPS;
					rowvec rotPC_sd = sqrt((rotPC_m2-REPS*rotPC_mean%rotPC_mean)/(REPS-1));					
					// output mean values of results
					fout << SeqInfo1 << "\t" << SeqInfo2 << "\t" << Lcov << "\t" << meanC << "\t" << t_mean << "\t"; 
					for(j=0; j<DIM; j++){
						fout << rotPC_mean(j) << "\t";
					}
					fout << endl;
					// output sd values of results
					fout2 << SeqInfo1 << "\t" << SeqInfo2 << "\t" << t_sd << "\t"; 
					for(j=0; j<DIM; j++){
						fout2 << rotPC_sd(j) << "\t";
					}
					fout2 << endl;
					// declare vectors
					rotPC_mean.clear();
					rotPC_sd.clear();
				}
				rotPC_m1.clear();
				rotPC_m2.clear();
			}else{
				//Too few number of loci covered. Skip computation and output "NA".
				cout << "Warning: skipping sample "<< SeqInfo2 << " (# covered loci < MIN_LOCI)." << endl;
				foutLog << "Warning: skipping sample "<< SeqInfo2 << " (# covered loci < MIN_LOCI)." << endl;
				fout << SeqInfo1 << "\t" << SeqInfo2 << "\t" << Lcov << "\t" << meanC << "\t" << "NA" << "\t"; 
				for(j=0; j<DIM; j++){
					fout << "NA" << "\t";
				}
				fout << endl;
				if(REPS>1){
					fout2 << SeqInfo1 << "\t" << SeqInfo2 << "\t" << "NA" << "\t"; 
					for(j=0; j<DIM; j++){ fout2 << "NA" << "\t"; }
					fout2 << endl;
					if(OUTPUT_REPS==1){
						for(int rep=0; rep<REPS; rep++){
							fout3 << SeqInfo1 << "\t" << SeqInfo2 << "\t" << Lcov << "\t" << meanC << "\t" << "NA" << "\t"; 
							for(j=0; j<DIM; j++){ fout3 << "NA" << "\t"; }
							fout3 << endl;
						}
					}
				}
			}		
			if(i%50==0){	
				cout << "Progress: finish analysis of individual " << i << "." << endl;
				foutLog << "Progress: finish analysis of individual " << i << "." << endl;			
			}
		}
	}
	if(!fin.good()){
		fin.close();
		fout.close();
		if(REPS>1){
			fout2.close();
			if(OUTPUT_REPS==1){
				fout3.close();
			}
		}	
		cerr << "Error: ifstream error occurs when reading the SEQ_FILE." << endl;
		cerr << "Run 'laser -fmt 3' to check the SEQ_FILE '" << SEQ_FILE << "'." << endl;
		foutLog << "Error: ifstream error occurs when reading the SEQ_FILE." << endl;
		foutLog << "Run 'laser -fmt 3' to check the SEQ_FILE '" << SEQ_FILE << "'." << endl;
		foutLog.close();
		return 0;
	}
	fin.close();
	fout.close();
	if(REPS>1){
		fout2.close();
		cout << "Results for the sequence samples are output to:" << endl; 
		cout << "'" << outfile << "' (mean across " << REPS << " repeated runs)" << endl;
		cout << "'" << outfile2 << "' (standard deviation across " << REPS << " repeated runs)" << endl;
		foutLog << "Results for the sequence samples are output to:" << endl; 
		foutLog << "'" << outfile << "' (mean across " << REPS << " repeated runs)" << endl;
		foutLog << "'" << outfile2 << "' (standard deviation across " << REPS << " repeated runs)" << endl;
		if(OUTPUT_REPS==1){
			fout3.close();	
			cout << "'" << outfile3 << "' (results from all " << REPS << " repeated runs)" << endl;	
			foutLog << "'" << outfile3 << "' (results from all " << REPS << " repeated runs)" << endl;	
		}
	}else{
		cout << "Results for the sequence samples are output to '" << outfile << "'." << endl;
		foutLog << "Results for the sequence samples are output to '" << outfile << "'." << endl;
	}

	//#########################################################################	
	time ( &rawtime );
 	timeinfo = localtime ( &rawtime );
	t2 = clock();
	runningtime = (t2-t1)/CLOCKS_PER_SEC;
	cout << endl << "Finished at: " << asctime (timeinfo);
	cout << "Total running time: " << runningtime << " seconds." << endl;
	cout << "====================================================================" <<endl;
	foutLog << endl << "Finished at: " << asctime (timeinfo);
	foutLog << "Total running time: " << runningtime << " seconds." << endl;
	foutLog << "====================================================================" <<endl;
	foutLog.close();	
	return 1;
}
//##########################################################################################################
bool parse_cmd_line(int argc, char* argv[], map<string,string> &args, map<string,int> &argi, map<string,double> &argd){
	bool flag=1;
	//Populate with default values
	args[ARG_PARAM_FILE] = default_str;
	args[ARG_GENO_FILE] = default_str;
	args[ARG_SEQ_FILE] = default_str;
	args[ARG_COORD_FILE] = default_str;
 	argi[ARG_DIM] = default_int;
	args[ARG_OUT_PREFIX] = default_str;
	argi[ARG_MIN_LOCI] = default_int;
 	argd[ARG_SEQ_ERR] = default_double;
	argi[ARG_FIRST_IND] = default_int;	
	argi[ARG_LAST_IND] = default_int;
	argi[ARG_REPS] = default_int;
	argi[ARG_OUTPUT_REPS] = default_int;
	argi[ARG_CHECK_COVERAGE] = default_int;
	argi[ARG_CHECK_FORMAT] = default_int;	
	argi[ARG_PCA_MODE] = default_int;
	argi[ARG_DIM_SEQ] = default_int;
	for(int i = 1; i < argc-1; i++){
		if(args.count(argv[i]) > 0){
	  		args[argv[i]] = argv[i+1];
			i++;
		}else if(argi.count(argv[i]) > 0){
			if(is_int(argv[i+1])){
	  			argi[argv[i]] = atoi(argv[i+1]);
				i++;
			}else{
				cerr <<"Error: "<<"invalid value for "<<argv[i]<<"." << endl;
				foutLog <<"Error: "<<"invalid value for "<<argv[i]<<"." << endl;
				flag=0;
			}
		}else if(argd.count(argv[i]) > 0){
			if(is_numeric(argv[i+1])){
	  			argd[argv[i]] = atof(argv[i+1]);
				i++;
			}else{
				cerr <<"Error: "<<"invalid value for "<<argv[i]<<"." << endl;
				foutLog <<"Error: "<<"invalid value for "<<argv[i]<<"." << endl;
				flag=0;
			}
		}else{
			cerr << "Error: " << argv[i] << " is not recognized as a valid argument." << endl;
			foutLog << "Error: " << argv[i] << " is not recognized as a valid argument." << endl;
			flag=0;
		}
	}
	return flag;
}
//############## Check if a string is an integer #######################
// Returns 1 if str is a non-empty run of decimal digits ('0'-'9') and 0 otherwise.
// Signs, whitespace and any other character make the string invalid; the
// empty string is rejected because it does not denote a number.
bool is_int(string str){
	if(str.empty()){
		return 0;
	}
	return str.find_first_not_of("0123456789") == string::npos;
}
//############## Check if a string is a number #######################
// Returns 1 if str is an unsigned decimal number: digits with at most one '.',
// and at least one digit overall (so "" and "." are rejected).
// Signs and exponents are not accepted.
bool is_numeric(string str){
	bool seen_digit = 0;
	bool seen_dot = 0;
	for(string::size_type i=0; i<str.length(); i++){
		const char c = str[i];
		if(c >= '0' && c <= '9'){
			seen_digit = 1;
		}else if(c == '.' && !seen_dot){
			seen_dot = 1;
		}else{
			return 0;       // second dot or a non-numeric character
		}
	}
	return seen_digit;
}
//######################### Procrustes Analysis ##########################
// Procrustes analysis: find the similarity transformation that maps X onto Y.
//   X, Y  : input coordinate matrices with one row per point (same number of rows).
//   Xnew  : output, the transformed X, i.e. rho*X*A + b applied to every row.
//   t     : output, similarity score sqrt(1-D) with D = 1 - tr(S)^2/(tr(Xc'Xc)*tr(Yc'Yc)).
//   rho   : output, scaling factor.
//   A     : output, rotation/reflection matrix.
//   b     : output, translation (row vector).
// Returns 1 on success and 0 if the singular value decomposition fails.
int procrustes(mat &X, mat &Y, mat &Xnew, double &t, double &rho, mat &A, rowvec &b){
	int NUM = X.n_rows;
	//======================= Center to mean =======================
	mat Xm = mean(X);
	mat Ym = mean(Y);
	mat Xc = X-repmat(Xm, NUM, 1);
	mat Yc = Y-repmat(Ym, NUM, 1);
	//======================  SVD =====================
	mat C = Yc.t()*Xc;
	mat U;
	vec s;
	mat V;
	bool bflag = svd(U, s, V, C, "dc");	// use "divide & conquer" algorithm
	//bool bflag = svd(U, s, V, C);
	if(!bflag){
		cout << "Error: singular value decomposition in procrustes() fails." << endl;
		return 0;
	}
	//===================== Transformation ===================
	double trXX = trace(Xc.t()*Xc);
	double trYY = trace(Yc.t()*Yc);
	double trS = sum(s);
	A = V*U.t();
	rho = trS/trXX;
	b = Ym-rho*Xm*A;
	double D = 1-pow(trS,2)/(trXX*trYY);
	t = sqrt(1-D);
	// b already contains the -rho*Xm*A term, so the transformation must be applied
	// to the uncentered X; using Xc here would subtract the mean of X twice and
	// make Xnew inconsistent with the returned (rho, A, b).
	Xnew = rho*X*A+repmat(b, NUM, 1);
	return 1;
}
//#########################     PCA      ##########################
int pca(fmat G, int nPCs, mat &PC, rowvec &PCvar){
	int i=0;
	int j=0;
	int k=0;
	int N = G.n_rows;
	int L = G.n_cols;
	//========================= Normalization and Covariance =======================		
	for(j=0; j<L; j++){
		double X1=0;
		double X2=0;
		double NM=0;
		for(i=0; i<N; i++){
			if(G(i,j) != -9){
				X1 += G(i,j);
				X2 += pow(G(i,j),2);
				NM++;
			}
		}			
		double colM=X1/NM;
		double colSD=sqrt((X2-NM*pow(colM,2))/(NM-1));
		if(colSD == 0){           // Monophmorphic sites are set to 0
			for(i=0; i<N; i++){ G(i,j) = 0; }
		}else{
			for(i=0; i<N; i++){
				if(G(i,j) != -9){  // Missing data are set to 0
					G(i,j) = (G(i,j)-colM)/colSD;
				}else{
					G(i,j) = 0;
				}
			}
		}			
	}
	fmat tmpM = zeros<fmat>(N, N);
	tmpM = G*G.t();
	mat M = conv_to<mat>::from(tmpM);
	tmpM.clear();
	vec eigval;
	mat eigvec;
	bool bflag = eig_sym(eigval, eigvec, M, "dc");	// use "divide & conquer" algorithm
	//bool bflag = eig_sym(eigval, eigvec, M);
	M.clear();
	if(!bflag){
		eigval.clear();
		eigvec.clear();
		cout << "Error: eigen decomposition in pca() fails." << endl;
		return 0;
	}
	double eigsum = sum(eigval);
	vec propvar = eigval/eigsum*100;	
	PCvar = zeros<rowvec>(nPCs);
	for(j=0; j<nPCs; j++){
		if(eigval(N-1-j)<0){
			PCvar(j) = 0;
			for(i=0; i<N; i++){
				PC(i,j) = 0;
			}
		}else{
			PCvar(j) = propvar(N-1-j);
			for(i=0; i<N; i++){
				PC(i,j) = eigvec(i, N-1-j)*sqrt(eigval(N-1-j));
			}	
		}
	}
	return 1;
}
//#########################     PCA ONLY     ##########################
int pca_only(fmat &G, int nPCs, mat &PC, rowvec &PCvar){
	int i=0;
	int j=0;
	int k=0;
	int N = G.n_rows;
	int L = G.n_cols;
	//========================= Normalization and Covariance =======================		
	for(j=0; j<L; j++){
		double X1=0;
		double X2=0;
		double NM=0;
		for(i=0; i<N; i++){
			if(G(i,j) != -9){
				X1 += G(i,j);
				X2 += pow(G(i,j),2);
				NM++;
			}
		}			
		double colM=X1/NM;
		double colSD=sqrt((X2-NM*pow(colM,2))/(NM-1));
		if(colSD == 0){           // Monophmorphic sites are set to 0
			for(i=0; i<N; i++){ G(i,j) = 0; }
		}else{
			for(i=0; i<N; i++){
				if(G(i,j) != -9){  // Missing data are set to 0
					G(i,j) = (G(i,j)-colM)/colSD;
				}else{
					G(i,j) = 0;
				}
			}
		}			
	}
	// Use eigen decomposition to get PCA results
	fmat tmpM = zeros<fmat>(N, N);
	tmpM = G*G.t();
	mat M = conv_to<mat>::from(tmpM);
	tmpM.clear();
	vec eigval;
	mat eigvec;
	bool bflag = eig_sym(eigval, eigvec, M, "dc");	// use "divide & conquer" algorithm
	//bool bflag = eig_sym(eigval, eigvec, M);
	M.clear();
	if(!bflag){
		eigval.clear();
		eigvec.clear();
		cout << "Error: eigen decomposition in pca() fails." << endl;
		foutLog << "Error: eigen decomposition in pca() fails." << endl;
		return 0;
	}
	double eigsum = sum(eigval);
	vec propvar = eigval/eigsum*100;	
	PCvar = zeros<rowvec>(nPCs);
	for(j=0; j<nPCs; j++){
		if(eigval(N-1-j)<0){
			PCvar(j) = 0;
			for(i=0; i<N; i++){
				PC(i,j) = 0;
			}
		}else{
			PCvar(j) = propvar(N-1-j);
			for(i=0; i<N; i++){
				PC(i,j) = eigvec(i, N-1-j)*sqrt(eigval(N-1-j));
			}	
		}
	}
	return 1;
}
//################### Simulate sequence reads from genotypes ##########################
// Simulate sequence read counts S from genotypes G.
//   Loc(j) is the column of G that supplies the genotype for simulated locus j,
//   C(j) is the coverage at that locus, and e is the per-read sequencing error rate.
//   A genotype g in {0,1,2} selects the per-read probability {e, 0.5, 1-e};
//   a genotype of -9 is propagated to S as -9.
//   The generator is seeded with random_seed.  Always returns 1.
int simuseq(fmat &G, frowvec &C, urowvec &Loc, double e, fmat &S, int random_seed){
	gsl_rng *generator = gsl_rng_alloc(gsl_rng_taus);
	gsl_rng_set(generator, random_seed);

	const int nInd = G.n_rows;
	const int nLoc = C.n_cols;
	const double readProb[3] = { e, 0.5, 1-e };

	for(int ind=0; ind<nInd; ind++){
		for(int loc=0; loc<nLoc; loc++){
			if(G(ind,Loc(loc))==-9){
				S(ind,loc) = -9;
				continue;
			}
			const double p = readProb[int(G(ind,Loc(loc)))];
			if(p>0 && p<1){
				S(ind,loc) = gsl_ran_binomial(generator, p, int(C(loc)));
			}else if(p==0){
				S(ind,loc) = 0;        // degenerate: no read can match
			}else{
				S(ind,loc) = C(loc);   // degenerate: every read matches
			}
		}
	}

	gsl_rng_free(generator);
	return 1;
}
//################### Check the average coverage per sample and per locus ####################
int check_coverage(){	
	int i=0;
	int j=0;
	string str;
	ifstream fin;
	ofstream fout;
	fin.open(SEQ_FILE.c_str());
	if(fin.fail()){
		cerr << "Error: cannot find the SEQ_FILE '" << SEQ_FILE << "'." << endl;
		foutLog << "Error: cannot find the SEQ_FILE '" << SEQ_FILE << "'." << endl;         
		return 0;
	}
	for(i=0; i<SEQ_NON_DATA_ROWS; i++){
		getline(fin, str);          // Read non-data rows
	}
	vec C_loc = zeros<vec>(LOCI);	// average coverage per marker
	vec Ncov = zeros<vec>(LOCI);   // number of samples with non-zero coverage
	string outfile = OUT_PREFIX;
	outfile.append(".ind.cov");
	fout.open(outfile.c_str());
	if(fout.fail()){
		cerr << "Error: cannot create a file named " << outfile << "." << endl;
		return 0;
	}
	fout << "popID" << "\t" << "indivID" << "\t" << "L1" << "\t"  << "Ci" << endl;
	for(i=0; i<SEQ_INDS; i++)
	{
		string SeqInfo1;
		string SeqInfo2;
		double S;    // Reference
		double C;    // Coverage
		int tmpL=LOCI;
		int Lcov=0;      // number of markers with non-zero coverage
		double C_ind=0;  // average coverage per sample		
		fin >> SeqInfo1 >> SeqInfo2;
		for(j=2; j<SEQ_NON_DATA_COLS; j++){
			fin >> str;
		}
		for(j=0; j<LOCI; j++){
			fin >> C >> S;
			if(C>0){
				Lcov++;
				C_ind += C;
				Ncov(j)++;
				C_loc(j) += C;
			}
		}
		C_ind = C_ind/LOCI;
		fout << SeqInfo1 << "\t" << SeqInfo2 << "\t" << Lcov << "\t" << C_ind << endl;
	}
	fin.close();
	fout.close();
	cout << "Results of the mean coverage per individual are output to '" << outfile << "'." << endl;
	foutLog << "Results of the mean coverage per individual are output to '" << outfile << "'." << endl;

	C_loc = C_loc/SEQ_INDS;

	outfile = OUT_PREFIX;
	outfile.append(".loc.cov");
	fout.open(outfile.c_str());
	if(fout.fail()){
		cerr << "Error: cannot create a file named " << outfile << "." << endl;
		return 0;
	}
	fout << "Locus" << "\t" << "N1" << "\t"  << "Cl" << endl;
	for(j=0; j<LOCI; j++){
		fout << j+1 << "\t" << Ncov(j) << "\t" << C_loc(j) << endl;
	}

	fout.close();
	cout << "Results of the mean coverage per locus are output to '" << outfile << "'." << endl;
	foutLog << "Results of the mean coverage per locus are output to '" << outfile << "'." << endl;

	Ncov.clear();
	C_loc.clear();

	return 1;
}
//################# Function to check the GENO_FILE format  ##################
// Validate the layout of GENO_FILE.
// After GENO_NON_DATA_ROWS header rows, every non-empty line must contain exactly
// LOCI+GENO_NON_DATA_COLS tab-separated columns, and every column past the first
// GENO_NON_DATA_COLS must be one of "0", "1", "2" or "-9".  The number of rows
// (header rows plus non-empty data rows) must equal REF_INDS+GENO_NON_DATA_ROWS.
// Returns 1 if the file passes, 0 otherwise; the first problem found is
// reported to cerr and foutLog.
int check_format_geno(){
	string str;
	int nrow = 0;
	int ncol = 0;
	ifstream fin;
	fin.open(GENO_FILE.c_str());
	if(fin.fail()){
		cerr << "Error: cannot find the GENO_FILE '" << GENO_FILE << "'." << endl;    
		foutLog << "Error: cannot find the GENO_FILE '" << GENO_FILE << "'." << endl;   
		return 0;
	}
	//==========================================================		
	while(nrow < GENO_NON_DATA_ROWS){
		getline(fin, str);     // Read in non-data rows
		nrow+=1;
	}
	while(!fin.eof()){
		getline(fin, str);
		if(str.length()>0){    // Empty lines are ignored and not counted as rows
			nrow+=1;
			ncol=0;
			bool tab=true;    //Previous character is a tab
			// Scan the line one character at a time; ncol is the 1-based index
			// of the column that the current character belongs to.
			for(int i=0; i<str.length(); i++){
				bool missing=false;     
				if(str[i]!='\t' && i==0){        //Read in the first element
					ncol+=1;
					tab=false;
				}else if(str[i]!='\t' && i>0 && tab){
					// First character of a new column
					ncol+=1;
					tab=false;
					if(ncol>GENO_NON_DATA_COLS && (str[i]!='0' && str[i]!='1'&& str[i]!='2')){
						// Not 0/1/2: the only other legal value is "-9", which must be
						// followed by a tab or be the last two characters of the line.
						if(i<(str.length()-2)){
							if(str[i]=='-'&&str[i+1]=='9'&&str[i+2]=='\t'){
								missing = true;
							}
						}else if(i==(str.length()-2)){
							if(str[i]=='-'&&str[i+1]=='9'){
								missing = true;
							}
						}
						if(missing == false){
							cerr<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the GENO_FILE."<<endl;
							foutLog<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the GENO_FILE."<<endl;
							fin.close();
							return 0;
						}
					}
				}else if(str[i]!='\t' && i>0 && !tab){
					// Second or later character of a column: in a genotype column this
					// is only legal as the '9' that immediately follows a '-'.
					if(ncol>GENO_NON_DATA_COLS){
						if(str[i-1]!='-' || str[i]!='9'){
							cerr<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the GENO_FILE."<<endl;
							foutLog<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the GENO_FILE."<<endl;
							fin.close();
							return 0;
						}
					}
				}else if(str[i]=='\t'){
					tab=true;
				}
			}
			if(ncol!=(LOCI+GENO_NON_DATA_COLS)){
				cerr << "Error: incorrect number of loci in row " << nrow << " in the GENO_FILE." <<endl;
				foutLog << "Error: incorrect number of loci in row "<< nrow << " in the GENO_FILE." <<endl;
				fin.close();
				return 0;
			}
		}
	}
	if(nrow!=(REF_INDS+GENO_NON_DATA_ROWS)){
		cerr << "Error: incorrect number of individuals in the GENO_FILE." << endl;
		foutLog << "Error: incorrect number of individuals in the GENO_FILE." << endl;
		fin.close();
		return 0;
	}
	fin.close();
	return 1;
}
//################# Function to check the SEQ_FILE format  ##################
// Validate the layout of SEQ_FILE.
// After SEQ_NON_DATA_ROWS header rows, every non-empty line must contain exactly
// LOCI+SEQ_NON_DATA_COLS tab-separated columns.  Every column past the first
// SEQ_NON_DATA_COLS must consist of digits with exactly one space in it that is
// neither the first nor the last character (i.e. two non-negative integers
// separated by a single space).  The number of rows (header rows plus non-empty
// data rows) must equal SEQ_INDS+SEQ_NON_DATA_ROWS.
// Returns 1 if the file passes, 0 otherwise; the first problem found is
// reported to cerr and foutLog.
int check_format_seq(){
	string str;
	int nrow = 0;
	int ncol = 0;
	ifstream fin;
	fin.open(SEQ_FILE.c_str());
	if(fin.fail()){
		cerr << "Error: cannot find the SEQ_FILE '" << SEQ_FILE << "'." << endl;
		foutLog << "Error: cannot find the SEQ_FILE '" << SEQ_FILE << "'." << endl;
		return 0;
	}
	//==========================================================
	while(nrow < SEQ_NON_DATA_ROWS){
		getline(fin, str);     // Read in non-data rows
		nrow+=1;
	}
	while(!fin.eof()){
		getline(fin, str);
		if(str.length()>0){    // Empty lines are ignored and not counted as rows
			nrow+=1;
			ncol=0;
			bool tab=true;   //Previous character is a tab
			bool space=false;    // A space has been found after the last tab
			for(int i=0; i<str.length(); i++){
				if(str[i]!='\t' && i==0){        //Read in the first element
					ncol+=1;
					tab=false;
				}else if(str[i]!='\t' && i>0 && tab){
					// First character of a new column: must be a digit in data columns
					ncol+=1;
					tab=false;
					if(ncol>SEQ_NON_DATA_COLS && (str[i]<'0' || str[i]>'9')){
						cerr<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the SEQ_FILE."<<endl;
						foutLog<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the SEQ_FILE."<<endl;
						fin.close();
						return 0;
					}
				}else if(str[i]!='\t' && i>0 && !tab){
					// Later character of a column: a digit, or the single separating space
					if(ncol>SEQ_NON_DATA_COLS && (str[i]<'0' || str[i]>'9')){
						if(str[i]==' ' && space==false){
							space=true;
						}else{
							cerr<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the SEQ_FILE."<<endl;
							foutLog<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the SEQ_FILE."<<endl;
							fin.close();
							return 0;
						}
					}
				}else if(str[i]=='\t'){
					// End of a column: a data column must have contained its space,
					// and must not end with it.
					if(ncol>SEQ_NON_DATA_COLS && (space==false || str[i-1]==' ')){
						cerr<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the SEQ_FILE."<<endl;
						foutLog<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the SEQ_FILE."<<endl;
						fin.close();
						return 0;
					}else{
						tab=true;
						space=false;
					}
				}
			}
			if(ncol!=(LOCI+SEQ_NON_DATA_COLS)){
				cerr << "Error: incorrect number of loci in row " << nrow << " in the SEQ_FILE." <<endl;
				foutLog << "Error: incorrect number of loci in row " << nrow << " in the SEQ_FILE." <<endl;
				fin.close();
				return 0;
			}
			// The end-of-column check above only runs when a tab is seen, so the last
			// column of the row (which is terminated by the end of the line) has to
			// be validated here.  A line that ends with a tab has no open column.
			if(!tab && ncol>SEQ_NON_DATA_COLS && (space==false || str[str.length()-1]==' ')){
				cerr<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the SEQ_FILE."<<endl;
				foutLog<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the SEQ_FILE."<<endl;
				fin.close();
				return 0;
			}
		}
	}
	if(nrow!=(SEQ_INDS+SEQ_NON_DATA_ROWS)){
		cerr << "Error: incorrect number of individuals in the SEQ_FILE." << endl;
		foutLog << "Error: incorrect number of individuals in the SEQ_FILE." << endl;
		fin.close();
		return 0;
	}
	fin.close();
	return 1;
}
//################# Function to check the COORD_FILE format  ##################
// Validate the layout of COORD_FILE.
// After COORD_NON_DATA_ROWS header rows, every non-empty line must contain exactly
// NUM_PCS+COORD_NON_DATA_COLS tab-separated columns.  Every column past the first
// COORD_NON_DATA_COLS must look like a number: it starts with a digit or '-',
// and afterwards may contain digits, at most one '.', at most one exponent
// marker ('e' or 'E'), and at most one '-' placed directly after that marker.
// The number of rows (header rows plus non-empty data rows) must equal
// REF_INDS+COORD_NON_DATA_ROWS.
// Returns 1 if the file passes, 0 otherwise; the first problem found is
// reported to cerr and foutLog.
int check_format_coord(){
	string str;
	int nrow = 0;
	int ncol = 0;
	ifstream fin;
	fin.open(COORD_FILE.c_str());
	if(fin.fail()){
		cerr << "Error: cannot find the COORD_FILE '" << COORD_FILE << "'." << endl;
		foutLog << "Error: cannot find the COORD_FILE '" << COORD_FILE << "'." << endl;
		return 0;
	}
	//==========================================================
	while(nrow < COORD_NON_DATA_ROWS){
		getline(fin, str);     // Read in non-data rows
		nrow+=1;
	}
	while(!fin.eof()){
		getline(fin, str);
		if(str.length()>0){    // Empty lines are ignored and not counted as rows
			nrow+=1;
			ncol=0;
			bool tab=true;     // Previous character is a tab
			bool dot=false;    // A dot has been found after the last tab
			bool dash=false;   // An exponent sign has been found after the last tab
			bool exp=false;    // An exponent marker has been found after the last tab
			for(int i=0; i<str.length(); i++){
				if(str[i]!='\t' && i==0){        //Read in the first element
					ncol+=1;
					tab=false;
				}else if(str[i]!='\t' && i>0 && tab){
					// First character of a new column: digit or leading minus sign
					ncol+=1;
					tab=false;
					if(ncol>COORD_NON_DATA_COLS && (str[i]<'0' || str[i]>'9') && str[i]!='-'){
						cerr<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the COORD_FILE."<<endl;
						foutLog<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the COORD_FILE."<<endl;
						fin.close();
						return 0;
					}
				}else if(str[i]!='\t' && i>0 && !tab){
					if(ncol>COORD_NON_DATA_COLS && (str[i]<'0' || str[i]>'9')){
						if(str[i]=='.' && dot==false){
							dot=true;
						}else if(str[i]=='-' && (str[i-1]=='e' || str[i-1]=='E') && dash==false){
							dash=true;
						}else if((str[i]=='e' || str[i]=='E') && exp==false){
							// Parenthesized so the "only once" rule applies to 'e' as well
							// as 'E' ('&&' binds tighter than '||').
							exp=true;
						}else{
							cerr<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the COORD_FILE."<<endl;
							foutLog<<"Error: invalid value in (row "<<nrow<<", column "<<ncol<<") in the COORD_FILE."<<endl;
							fin.close();
							return 0;
						}
					}
				}else if(str[i]=='\t'){
					// New column: reset the per-number state
					tab=true;
					dot=false;
					dash=false;
					exp=false;
				}
			}
			if(ncol!=(NUM_PCS+COORD_NON_DATA_COLS)){
				cerr << "Error: incorrect number of PCs in row " << nrow << " in the COORD_FILE." <<endl;
				foutLog << "Error: incorrect number of PCs in row " << nrow << " in the COORD_FILE." <<endl;
				fin.close();
				return 0;
			}
		}
	}
	if(nrow!=(REF_INDS+COORD_NON_DATA_ROWS)){
		cerr << "Error: incorrect number of individuals in the COORD_FILE." << endl;
		foutLog << "Error: incorrect number of individuals in the COORD_FILE." << endl;
		fin.close();
		return 0;
	}
	fin.close();
	return 1;
}
//################# Function to create an empty paramfile  ##################
int create_paramfile(){
	ofstream fout;
	fout.open("laser.conf");
	if(fout.fail()){
		cerr << "Error: cannot create a file named 'laser.conf'." << endl;
		return 0;
	}
	fout << "# This is a parameter file for LASER v1.01." <<endl;
	fout << "# The entire line after a '#' will be ignored." <<endl;
	fout << "\n" << "###----Main Parameters----###" <<endl;
	fout << endl << "GENO_FILE          # File name of the reference genotype data (inlcude path if in a different directory)" <<endl;
	fout << endl << "SEQ_FILE           # File name of the study sequence data (inlcude path if in a different directory)" <<endl;
	fout << endl << "COORD_FILE         # File name of the reference coordinates (inlcude path if in a different directory)" <<endl;
	fout << endl << "OUT_PREFIX         # Prefix of output files (inlcude path if output to a different directory, default \"laser\")" <<endl;
	fout << endl << "DIM                # Number of PCs to compute (must be a positive integer; default 2)" <<endl;
	fout << endl << "MIN_LOCI           # Minimum number of covered loci in a sample (must be a positive integer; default 100)" <<endl;
	fout << endl << "SEQ_ERR            # Sequencing error rate per base (must be a number between 0 and 1; default 0.01)" <<endl;

	fout << "\n\n" << "###----Advanced Parameters----###" <<endl;
	fout << endl << "FIRST_IND           # Index of the first sample to analyze (must be a positive integer; default 1)" <<endl;
	fout << endl << "LAST_IND            # Index of the last sample to analyze (must be a positive integer; default [last sample in the SEQ_FILE])" <<endl;
	fout << endl << "REPS                # Number of repeated runs in analyzing each sample (must be a positive integer; default 1)" <<endl;
	fout << endl << "OUTPUT_REPS         # Output results from each repeated run (must be 0 or 1; default 0)" <<endl;
	fout <<	        "                    # 0: Only output mean and standardard deviation across repeated runs" <<endl; 
	fout <<         "                    # 1: Also output results from each repeated run" <<endl;	
	fout << endl << "CHECK_FORMAT        # Check the format of input files (must be 0, 1, 2, 3, 4, 10, 20, 30, or 40; default 10)" <<endl; 
	fout <<	        "                    # 0: Do not check the format of input files, and proceed to major computation" <<endl; 
	fout <<         "                    # 1: Check the format of all files and stop;     10: Proceed after checking all files" <<endl;
	fout <<         "                    # 2: Check the format of GENO_FILE and stop;     20: Proceed after checking GENO_FILE" <<endl;
	fout <<         "                    # 3: Check the format of SEQ_FILE and stop;      30: Proceed after checking SEQ_FILE" <<endl;
	fout <<         "                    # 4: Check the format of COORD_FILE and stop;    40: Proceed after checking COORD_FILE" <<endl;
	fout << endl << "CHECK_COVERAGE      # Check the sequencing coverage (must be 0, 1, or 2; default 0)" <<endl; 
	fout <<	        "                    # 0: Do not check the coverage, and proceed to major computation" <<endl; 
	fout <<         "                    # 1: Check the coverage and proceed to major computation" <<endl;
	fout <<         "                    # 2: Check the coverage and stop" <<endl;
	fout << endl << "PCA_MODE            # Switch to the PCA mode (must be 0 or 1; default 0)" <<endl; 
	fout <<	        "                    # 0: Perform LASER to estimate ancestry from sequencing data" <<endl; 
	fout <<         "                    # 1: Use the PCA mode and only perform PCA on the reference genotype data" <<endl;

 	fout << "\n\n" << "###----Command line arguments----###" <<endl <<endl;
	fout << "# -p   parameterfile (this file)" << endl;
	fout << "# -g   GENO_FILE" <<endl;
	fout << "# -s   SEQ_FILE" <<endl;
	fout << "# -c   COORD_FILE" <<endl;
	fout << "# -o   OUT_PREFIX" <<endl;
	fout << "# -k   DIM" <<endl;
	fout << "# -l   MIN_LOCI" <<endl;
	fout << "# -e   SEQ_ERR" <<endl;
	fout << "# -x   FIRST_IND" <<endl;
	fout << "# -y   LAST_IND" <<endl;
	fout << "# -r   REPS" <<endl;
	fout << "# -R   OUTPUT_REPS" <<endl;
	fout << "# -fmt CHECK_FORMAT" <<endl;
	fout << "# -cov CHECK_COVERAGE" <<endl;
	fout << "# -pca PCA_MODE" <<endl;

	fout << "\n" << "###----End of file----###";
	fout.close();
	cout << "An empty template parameter file named 'laser.conf' has been created." << endl;
	foutLog << "An empty template parameter file named 'laser.conf' has been created." << endl;  
	return 1;
}		
//################# Function to read and check the paramfile  ##################
int read_paramfile(){	
	int flag = 1;
	ifstream fin;
	fin.open(PARAM_FILE.c_str());
	if(fin.fail()){
		cerr << "Error: cannot find the PARAM_FILE '" << PARAM_FILE << "'." << endl;
		foutLog << "Error: cannot find the PARAM_FILE '" << PARAM_FILE << "'." << endl;
		create_paramfile();
		return 0;
	}
	string str;
	while(!fin.eof()){
		fin>>str;
		if(str[0]=='#'){
			getline(fin, str);
		}else if(str.compare("GENO_FILE")==0){
			fin>>str;
			if(str[0]!='#'){
				if(GENO_FILE == default_str){
					GENO_FILE = str;
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("COORD_FILE")==0){
			fin>>str;
			if(str[0]!='#'){
				if(COORD_FILE == default_str){
					COORD_FILE = str;
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("SEQ_FILE")==0){
			fin>>str;
			if(str[0]!='#'){
				if(SEQ_FILE == default_str){
					SEQ_FILE = str;
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("OUT_PREFIX")==0){
			fin>>str;
			if(str[0]!='#'){
				if(OUT_PREFIX == default_str){
					OUT_PREFIX = str;
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("MIN_LOCI")==0){
			fin>>str;
			if(str[0]!='#'){
				if(is_int(str) && atoi(str.c_str())>0){
					if(MIN_LOCI == default_int){
						MIN_LOCI = atoi(str.c_str());
					}
				}else{
					if(MIN_LOCI != default_int){
						cerr<< "Warning: MIN_LOCI in the parameter file is not a positive integer." <<endl;
						foutLog<< "Warning: MIN_LOCI in the parameter file is not a positive integer." <<endl;
					}else{
						MIN_LOCI = default_int-1;
					}
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("DIM")==0){
			fin>>str;
			if(str[0]!='#'){
				if(is_int(str) && atoi(str.c_str())>0){
					if(DIM == default_int){
						DIM = atoi(str.c_str());
					}
				}else{
					if(DIM != default_int){
						cerr<< "Warning: DIM in the parameter file is not a positive integer." <<endl;
						foutLog<< "Warning: DIM in the parameter file is not a positive integer." <<endl;
					}else{
						DIM = default_int-1;
					}
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("SEQ_ERR")==0){
			fin>>str;
			if(str[0]!='#'){
				if(is_numeric(str) && atof(str.c_str())>=0 && atof(str.c_str())<=1){
					if(SEQ_ERR == default_double){
						SEQ_ERR = atof(str.c_str());
					}
				}else{
					if(SEQ_ERR != default_double){
						cerr<< "Warning: SEQ_ERR in the parameter file is not between 0 and 1." <<endl;
						foutLog<< "Warning: SEQ_ERR in the parameter file is not between 0 and 1." <<endl;
					}else{
						SEQ_ERR  = default_double-1;
					}
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("FIRST_IND")==0){
			fin>>str;
			if(str[0]!='#'){
				if(is_int(str) && atoi(str.c_str())>0){
					if(FIRST_IND == default_int){
						FIRST_IND = atoi(str.c_str());
					}
				}else{
					if(FIRST_IND != default_int){
						cerr<< "Warning: FIRST_IND in the parameter file is not a positive integer." <<endl;
						foutLog<< "Warning: FIRST_IND in the parameter file is not a positive integer." <<endl;
					}else{
						 FIRST_IND = default_int-1;
					}
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("LAST_IND")==0){
			fin>>str;
			if(str[0]!='#'){
				if(is_int(str) && atoi(str.c_str())>0){
					if(LAST_IND == default_int){
						LAST_IND = atoi(str.c_str());
					}
				}else{
					if(LAST_IND != default_int){
						cerr<< "Warning: LAST_IND in the parameter file is not a positive integer." <<endl;
						foutLog<< "Warning: LAST_IND in the parameter file is not a positive integer." <<endl;
					}else{
						 LAST_IND = default_int-1;
					}
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("REPS")==0){
			fin>>str;
			if(str[0]!='#'){
				if(is_int(str) && atoi(str.c_str())>0){
					if(REPS == default_int){
						REPS = atoi(str.c_str());
					}
				}else{
					if(REPS != default_int){
						cerr<< "Warning: REPS in the parameter file is not a positive integer." <<endl;
						foutLog<< "Warning: REPS in the parameter file is not a positive integer." <<endl;
					}else{
						 REPS = default_int-1;
					}
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("OUTPUT_REPS")==0){
			fin>>str;
			if(str[0]!='#'){
				if(str=="0" || str=="1"){
					if(OUTPUT_REPS == default_int){
						OUTPUT_REPS = atoi(str.c_str());
					}
				}else{
					if(OUTPUT_REPS != default_int){
						cerr<< "Warning: OUTPUT_REPS in the parameter file is not 0 or 1." <<endl;
						foutLog<< "Warning: OUTPUT_REPS in the parameter file is not 0 or 1." <<endl;
					}else{
						OUTPUT_REPS  = default_int-1;
					}
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("CHECK_FORMAT")==0){
			fin>>str;
			if(str[0]!='#'){
				if(str=="0" || str=="1" || str=="2" || str=="3" || str=="4" || str=="10" || str=="20" || str=="30" || str=="40"){
					if(CHECK_FORMAT == default_int){
						CHECK_FORMAT = atoi(str.c_str());
					}
				}else{
					if(CHECK_FORMAT != default_int){
						cerr<< "Warning: CHECK_FORMAT in the parameter file is not 0, 1, 2, 3, 4, 10, 20, 30, or 40." <<endl;
						foutLog<< "Warning: CHECK_FORMAT in the parameter file is not 0, 1, 2, 3, 4, 10, 20, 30, or 40." <<endl;
					}else{
						CHECK_FORMAT  = default_int-1;
					}
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("CHECK_COVERAGE")==0){
			fin>>str;
			if(str[0]!='#'){
				if(str=="0" || str=="1" || str=="2"){
					if(CHECK_COVERAGE == default_int){
						CHECK_COVERAGE = atoi(str.c_str());
					}
				}else{
					if(CHECK_COVERAGE != default_int){
						cerr<< "Warning: CHECK_COVERAGE in the parameter file is not 0, 1, or 2." <<endl;
						foutLog<< "Warning: CHECK_COVERAGE in the parameter file is not 0, 1, or 2." <<endl;
					}else{
						CHECK_COVERAGE  = default_int-1;
					}
				}
			}else{
				getline(fin, str);
			}
		}else if(str.compare("PCA_MODE")==0){
			fin>>str;
			if(str[0]!='#'){
				if(str=="0" || str=="1"){
					if(PCA_MODE == default_int){
						PCA_MODE = atoi(str.c_str());
					}
				}else{
					if(PCA_MODE != default_int){
						cerr<< "Warning: PCA_MODE in the parameter file is not 0 or 1." <<endl;
						foutLog<< "Warning: PCA_MODE in the parameter file is not 0 or 1." <<endl;
					}else{
						PCA_MODE  = default_int-1;
					}
				}
			}else{
				getline(fin, str);
			}
		}
	}
	fin.close();
	return flag;
}
//################# Function to print parameters in execution  ##################
// Write the table of parameter values used in this run to one output stream.
// With PCA_MODE == 0 the full list is shown (COORD_FILE only if it was given);
// otherwise only the parameters listed in the else-branch are shown.
static void print_configuration_to(ostream &out){
	out <<endl << "Parameter values used in execution:" <<endl;
	out << "-------------------------------------------------" << endl;
	if(PCA_MODE != 0){
		out << "GENO_FILE (-g)" << "\t" << GENO_FILE <<endl;
		out << "DIM (-k)" << "\t" << DIM << endl;
		out << "OUT_PREFIX (-o)" << "\t" << OUT_PREFIX << endl;
		out << "CHECK_FORMAT (-fmt)" << "\t" << CHECK_FORMAT << endl;
		out << "PCA_MODE (-pca)" << "\t" << PCA_MODE << endl;
	}else{
		out << "GENO_FILE (-g)" << "\t" << GENO_FILE <<endl;
		out << "SEQ_FILE (-s)" << "\t" << SEQ_FILE << endl;
		if(COORD_FILE.compare(default_str)!=0){
			out << "COORD_FILE (-c)" << "\t" << COORD_FILE << endl;
		}
		out << "OUT_PREFIX (-o)" << "\t" << OUT_PREFIX << endl;
		out << "DIM (-k)" << "\t" << DIM << endl;
		out << "MIN_LOCI (-l)" << "\t" << MIN_LOCI << endl;
		out << "SEQ_ERR (-e)" << "\t" << SEQ_ERR << endl;
		out << "FIRST_IND (-x)" << "\t" << FIRST_IND << endl;
		out << "LAST_IND (-y)" << "\t" << LAST_IND << endl;
		out << "REPS (-r)" << "\t" << REPS << endl;
		out << "OUTPUT_REPS (-R)" << "\t" << OUTPUT_REPS << endl;
		out << "CHECK_FORMAT (-fmt)" << "\t" << CHECK_FORMAT << endl;
		out << "CHECK_COVERAGE (-cov)" << "\t" << CHECK_COVERAGE <<endl;
		out << "PCA_MODE (-pca)" << "\t" << PCA_MODE << endl;
	}
	out << "-------------------------------------------------" << endl;
}

// Print the parameter values used in execution, first to the screen and then
// to the log file.
void print_configuration(){
	print_configuration_to(cout);
	print_configuration_to(foutLog);
}
//################# Function to check parameter values  ##################
int check_parameters(){
	int flag = 1;
	if(GENO_FILE.compare(default_str)==0){
		cerr << "Error: GENO_FILE (-g) is not specified." << endl;
		foutLog << "Error: GENO_FILE (-g) is not specified." << endl;
		flag = 0;
	}
	if(SEQ_FILE.compare(default_str)==0 && PCA_MODE!=1){
		cerr << "Error: SEQ_FILE (-s) is not specified." << endl;
		foutLog << "Error: SEQ_FILE (-s) is not specified." << endl;
		flag = 0;
	}
	if(DIM==default_int){
		cerr << "Error: DIM (-k) is not specified." << endl;
		foutLog << "Error: DIM (-k) is not specified." << endl;
		flag = 0;
	}else if(DIM<1){ 
		cerr << "Error: invalid value for DIM (-k)." << endl;
		foutLog << "Error: invalid value for DIM (-k)." << endl;
		flag = 0;
	}else if(REF_INDS!=default_int && DIM>=REF_INDS){
		cerr << "Error: invalid value for DIM (-k)." << endl;
		cerr << "DIM must be smaller than the number of individuals in the GENO_FILE;" << endl;
		foutLog << "Error: invalid value for DIM (-k)." << endl;
		foutLog << "DIM must be smaller than the number of individuals in the GENO_FILE;" << endl;
		flag = 0;
	}else if(LOCI!=default_int && DIM>=LOCI){
		cerr << "Error: invalid value for DIM (-k)." << endl;
		cerr << "DIM must be smaller than the number of loci in the GENO_FILE;" << endl;
		foutLog << "Error: invalid value for DIM (-k)." << endl;
		foutLog << "DIM must be smaller than the number of loci in the GENO_FILE;" << endl;
		flag = 0;
	}else if(NUM_PCS!=default_int && DIM>NUM_PCS && PCA_MODE!=1){
		cerr << "Error: invalid value for DIM (-k)." << endl;
		cerr << "DIM cannot be greater than the number of PCs in the COORD_FILE;" << endl;
		foutLog << "Error: invalid value for DIM (-k)." << endl;
		foutLog << "DIM cannot be greater than the number of PCs in the COORD_FILE;" << endl;
		flag = 0;
	}
	if(MIN_LOCI < 1){
		cerr << "Error: invalid value for MIN_LOCI (-l)." << endl;
		foutLog << "Error: invalid value for MIN_LOCI (-l)." << endl;
		flag = 0;
	}else if(MIN_LOCI>LOCI  && LOCI!=default_int){
		cerr << "Error: invalid value for MIN_LOCI (-l)." << endl;
		cerr << "MIN_LOCI cannot be greater than the total number of loci." << endl;
		foutLog << "Error: invalid value for MIN_LOCI (-l)." << endl;
		foutLog << "MIN_LOCI cannot be greater than the total number of loci." << endl;
		flag = 0;
	}
	if(SEQ_ERR < 0 || SEQ_ERR > 1){
		cerr << "Error: invalid value for SEQ_ERR (-e)." << endl;
		foutLog << "Error: invalid value for SEQ_ERR (-e)." << endl;
		flag = 0;
	}
	if(FIRST_IND < 0){
		cerr << "Error: invalid value for FIRST_IND (-x)." << endl;
		foutLog << "Error: invalid value for FIRST_IND (-x)." << endl;
		flag = 0;
	}else if(FIRST_IND>SEQ_INDS && SEQ_INDS!=default_int){
		cerr << "Error: invalid value for FIRST_IND (-x)." << endl;
		cerr << "FIRST_IND cannot be greater than the number of individuals in the SEQ_FILE." << endl;
		foutLog << "Error: invalid value for FIRST_IND (-x)." << endl;
		foutLog << "FIRST_IND cannot be greater than the number of individuals in the SEQ_FILE." << endl;
		flag = 0;
	}
	if(LAST_IND<0 && LAST_IND!=default_int){
		cerr << "Error: invalid value for LAST_IND (-y)." << endl;
		foutLog << "Error: invalid value for LAST_IND (-y)." << endl;
		flag = 0;	
	}else if(LAST_IND<FIRST_IND && FIRST_IND!=default_int && LAST_IND!=default_int){
		cerr << "Error: invalid value for LAST_IND (-y)." << endl;
		cerr << "LAST_IND cannot be smaller than FIRST_IND." << endl;
		foutLog << "Error: invalid value for LAST_IND (-y)." << endl;
		foutLog << "LAST_IND cannot be smaller than FIRST_IND." << endl;
		flag = 0;
	}else if(LAST_IND>SEQ_INDS && SEQ_INDS!=default_int){
		cerr << "Error: invalid value for LAST_IND (-y)." << endl;
		cerr << "LAST_IND cannot be greater than the number of individuals in the SEQ_FILE." << endl;
		foutLog << "Error: invalid value for LAST_IND (-y)." << endl;
		foutLog << "LAST_IND cannot be greater than the number of individuals in the SEQ_FILE." << endl;
		flag = 0;
	}
	if(REPS < 1){
		cerr << "Error: invalid value for REPS (-r)." << endl;
		foutLog << "Error: invalid value for REPS (-r)." << endl;
		flag = 0;
	}
	if(OUTPUT_REPS!=0 && OUTPUT_REPS!=1){
		cerr << "Error: invalid value for OUTPUT_REPS (-R)." << endl;
		foutLog << "Error: invalid value for OUTPUT_REPS (-R)." << endl;
		flag = 0;
	}
	if(CHECK_COVERAGE!=0 && CHECK_COVERAGE!=1 && CHECK_COVERAGE!=2){
		cerr << "Error: invalid value for CHECK_COVERAGE (-cov)." << endl;
		foutLog << "Error: invalid value for CHECK_COVERAGE (-cov)." << endl;
		flag = 0;
	}
	if(CHECK_FORMAT!=0 && CHECK_FORMAT!=1 && CHECK_FORMAT!=2 && CHECK_FORMAT!=3 && CHECK_FORMAT!=4){
	 	if(CHECK_FORMAT!=10 && CHECK_FORMAT!=20 && CHECK_FORMAT!=30 && CHECK_FORMAT!=40){
			cerr << "Error: invalid value for CHECK_FORMAT (-fmt)." << endl;
			foutLog << "Error: invalid value for CHECK_FORMAT (-fmt)." << endl;
			flag = 0;
		}
	}
	if(PCA_MODE!=0 && PCA_MODE!=1){
		cerr << "Error: invalid value for PCA_MODE (-pca)." << endl;
		foutLog << "Error: invalid value for PCA_MODE (-pca)." << endl;
		flag = 0;
	}
	//============================================================================
	if(DIM_SEQ < 1 && DIM != default_int){
		cerr << "Error: invalid value for DIM_SEQ (-ks)." << endl;
		foutLog << "Error: invalid value for DIM_SEQ (-ks)." << endl;
		flag = 0;
	}
	//============================================================================
	return flag;
}
//################# Function to calculate input table file dimension  ##################
// Determines the dimensions of a delimited text table.
//
//   nrow      (out) number of non-empty lines in the file.
//   ncol      (out) number of fields in the first non-empty line that contains
//             at least one non-separator character. Runs of consecutive
//             separators count as a single delimiter, and leading/trailing
//             separators do not create empty fields.
//   filename  path of the file to inspect.
//   separator field delimiter character.
//
// Returns false if the file cannot be opened (nrow and ncol are then 0),
// true otherwise.
//
// Lines are split on '\n' only; any other character (including '\r') is
// treated as ordinary line content.
bool get_table_dim(int &nrow, int &ncol, string filename, char separator){
	nrow = 0;
	ncol = 0;
	ifstream fin(filename.c_str());
	if(fin.fail()){
		return false;
	}
	string str;
	// Loop on the result of getline() instead of testing eof(): if the stream
	// enters a fail/bad state without reaching end-of-file (e.g. the path can
	// be opened but not read), eof() never becomes true and an eof()-driven
	// loop would spin forever.
	while(getline(fin, str)){
		if(str.empty()){
			continue;                // Blank lines are not counted as rows
		}
		nrow+=1;
		if(ncol==0){
			bool is_sep=true;        // Previous character is a separator (or line start)
			for(string::size_type i=0; i<str.length(); i++){
				if(str[i]==separator){
					is_sep=true;
				}else if(is_sep){    // First character of a new field
					ncol+=1;
					is_sep=false;
				}
			}
		}
	}
	return true;
}
