#include <iostream>
#include <fstream>
#include <string>
#include <stdlib.h>
#include <vector>
#include <algorithm>
#include <string.h>
#include <math.h>
using namespace std;

/**
 * Reads a tab-separated distance matrix file.
 *
 * Expected layout (blank lines are ignored, a trailing '\r' on a line is dropped):
 *   1st line   : number of taxa N
 *   2nd line   : N labels separated by tabs
 *   next lines : one matrix row per line, N tab-separated numbers each
 *
 * Surplus labels, cells and rows beyond N are ignored; labels or cells missing
 * from the file stay empty / 0.
 *
 * @param distanceMat  Output. Reset to an N x N matrix filled from the file.
 * @param filename     Path of the file to read.
 * @param labels       Output. Reset to N labels filled from the file.
 * @return N on success; 0 if the file cannot be opened, the count is not a
 *         positive integer, or a matrix cell is not numeric (outputs are
 *         cleared in the latter two cases).
 */
int fileinput(std::vector<std::vector<double>>& distanceMat, std::string filename, std::vector<std::string>& labels) {
	std::ifstream input(filename);
	if (!input.good()) {
		std::cerr << "Error opening '" << filename << "'. Bailing out." << std::endl;
		return 0;
	}

	// Splits on tabs and skips empty fields, so runs of tabs count as one separator.
	const auto splitOnTabs = [](const std::string& text) -> std::vector<std::string> {
		std::vector<std::string> fields;
		std::string::size_type start = 0;
		while (start < text.size()) {
			std::string::size_type stop = text.find('\t', start);
			if (stop == std::string::npos) {
				stop = text.size();
			}
			if (stop > start) {
				fields.push_back(text.substr(start, stop - start));
			}
			start = stop + 1;
		}
		return fields;
	};

	int size = 0;
	int linecount = 0; // number of non-blank lines consumed so far
	std::string line;

	// Testing the stream itself (not .good()) keeps a last line that has no trailing newline.
	while (std::getline(input, line)) {
		if (!line.empty() && line.back() == '\r') {
			line.pop_back();
		}
		if (line.empty()) {
			continue;
		}

		if (linecount == 0) {
			char* end = nullptr;
			const long parsed = strtol(line.c_str(), &end, 10);
			const bool fitsInt = static_cast<long>(static_cast<int>(parsed)) == parsed;
			if (end == line.c_str() || parsed <= 0 || !fitsInt) {
				std::cerr << "Invalid taxon count '" << line << "' in '" << filename << "'. Bailing out." << std::endl;
				distanceMat.clear();
				labels.clear();
				return 0;
			}
			size = static_cast<int>(parsed);
			// Allocate everything up front so later stages never index past the end.
			labels.assign(size, std::string());
			distanceMat.assign(size, std::vector<double>(size, 0.0));
		}
		else if (linecount == 1) {
			const std::vector<std::string> names = splitOnTabs(line);
			for (size_t j = 0; j < names.size() && j < labels.size(); j++) {
				labels[j] = names[j];
			}
		}
		else {
			const int row = linecount - 2;
			if (row >= size) {
				break; // more rows than announced: ignore the rest
			}
			const std::vector<std::string> cells = splitOnTabs(line);
			for (size_t j = 0; j < cells.size() && j < distanceMat[row].size(); j++) {
				char* end = nullptr;
				// strtod keeps fractional distances; the matrix stores doubles.
				const double value = strtod(cells[j].c_str(), &end);
				if (end == cells[j].c_str()) {
					std::cerr << "Invalid distance '" << cells[j] << "' in '" << filename << "'. Bailing out." << std::endl;
					distanceMat.clear();
					labels.clear();
					return 0;
				}
				distanceMat[row][j] = value;
			}
		}
		linecount++;
	}

	if (size > 0 && linecount - 2 < size) {
		std::cerr << "Warning: '" << filename << "' contains fewer than " << size << " matrix rows; missing cells are 0." << std::endl;
	}
	return size;
}

/**
 * Builds a rooted tree with UPGMA and prints each merge plus the final Newick string.
 *
 * In every round the closest pair of clusters is merged; the distance from the
 * merged cluster to every other cluster is the average of the two old
 * distances weighted by the number of taxa in each merged cluster.
 *
 * Only the lower triangle (row > column) of the matrix is read, and merged
 * distances are written to both triangles.
 *
 * @param distanceMat  In/out. At least size x size; collapsed as clusters merge
 *                     (one row and one column are removed per merge).
 * @param size         Number of taxa to cluster.
 * @param labels       In/out. At least size entries; merged pairwise into
 *                     "(left, right)" strings until one label holds the whole tree.
 */
void upgma(std::vector<std::vector<double>>& distanceMat, int size, std::vector<std::string>& labels) {
	std::string newick;
	int numClusters = size;

	// Refuse input smaller than the announced size instead of indexing out of range.
	if (numClusters > 0) {
		bool usable = labels.size() >= static_cast<size_t>(numClusters)
			&& distanceMat.size() >= static_cast<size_t>(numClusters);
		for (int i = 0; usable && i < numClusters; i++) {
			usable = distanceMat[i].size() >= static_cast<size_t>(numClusters);
		}
		if (!usable) {
			std::cerr << "upgma: distance matrix or labels are smaller than the requested size " << size << "." << std::endl;
			return;
		}
	}

	// Number of original taxa inside each current cluster (needed for the weighted average).
	std::vector<int> members(numClusters > 0 ? numClusters : 0, 1);

	while (numClusters > 1) {
		// Find the closest pair; scanning row > column never touches the diagonal,
		// so genuine zero distances between different clusters are still considered.
		int rowIndex = 1;
		int colIndex = 0;
		double smallestVal = distanceMat[1][0];
		for (int i = 1; i < numClusters; i++) {
			for (int j = 0; j < i; j++) {
				if (distanceMat[i][j] < smallestVal) {
					smallestVal = distanceMat[i][j];
					rowIndex = i;
					colIndex = j;
				}
			}
		}

		const int keep = colIndex; // lower index survives and holds the merged cluster
		const int drop = rowIndex; // higher index is removed
		std::cout << "merging: " << labels[keep] << " and " << labels[drop] << " with branch distances: " << smallestVal / 2.0 << std::endl;

		// Size-weighted average distance from the merged cluster to every other cluster.
		const double total = members[keep] + members[drop];
		for (int k = 0; k < numClusters; k++) {
			if (k == keep || k == drop) {
				continue;
			}
			const double toKeep = distanceMat[std::max(keep, k)][std::min(keep, k)];
			const double toDrop = distanceMat[std::max(drop, k)][std::min(drop, k)];
			const double merged = (members[keep] * toKeep + members[drop] * toDrop) / total;
			distanceMat[keep][k] = merged;
			distanceMat[k][keep] = merged;
		}

		// Delete the dropped cluster's column, then its row.
		for (int i = 0; i < numClusters; i++) {
			distanceMat[i].erase(distanceMat[i].begin() + drop);
		}
		distanceMat.erase(distanceMat.begin() + drop);

		// Merge labels and bookkeeping.
		labels[keep] = "(" + labels[keep] + ", " + labels[drop] + ")";
		labels.erase(labels.begin() + drop);
		members[keep] += members[drop];
		members.erase(members.begin() + drop);

		newick = labels[keep];
		numClusters--;
	}

	if (numClusters == 1 && newick.empty()) {
		newick = labels[0]; // a single taxon is its own tree
	}
	std::cout << "newick format: " << newick << std::endl;
}

/**
 * Builds an unrooted tree with neighbor joining, printing the intermediate
 * r values, transition matrix and updated distance matrix of every round,
 * followed by the final Newick string.
 *
 * Each round computes r[i] = (sum of column i) / (clusters - 2), picks the pair
 * of different clusters i, j minimising d[i][j] - r[i] - r[j], and replaces the
 * pair by a new node u with d[u][k] = (d[i][k] + d[j][k] - d[i][j]) / 2.
 * Joining stops when two clusters remain; those two are connected directly in
 * the printed Newick string.
 *
 * The matrix is expected to be symmetric: rows are read, and merged distances
 * are written to both triangles.
 *
 * @param distanceMat  In/out. At least size x size; reduced to 2 x 2 when size > 2.
 * @param size         Number of taxa to join.
 * @param labels       In/out. At least size entries; reduced to the two
 *                     remaining cluster labels when size > 2.
 */
void neiJoin(std::vector<std::vector<double>>& distanceMat, int size, std::vector<std::string>& labels) {
	std::string newick;
	int numClusters = size;

	// Refuse input smaller than the announced size instead of indexing out of range.
	if (numClusters > 0) {
		bool usable = labels.size() >= static_cast<size_t>(numClusters)
			&& distanceMat.size() >= static_cast<size_t>(numClusters);
		for (int i = 0; usable && i < numClusters; i++) {
			usable = distanceMat[i].size() >= static_cast<size_t>(numClusters);
		}
		if (!usable) {
			std::cerr << "neiJoin: distance matrix or labels are smaller than the requested size " << size << "." << std::endl;
			return;
		}
	}

	while (numClusters > 2) {
		// Net divergence of every cluster.
		std::vector<double> r(numClusters);
		std::cout << "r values from left to right: " << std::endl;
		for (int i = 0; i < numClusters; i++) {
			double rsum = 0;
			for (int j = 0; j < numClusters; j++) {
				rsum += distanceMat[j][i];
			}
			r[i] = rsum / (numClusters - 2);
			std::cout << r[i] << "\t";
		}
		std::cout << std::endl;

		// Print the full transition matrix, but only let pairs of *different*
		// clusters (i < j) compete: the diagonal is -2*r[i] and would otherwise
		// win and "join" a cluster with itself.
		int rowIndex = 0;
		int colIndex = 1;
		double smallestVal = 0;
		bool havePair = false;
		std::cout << "tranistion matrix: " << std::endl;
		for (int i = 0; i < numClusters; i++) {
			for (int j = 0; j < numClusters; j++) {
				const double transition = distanceMat[i][j] - r[i] - r[j];
				std::cout << transition << "\t";
				if (i < j && (!havePair || transition < smallestVal)) {
					smallestVal = transition;
					rowIndex = i;
					colIndex = j;
					havePair = true;
				}
			}
			std::cout << std::endl;
		}
		std::cout << std::endl;

		const int keep = rowIndex; // lower index survives and becomes the new node
		const int drop = colIndex; // higher index is removed

		// Update distance matrix: distance from the new node to every other cluster.
		const double joined = distanceMat[keep][drop];
		for (int k = 0; k < numClusters; k++) {
			if (k == keep || k == drop) {
				continue;
			}
			const double merged = (distanceMat[keep][k] + distanceMat[drop][k] - joined) / 2.0;
			distanceMat[keep][k] = merged;
			distanceMat[k][keep] = merged;
		}

		// Delete the dropped cluster's column, then its row.
		for (int i = 0; i < numClusters; i++) {
			distanceMat[i].erase(distanceMat[i].begin() + drop);
		}
		distanceMat.erase(distanceMat.begin() + drop);

		// Merge labels.
		labels[keep] = "(" + labels[keep] + ", " + labels[drop] + ")";
		labels.erase(labels.begin() + drop);
		numClusters--;

		// Print updated distance matrix.
		std::cout << "updated distance matrix: " << std::endl;
		for (int i = 0; i < numClusters; i++) {
			std::cout << labels[i] << "\t";
		}
		std::cout << std::endl;
		for (int i = 0; i < numClusters; i++) {
			for (int j = 0; j < numClusters; j++) {
				std::cout << distanceMat[i][j] << "\t";
			}
			std::cout << std::endl;
		}
		std::cout << std::endl;
	}

	// The last two clusters are connected by a single branch; include both in the tree.
	if (numClusters == 2) {
		newick = "(" + labels[0] + ", " + labels[1] + ")";
	}
	else if (numClusters == 1) {
		newick = labels[0];
	}
	std::cout << "newick format: " << newick << std::endl;
}

// Entry point: asks for a distance-matrix file, loads it, then runs UPGMA and
// neighbor joining, each on its own private copy of the loaded data.
int main() {
	cout << "Type the filename of the distance matrix: " << endl;
	string filename;
	cin >> filename;

	vector<vector<double>> distanceMat;
	vector<string> labels;
	const int size = fileinput(distanceMat, filename, labels);

	// Both algorithms modify their arguments, so each one gets a fresh copy.
	vector<vector<double>> workingMat = distanceMat;
	vector<string> workingLabels = labels;
	cout << "running UPGMA." << endl;
	upgma(workingMat, size, workingLabels);

	workingMat = distanceMat;
	workingLabels = labels;
	cout << "running Nieghbor-Join." << endl;
	neiJoin(workingMat, size, workingLabels);

	return 0;
}

C++ Online Compiler

Write, run, and share C++ code online for free using OneCompiler's C++ online compiler. It is a robust, feature-rich online compiler for the C++ language, running on the latest version, C++17. Getting started with OneCompiler's C++ compiler is simple and fast: the editor shows sample boilerplate code when you choose C++ as the language, so you can start coding right away.

Read inputs from stdin

OneCompiler's C++ online compiler supports stdin, and users can give input to programs using the STDIN textbox under the I/O tab. The following sample program takes a name as input and prints a greeting that includes it.

#include <iostream>
#include <string>
using namespace std;

// Sample program: prompts for a name, reads one line from standard input,
// and prints it after "Hello ".
int main() 
{
    string name;
    cout << "Enter name:";
    // getline reads the whole line into name, so the name may contain spaces.
    getline (cin, name);
    cout << "Hello " << name;
    return 0;
}

About C++

C++ is a widely used middle-level programming language.

  • Supports different platforms like Windows, various Linux flavours, MacOS etc
  • C++ supports OOP concepts like Inheritance, Polymorphism, Encapsulation and Abstraction.
  • Case-sensitive
  • C++ is a compiled language
  • C++ supports structured programming
  • C++ provides a lot of built-in functions and also supports dynamic memory allocation.
  • Like C, C++ also allows you to play with memory using Pointers.

Syntax help

Loops

1. If-Else:

If-Else is used whenever you want to perform a set of operations based on a condition.

if(conditional-expression) {
   //code
}
else {
   //code
}

You can also nest if-else statements, or chain them into an if-else-if ladder when several conditions must be tested on a single variable.

2. Switch:

Switch is an alternative to If-Else-If ladder.

switch(conditional-expression){    
case value1:    
 // code    
 break;  // optional  
case value2:    
 // code    
 break;  // optional  
......    
    
default:     
 code to be executed when all the above cases are not matched;    
} 

3. For:

For loop is used to iterate a set of statements based on a condition.

for(Initialization; Condition; Increment/decrement){  
  //code  
} 

4. While:

While is also used to iterate a set of statements based on a condition. Usually while is preferred when the number of iterations is not known in advance.

while (condition) {  
// code 
}  

5. Do-While:

Do-while is also used to iterate a set of statements based on a condition. It is mostly used when you need to execute the statements at least once.

do {  
 // code 
} while (condition); 

Functions

A function is a subroutine that contains a set of statements. Functions are usually written when the same set of statements has to be called from multiple places, which increases reusability and modularity. A function runs only when it is called.

How to declare a Function:

return_type function_name(parameters);

How to call a Function:

function_name (parameters)

How to define a Function:

return_type function_name(parameters) {  
 // code
}