#include <ctime> #include <fstream> #include <iostream> #include <sstream> #include <vector> #include <stdio.h> #include <chrono> #include <mpi.h> #include <stdlib.h> using namespace std; using namespace std::chrono; // allocate in memory 2-dimensional array float **alloc_2d_float(int rows, int cols) { float *data = (float *)malloc(rows*cols*sizeof(float)); float **array= (float **)malloc(rows*sizeof(float*)); for (int i=0; i<rows; i++) array[i] = &(data[cols*i]); return array; } struct Point { double x, y; // coordinates int cluster; // no default cluster double minDist; // default infinite distance to nearest cluster Point() : x(0.0), y(0.0), cluster(-1), minDist(__DBL_MAX__) {} Point(double x, double y) : x(x), y(y), cluster(-1), minDist(__DBL_MAX__) {} // Computes the (square) euclidean distance between this point and another double distance(Point p) { return (p.x - x) * (p.x - x) + (p.y - y) * (p.y - y); } }; // reads data from 2-d array into vector of points vector<Point> read_data(float ** XY, int n) { vector<Point> points; double x, y; for (int i = 0; i < n; i++) { x = XY[i][0]; y = XY[i][1]; points.push_back(Point(x, y)); } return points; } // calculate Calinsky-Harabatzs index float calc_CH(int n, int k, float ** out) { float x_m = 0; // mean value of x float y_m = 0; // mean value of y float Wk = 0; // within group dispersion float T = 0; // data scatter float CH = 0; // Calinsky-Harabatsz index // calculate means and Wk for (int i = 0; i < n; i++) { x_m += out[i][0]; y_m += out[i][1]; Wk += ((out[i][0] - out[i][3])*(out[i][0] - out[i][3]) + (out[i][1] - out[i][4])*(out[i][1] - out[i][4])); } x_m = x_m / n; y_m = y_m / n; // calculate T for (int i = 0; i < n; i++) { T += ((out[i][0] - x_m)*(out[i][0] - x_m) + (out[i][1] - y_m)*(out[i][1] - y_m)); } // calculate CH CH = (T - Wk) * (k - 1) / (Wk * (n - k)); return CH; } void kMeansClustering(vector<Point>* points, float ** out, int epochs, int k, float ** centers) { int n = points->size(); // Randomly 
initialise centroids // The index of the centroid within the centroids vector // represents the cluster label. vector<Point> centroids; srand(time(0)); for (int i = 0; i < k; ++i) { centroids.push_back(points->at(rand() % n)); } for (int i = 0; i < epochs; ++i) { // For each centroid, compute distance from centroid to each point // and update point's cluster if necessary for (vector<Point>::iterator c = begin(centroids); c != end(centroids); ++c) { int clusterId = c - begin(centroids); for (vector<Point>::iterator it = points->begin(); it != points->end(); ++it) { Point p = *it; double dist = c->distance(p); if (dist < p.minDist) { p.minDist = dist; p.cluster = clusterId; } *it = p; } } // Create vectors to keep track of data needed to compute means vector<int> nPoints; vector<double> sumX, sumY; for (int j = 0; j < k; ++j) { nPoints.push_back(0); sumX.push_back(0.0); sumY.push_back(0.0); } // Iterate over points to append data to centroids for (vector<Point>::iterator it = points->begin(); it != points->end(); ++it) { int clusterId = it->cluster; nPoints[clusterId] += 1; sumX[clusterId] += it->x; sumY[clusterId] += it->y; it->minDist = __DBL_MAX__; // reset distance } // Compute the new centroids for (vector<Point>::iterator c = begin(centroids); c != end(centroids); ++c) { int clusterId = c - begin(centroids); c->x = sumX[clusterId] / nPoints[clusterId]; c->y = sumY[clusterId] / nPoints[clusterId]; } } // Write to csv computed points ofstream myfile1; myfile1.open("./output.csv"); myfile1 << "x,y,c" << endl; for (vector<Point>::iterator it = points->begin(); it != points->end(); ++it) { myfile1 << it->x << "," << it->y << "," << it->cluster << endl; } myfile1.close(); // Write to csv final centroids ofstream myfile2; myfile2.open("./centroids.csv"); myfile2 << "x,y,c" << endl; int count = 0; for (vector<Point>::iterator c = begin(centroids); c != end(centroids); ++c) { int clusterId = c - begin(centroids); // fill array with centroids centers[count][0] = c->x; 
centers[count][1] = c->y; centers[count][2] = clusterId; myfile2 << centers[count][0] << "," << centers[count][1] << "," << centers[count][2] << "\n"; ++count; } myfile2.close(); count = 0; // Save points, centroid index and coordinates in one array // fill the array for (vector<Point>::iterator it = points->begin(); it != points->end(); ++it) { for (vector<Point>::iterator c = begin(centroids); c != end(centroids); ++c) { int clusterId = c - begin(centroids); if (clusterId == it->cluster) { out[count][0] = it->x; out[count][1] = it->y; out[count][2] = it->cluster; out[count][3] = c->x; out[count][4] = c->y; } } count++; } } // Calculate number of lines in .csv file ( number of elements for axis in 2-d array) int calc_n(std::string file) { int n = 0; std::string line; std::ifstream myfile(file); while (std::getline(myfile, line)) { ++n; } return n; } // Fill the 2-d floats array of points, passed by pointer from the .csv file void fill_XY(std::string file, float **XY) { std::string line; std::ifstream myfile(file); int i = 0; while (std::getline(myfile, line)) { std::stringstream lineStream(line); std::string bit; float x, y; std::getline(lineStream, bit, ','); x = std::stof(bit); std::getline(lineStream, bit, '\n'); y = std::stof(bit); XY[i][0] = x; XY[i][1] = y; ++i; } } int main() { double time1, time2, duration, global; // Get starting timepoint time1 = MPI_Wtime(); MPI_Init(NULL, NULL); int world_size; MPI_Comm_size(MPI_COMM_WORLD, &world_size); int world_rank; MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); std::string file = "./brooklyn_sales_map.csv"; // input file int n = calc_n(file); // number of points in file int k = 5; // number of clusters float **XY; // input data array float **centers; // output array of centroids if (world_rank == 0) { // allocate data array XY = alloc_2d_float(n,2); // fill the array from file fill_XY(file, XY); // Send the data to the first process MPI_Send(&(XY[0][0]), 2*n, MPI_FLOAT, 1, 0, MPI_COMM_WORLD); // allocate an array of 
centroids centers = alloc_2d_float(k,3); // receive centroids coordinates MPI_Recv(&(centers[0][0]), k*3, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // output centroids cout << "\n Centroids: \n\n"; for (int i = 0; i < k; i++) { cout << centers[i][0] << " " << centers[i][1] << " " << centers[i][2] << "\n"; } // receive the CH-value float ch = 0; MPI_Recv(&ch, 1, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // output CH-value cout << "\n" << "CH-value: " << ch << "\n\n"; } if (world_rank > 0) { // allocate data array XY = alloc_2d_float(n,2); // Receive at most MAX_NUMBERS from process zero MPI_Recv(&(XY[0][0]), 2*n, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // fill the points vector vector<Point> points = read_data(XY, n); // create an output array of kMeans float **out; out = alloc_2d_float(n,k); // allocate an output array of centroids centers = alloc_2d_float(k,3); // Run k-means with 100 iterations and for 5 clusters kMeansClustering(&points, out, 100, k, centers); // Calculate clustering efficiency by CH-index float ch = calc_CH(n, k, out); // Send the centroids array to process zero MPI_Send(&(centers[0][0]), k*3, MPI_FLOAT, 0, 0, MPI_COMM_WORLD); // Send the CH-value to the process zero MPI_Send(&ch, 1, MPI_FLOAT, 0, 0, MPI_COMM_WORLD); } // Get ending timepoint time2 = MPI_Wtime(); // Calculate duration duration = time2 - time1; // Calculate global runtime MPI_Reduce(&duration,&global,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD); if(world_rank == 0) { printf("Global runtime is %f\n",global); } MPI_Finalize(); return 0; }
Write, Run & Share C++ code online using OneCompiler's C++ online compiler for free. It's one of the robust, feature-rich online compilers for the C++ language, running on the latest version 17. Getting started with OneCompiler's C++ compiler is simple and pretty fast. The editor shows sample boilerplate code when you choose C++ as the language
and start coding!
OneCompiler's C++ online compiler supports stdin, and users can give inputs to programs using the STDIN textbox under the I/O tab. The following is a sample program which takes a name as input and prints a greeting with that name.
#include <iostream>
#include <string>
using namespace std;
int main()
{
string name;
cout << "Enter name:";
getline (cin, name);
cout << "Hello " << name;
return 0;
}
C++ is a widely used middle-level programming language.
Whenever you want to perform a set of operations based on a condition, If-Else is used.
if(conditional-expression) {
//code
}
else {
//code
}
You can also use if-else for nested Ifs and an If-Else-If ladder when multiple conditions are to be checked against a single variable.
Switch is an alternative to If-Else-If ladder.
switch(conditional-expression){
case value1:
// code
break; // optional
case value2:
// code
break; // optional
......
default:
code to be executed when all the above cases are not matched;
}
For loop is used to iterate a set of statements based on a condition.
for(Initialization; Condition; Increment/decrement){
//code
}
While is also used to iterate a set of statements based on a condition. Usually while is preferred when the number of iterations is not known in advance.
while (condition) {
// code
}
Do-while is also used to iterate a set of statements based on a condition. It is mostly used when you need to execute the statements at least once.
do {
// code
} while (condition);
A function is a sub-routine which contains a set of statements. Usually functions are written when multiple calls are required to the same set of statements, which increases reusability and modularity. A function runs only when it is called.
return_type function_name(parameters);
function_name (parameters)
return_type function_name(parameters) {
// code
}