I'm making a program that calculates the equation of a linear regression based on some data from a .csv file and something's not fine, I humbly request your help and assistance C++ community. The data will be read from the Salary_Data.csv file, and the results will be displayed on the screen. The data is this:
YearsExperience Salary
1.1 39.343
1.5 37.731
2.2 39.891
3.0 60.150
3.2 64.445
3.9 63.218
4.0 56.957
4.5 61.111
5.1 66.029
5.9 81.363
6.8 91.738
7.9 101.302
8.7 109.431
9.5 116.969
10.3 122.391
And this is what I've managed to do:
#include <iostream>
#include <fstream>
#include <string>
#include <cmath>
using namespace std;
double columnSum(double array[], int size);
double sigma(double x[], double avgX, double y[], double avgY, double n);
int main() {
string linia1;
string tabel;
char capDeTabel[100];
ifstream data("Salary_Data.csv");
data.getline(capDeTabel, 255);
double x[100];
double y[100];
double avgX;
double avgY;
int i = 0;
cout << "Whats being read: " << endl << endl;
while (!data.eof())
{
data >> x[i]; data >> y[i];
cout << x[i] << " " << y[i] << endl << endl;
i++;
}
data.close();
int n;
n = i;
avgX = columnSum(x, n) / n;
avgY = columnSum(y, n) / n;
double w2 = sigma(x, avgX, y, avgY, n);
double w1 = avgY - w2 * avgX;
cout << "Equation: " << endl;
cout << "y = " << w1 << "x + " << w2 << "\n";
}
double columnSum(double array[], int size)
{
double total = 0.0;
for (int i = 0; i < size; i++) {
total = total + array[i];
}
return total;
}
double sigma(double x[], double avgX, double y[], double avgY, double n) {
double A = 0.0;
double B = 0.0;
for (int i = 0; i < n; i++) {
A = pow((x[i] - avgX), 2);
}
for (int i = 0; i < n; i++) {
B = (x[i] - avgX) * (y[i] - avgY);
}
double C = 0;
C = B / A;
return C;
}
The program should output y = 9.536x + 24.800 but instead it outputs y = 25.4456x + 9.41217. Close to what it should be but it's inversed and still not correct. I dont know what to try anymore.
The equation is:
where Xmed and Ymed are averages of the columns.
Any ideas why I get different results or a fix for it?