0

I'm making a program that calculates the equation of a linear regression based on some data from a .csv file and something's not fine, I humbly request your help and assistance C++ community. The data will be read from the Salary_Data.csv file, and the results will be displayed on the screen. The data is this:

YearsExperience Salary
1.1 39.343
1.5 37.731
2.2 39.891
3.0 60.150
3.2 64.445
3.9 63.218
4.0 56.957
4.5 61.111
5.1 66.029
5.9 81.363
6.8 91.738
7.9 101.302
8.7 109.431
9.5 116.969
10.3 122.391

And this is what I've managed to do:

#include <iostream>
#include <fstream>
#include <string>
#include <cmath>

using namespace std;


double columnSum(double array[], int size);
double sigma(double x[], double avgX, double y[], double avgY, double n);

int main() {

    
    string linia1;
    string tabel;
    char capDeTabel[100];
    ifstream data("Salary_Data.csv");
    data.getline(capDeTabel, 255);

    
    double x[100];
    double y[100];
    double avgX;
    double avgY;

    int i = 0;

    cout << "Whats being read: " << endl << endl;

    while (!data.eof())
    {
        data >> x[i]; data >> y[i];
        cout << x[i] << "  " << y[i] << endl << endl;
        i++;

    }
    data.close();

    int n;
    n = i;

    
    avgX = columnSum(x, n) / n;
    avgY = columnSum(y, n) / n;


    double w2 = sigma(x, avgX, y, avgY, n);
    double w1 = avgY - w2 * avgX;


    cout << "Equation: " << endl;
    cout << "y = " << w1 << "x + " << w2 << "\n";




}


double columnSum(double array[], int size)
{
    double total = 0.0;
    for (int i = 0; i < size; i++) {
        total = total + array[i];
    }
    return total;

}


double sigma(double x[], double avgX, double y[], double avgY, double n) {
    double A = 0.0;
    double B = 0.0;
    for (int i = 0; i < n; i++) {
        A = pow((x[i] - avgX), 2);
    }

    for (int i = 0; i < n; i++) {
        B = (x[i] - avgX) * (y[i] - avgY);
    }

    double C = 0;
    C = B / A;

    return C;


}

The program should output y = 9.536x + 24.800 but instead it outputs y = 25.4456x + 9.41217. Close to what it should be but it's inversed and still not correct. I dont know what to try anymore.

The equation is:

equsion

where Xmed and Ymed are averages of the columns.

Any ideas why I get different results or a fix for it?

Marek R
  • 23,155
  • 5
  • 37
  • 107
  • I bet problem is `while (!data.eof())` ant fact that input is terminated by white characters. – Marek R Dec 07 '20 at 13:40
  • You need to read [Why is iostream::eof inside a loop condition (i.e. `while (!stream.eof())`) considered wrong?](https://stackoverflow.com/questions/5605125/why-is-iostreameof-inside-a-loop-condition-i-e-while-stream-eof-cons). – molbdnilo Dec 07 '20 at 13:45
  • 1
    The `sigma` routine does lots of calculations, and throws the result away, only keeping the last-most calculation. That's probably incorrect. – Eljay Dec 07 '20 at 13:46
  • Voting to close as typo: you use `=` instead `+=` when calculating `sigma`: https://godbolt.org/z/5aKcqz – Marek R Dec 07 '20 at 13:46
  • You wrote a correct summation in `columnSum`, but not in `sigma`. – molbdnilo Dec 07 '20 at 13:49

0 Answers0