I had the following task:
There are points obtained from the experiment, they are described by some theoretical smooth function f (x) , which is described by several parameters f (x, p1, p2, ..., pn)
The task is to choose the parameters p , so that the theoretical function most accurately describes the experimental data (using the least squares method) —in other words, that the sum of the square of the difference between the theoretical and experimental values is minimal
sum((f(x) - data(x))^2) -> min Decision
Since the theoretical function is very complex (the sum of two log-normal distributions, 5 (6) parameters)
const long double f1 = a1 * exp(-(log(x) - mu1) * (log(x) - mu1) / (2.0 * sigma1 * sigma1)) / (x * sigma1 * 2.506628274631000502415765284811); const long double f2 = a2 * exp(-(log(x) - mu2) * (log(x) - mu2) / (2.0 * sigma2 * sigma2)) / (x * sigma2 * 2.506628274631000502415765284811); const long double f = f1 + f2 Then to the additional search of the parameters from 'min' to 'max' I added optimization, allowing to significantly speed up the code.
For example, since the function is smooth, then by changing the parameter it is possible to achieve that the theoretical function will first get closer and closer to the experimental one, and then it will begin to move away and at this moment in principle you can stop increasing the parameter and move on to the next.
Problem
Everything would be great, but deciding to see the behavior described above saw the following:
those. first a gradual decrease (approximation to the theoretical), and then some garbage.
0,13186800 0,000119252701172278 0,13189500 0,000119239008348983 0,13192200 0,000119242281293030 0,13194900 0,000119245605642282 0,13197600 0,000119248981396740 0,13200300 0,000119252408556403 0,13203000 0,000119255887121271 0,13205700 0,000119241658719692 0,13208400 0,000119245240094971 0,13211100 0,000119248872875455 0,13213800 0,000119252557061144 0,13216500 0,000119256292652038 0,13219200 0,000119241551855830 0,13221900 0,000119245390257135 Tell me what it can be connected with and how to deal with it? It seems to me that the accuracy of calculations is no longer enough simply, although 'long double'
PS
But what the approximation graphs look like - it is clear that everything fits pretty well, but something is missing, maybe just because of the problem described and it’s not possible to superimpose the graphs more accurately
Pps
Well, or the assumption of smooth behavior when changing one parameter is wrong: (but it seems not
PPPS
Who wants to see the code:
Source + file containing data
https://yadi.sk/i/wtYtXss83YrUJc source (cpp)
https://yadi.sk/d/dvEnweGL3YrUFR data
// ols.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include <cmath> #include <fstream> #include <iomanip> #include <iostream> #include <string> #include <vector> #include <conio.h> std::vector<std::string> splitString(_In_ const std::string& strData, _In_ const std::string& strSubString) { std::vector<std::string> lResult; size_t szOffset = std::string::npos; std::string strCurrentSlice = strData; while ((szOffset = strCurrentSlice.find(strSubString)) != std::string::npos) { lResult.push_back(strCurrentSlice.substr(0, szOffset)); strCurrentSlice = strCurrentSlice.substr(szOffset + strSubString.length()); } lResult.push_back(strCurrentSlice); return lResult; } int main() { struct point_t { long double x; long double x_ln; long double y; }; // считать файл std::ifstream data(L"d:\\science\\data.dat"); std::string line; point_t* pointsData = new point_t[10000]; int pointsAmount = 0; while (std::getline(data, line)) { // распарсить строку std::vector<std::string> parts = splitString(line, ";"); pointsData[pointsAmount].x = (long double)stod(parts[0]); pointsData[pointsAmount].x_ln = (pointsData[pointsAmount].x == 0.0) ? 0 : log(pointsData[pointsAmount].x); pointsData[pointsAmount].y = (long double)stod(parts[1]); pointsAmount++; } data.close(); // подобрать коэффициенты struct CParameter { typedef std::pair<long double, long double> start_point_pr; long double min; long double max; long double delta; int steps_amount; CParameter(const long double in_min, const long double in_max, const int in_steps_amount) { min = in_min; max = in_max; steps_amount = in_steps_amount; delta = (max - min) / steps_amount; } CParameter(const start_point_pr& in_ave, const int in_steps_amount) { min = in_ave.first * (1.0 - in_ave.second); max = in_ave.first * (1.0 + in_ave.second); steps_amount = in_steps_amount; delta = (max - min) / steps_amount; } CParameter(const long double in_ave, const int in_steps_amount) : CParameter(start_point_pr(in_ave, 0.3), in_steps_amount) {} void compact(const long double current) { min = current - delta * 2.5; max = current + delta * 2.5; delta = (max - min) / steps_amount; } }; int approximationMax = 5; const int xIndexMin = 1; const int xIndexMax = 200;// pointsAmount; CParameter _sigma1(0.61991875, 50); CParameter _mu1(3.72709594, 50); CParameter _a1(0.86259259, 15); CParameter _sigma2(0.09229050, 50); CParameter _mu2(4.54333653, 50); CParameter _a2(0.13740741, 15); long double olsSigma1 = 0.0; long double olsMu1 = 0.0; long double olsA1 = 0.0; long double olsSigma2 = 0.0; long double olsMu2 = 0.0; long double olsA2 = 0.0; __int64 maxCounter = (__int64)_sigma1.steps_amount * (__int64)_mu1.steps_amount * (__int64)_a1.steps_amount * (__int64)_sigma2.steps_amount * (__int64)_mu2.steps_amount /** (__int64)_a2.steps_amount*/ * (__int64)approximationMax; __int64 localCounter = 0; long double olsMinCoeff = 1e20; for (int approximationIndex = 0; approximationIndex < approximationMax; approximationIndex++) { // перебрать параметры for (long double sigma1 = _sigma1.min; sigma1 <= _sigma1.max; sigma1 += _sigma1.delta) { const long double s1_1 = -1.0 / (2.0 * sigma1 * sigma1); const long double s1_2 = sigma1 * 2.506628274631000502415765284811; for (long double mu1 = _mu1.min; mu1 <= _mu1.max; mu1 += _mu1.delta) { for (long double a1 = _a1.min; a1 <= _a1.max; a1 += _a1.delta) { const long double a2 = 1.0 - a1; for (long double sigma2 = _sigma2.min; sigma2 <= _sigma2.max; sigma2 += _sigma2.delta) { const long double s2_1 = -1.0 / (2.0 * sigma2 * sigma2); const long double s2_2 = sigma2 * 2.506628274631000502415765284811; for (long double mu2 = _mu2.min; mu2 <= _mu2.max; mu2 += _mu2.delta) { // for (long double a2 = _a2.min; a2 <= _a2.max; a2 += _a2.delta) // { // отобразить информацию localCounter++; if ((localCounter % 100000) == 0) { const long double process = 100.0 * (long double)localCounter / (long double)maxCounter; std::cout << std::fixed << std::setprecision(3) << process << "% (" << approximationIndex << ") | ols: " << std::setprecision(8) << (olsMinCoeff / 1000000.0) << " | sigma1: " << olsSigma1 << " mu1: " << olsMu1 << " a1: " << olsA1 << " | sigma2: " << olsSigma2 << " mu2: " << olsMu2 << " a2: " << olsA2 << " \r"; } // вычислить МНК коэффициент long double olsCoeff = 0.0; for (int xIndex = xIndexMin; xIndex < xIndexMax; xIndex++) { const long double x = pointsData[xIndex].x; const long double x_ln = pointsData[xIndex].x_ln; const long double y = pointsData[xIndex].y * 1000.0; // const long double formula1 = a1 * exp(-(x_ln - mu1) * (x_ln - mu1) / (2.0 * sigma1 * sigma1)) / (x * sigma1 * 2.506628274631000502415765284811); // const long double formula2 = a2 * exp(-(x_ln - mu2) * (x_ln - mu2) / (2.0 * sigma2 * sigma2)) / (x * sigma2 * 2.506628274631000502415765284811); const long double formula1 = a1 * exp((x_ln - mu1) * (x_ln - mu1) * s1_1) / (x * s1_2); const long double formula2 = a2 * exp((x_ln - mu2) * (x_ln - mu2) * s2_1) / (x * s2_2); const long double formula = (formula1 + formula2) * 1000.0; olsCoeff += (formula - y) * (formula - y); // если вычисленный МНК коэффициент превысил максимальный МНК коэффициент, прекратить дальнейший анализ точек для заданной совокупности параметров if (olsCoeff > olsMinCoeff) break; } // рассчитать коэффициент if (olsCoeff < olsMinCoeff) { olsMinCoeff = olsCoeff; olsA1 = a1; olsMu1 = mu1; olsSigma1 = sigma1; olsA2 = a2; olsMu2 = mu2; olsSigma2 = sigma2; } // } } } } } } // скорректировать (сузить) диапазон, в котором могут меняться коэффициенты _sigma1.compact(olsSigma1); _mu1.compact(olsMu1); _a1.compact(olsA1); _sigma2.compact(olsSigma2); _mu2.compact(olsMu2); _a2.compact(olsA2); } std::cout << std::endl << std::endl; std::cout << std::setprecision(8) << "sigma1: " << olsSigma1 << std::endl; std::cout << std::setprecision(8) << "mu1: " << olsMu1 << std::endl; std::cout << std::setprecision(8) << "a1: " << olsA1 << std::endl; std::cout << std::setprecision(8) << "sigma2: " << olsSigma2 << std::endl; std::cout << std::setprecision(8) << "mu2: " << olsMu2 << std::endl; std::cout << std::setprecision(8) << "a2: " << olsA2 << std::endl; std::cout << std::setprecision(16) << std::endl << "ols: " << olsMinCoeff << std::endl; std::cout << "Press any key to continue" << std::endl; _getch(); return 0; } 



foreach other). I did not check all the variables, I checked it on the deepest. It may well be that the computational error, although I thought thatlong doubleshould cope, all the same 10 bytes per number. - Zhihartotal += f(x)) is performed for all points (mine is 200), and sincef(x)gives values of the order of e-9, and (yf (x)) ^ 2 order y-10, then the error and accumulates. Do you think you should first multiply say by a million-billion? - Zhihar