in the implementation of this task, faced with the problem of slow computing
First I decided to speed up log10 - it did not help, although it calculates faster than log10 from the standard library - (see the comparison in the questionnaire), but using the degree table 10 was not very good ideas, although bin search should have affected ...
In general, I can not understand what else and most importantly how to optimize in addition to calculating the decimal logarithm
#include <iostream> #include <vector> #include <math.h> #include <stdint.h> const long double _10_POWERS[40] = { 1e+0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39 }; static inline uint32_t log10_fast(long double x) { //uint32_t res = 0; int l = 0, r = 40 - 1; while (l <= r) { int mid = l + ((r -l) >> 1); if ( x >= _10_POWERS[mid] && x < _10_POWERS[mid + 1] ) { return mid; } if (x >= _10_POWERS[mid]) l = mid; else r = mid; } return 0; }; uint32_t compute(int n, std::vector<std::vector<uint32_t>>& a) { long double x = 0.0f; uint32_t s = 0; const long double _2_96 = pow(2, 96); const long double _2_64 = pow(2, 64); const long double _2_32 = pow(2, 32); for (int i = 0; i < n; ++i) { for (int j = i + 1; j < n; ++j) { x = _2_96 * (a[i][0] ^ a[j][0]); x += _2_64 * (a[i][1] ^ a[j][1]); x += _2_32 * (a[i][2] ^ a[j][2]); x += (a[i][3] ^ a[j][3]); s += log10_fast(x); } } return 2 * s; } int main(int argc, char const *argv[]) { int n; std::cin >> n; std::vector<std::vector<uint32_t>> a(n, std::vector<uint32_t>(4)); for (int i = 0; i < n; ++i) { for (int j = 0; j < 4; ++j) { std::cin >> a[i][j]; } } std::cout << compute(n, a) << '\n'; return 0; } What are some ideas about optimizations here?
Maybe there is another faster way to calculate log10 ? Or maybe it's not logarithm?
ps
comparison log10 and log10_fast
uint32_t s = 0; high_resolution_clock::time_point t1 = high_resolution_clock::now(); for (unsigned i = 0; i < 1e+8; ++i) { s += log10( static_cast<long double>(rand()) ); } high_resolution_clock::time_point t2 = high_resolution_clock::now(); duration<double> dur = duration_cast<milliseconds>( t2 - t1 ); std::cout << dur.count() << '\n'; // 6.374 sec s = 0; t1 = high_resolution_clock::now(); for (unsigned i = 0; i < 1e+8; ++i) { s += log10_fast( static_cast<long double>(rand()) ); } t2 = high_resolution_clock::now(); dur = duration_cast<milliseconds>( t2 - t1 ); std::cout << dur.count() << '\n'; // 5.907 sec 
xin any case, accuracy, I think that is not critical here since the logarithm should give the same number for a whole set of numbers, for example, lying in the range10^5 до 99^5- the number 5, and as for speed operations with integers over floating-point numbers, then yes, it is understandable faster, but that is another question - ampawd