paralleling code with generating permutations in C, openmp

Question

Good day. It is necessary to speed up the program for large values of input data. Actually the task itself: it is necessary for each generated permutation to read certain values that are written to an array (we denote it by uniq_arr ). Next, it will be checked whether there is a node in the linear list ( Node_t ) containing the given array. If so, then ignore the given uniq_arr and, therefore, the permutation itself; otherwise, we create a new node with this value and write it to the very beginning of the list (FIFO). Actually, here is the main code.

  #define SIZE_RING 10 const int ring[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; typedef struct SpectrNode { int spectVector[SIZE_RING]; struct SpectrNode *next; }SpectrNode_t; void push (SpectrNode_t **head, int spectr[SIZE_RING]) { SpectrNode_t *newNode; int i = 0; newNode = (SpectrNode_t*)malloc(sizeof(SpectrNode_t)); memset(newNode->spectVector, 0, SIZE_RING * sizeof(int)); for(i = 0; i < SIZE_RING; i++) (newNode->spectVector)[i] = spectr[i]; newNode->next = *head; *head = newNode; } void find (SpectrNode_t **head, int newSpectr[SIZE_RING]) { SpectrNode_t *cur = *head; int res = 0; while (cur != NULL) { if (CompareIntArrays(cur->spectVector, newSpectr) == 0) res = 1; // такой элемент уже есть в списке cur = cur->next; } cur = *head; if (res == 0) { // элемент уникален push(head, newSpectr); } } void countSpectVector(SpectrNode_t **head, int perm[SIZE_RING]) { int intSpectr[SIZE_RING]; int i = 0, k = 0; int diffTmp = 0, betta = 0; memset(intSpectr, 0, SIZE_RING * sizeof(int)); for (i = 0; i < SIZE_RING; i++){ betta = ring[i]; diffTmp = perm[( 1 + betta ) % SIZE_RING] - perm[betta]; if (diffTmp < 0) diffTmp += SIZE_RING; intSpectr[diffTmp]++; } find(head, intSpectr); } void generatePerms(int arr[SIZE_RING], SpectrNode_t **head) { int c[SIZE_RING]; int i; i = 0; memset(c, 0, SIZE_RING * sizeof(int)); while (i < SIZE_RING) { if (c[i] < i) { if (i % 2 == 0) swap(&arr[0], &arr[i]); else{ swap(&arr[c[i]], &arr[i]); } countSpectVector(head, arr); c[i] += 1; i = 0; } else { c[i] = 0; i += 1; } } } int main() { int *perm; int i = 0; SpectrNode_t *head; perm = (int*)malloc(SIZE_RING * sizeof(int)); for (i = 0; i < SIZE_RING; i++) perm[i] = i; head = (SpectrNode_t*)malloc(sizeof(SpectrNode_t)); if (head == NULL) return 1; memset(head->spectVector, 0, SIZE_RING * sizeof(int)); head->next = NULL; generatePerms(perm, &head); return 0; }

Actually, now my thoughts. I did not accidentally set the define value to 10, because already at this value the program is computed for quite a long time (10! Permutations are generated, and for each of them, about 10 iterations + pass through the list). It is necessary to parallelize the generation of permutations. But they are generated in a while , so simple OpenMP , for example, does not do this. You also need to make a place to add a node to the list and generally work with the list as a critical section (most likely, just adding, but I'm not sure). Also in the mind so far using CUDA , but not sure yet how to solder it here.

In general, please tell me with advice. Maybe where the code to fix, to work faster or change somewhere the very concept.

ps heap algorithm was used to generate permutations.

Accepted Answer · 2017-12-16T06:36:19

It does not slow down the generation of permutations, but searches the list.

For a start it is worth replacing the list with a hash or tree. The complexity will immediately fall from O (N! Log (N!)) To ~ O (N!) (In the case of a successful hash) or ~ O (N! Loglog (N)) in the case of a tree. In practice, the time should be reduced at least 10-20 times already for N = 8.

An example of a hash implementation.

A couple of notes:

As-is code. There are no complaints about optimality.
The hash function is honestly lent, the difference in quality for the table is minimal.
The hashes are written for little-ending architecture.
The dimensions of the table can be made round, simple, made them in the first pairs ...
At the same time, it will be possible to get rid of modulo division when hashing.

Code:

 #include <stdint.h> #include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #include <assert.h> //#define HEAP_PROFILE //turn on collission counting #define LOG2l(X) ((unsigned) ( 8*sizeof (long) - __builtin_clzl((X)) - 1)) #define COUNT_OF(arr) (sizeof(arr)/sizeof(arr[0])) #define SWAP(a, b) do { \ typeof (a) _a = (a); \ a = b;\ b=_a;} while (0) #define SIZE_RING 10 typedef int16_t permutInt_t; typedef permutInt_t permut_t[SIZE_RING]; static const permut_t ring = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; //static const permut_t ring = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; static size_t hashPrimes[]={ 61l, 127l, 251l, 509l, 1021l, 2039l, 4093l, 8191l, 16381l, 32749l, 65521l, 131071l, 262139l, 524287l, 1048573l, 2097143l, 4194301l, 8388593l, 16777213l, 33554393l, 67108859l, 134217689l }; // primes number for hash size typedef struct spectrNode_t { permut_t spectVector; struct spectrNode_t *next; } spectrNode_t; typedef struct spectrHash_t { size_t sz; spectrNode_t **tbl; size_t num; uint8_t size_cnt; uint32_t seed; #ifdef HEAP_PROFILE size_t collisions; #endif // HEAP_PROFILE } spectrHash_t; int spectrHash_rehash (spectrHash_t *hash); inline uint32_t murmur3_32(const uint8_t* key, size_t len, uint32_t seed) { uint32_t h = seed; if (len > 3) { const uint32_t* key_x4 = (const uint32_t*) key; size_t i = len >> 2; do { uint32_t k = *key_x4++; k *= 0xcc9e2d51; k = (k << 15) | (k >> 17); k *= 0x1b873593; h ^= k; h = (h << 13) | (h >> 19); h = (h * 5) + 0xe6546b64; } while (--i); key = (const uint8_t*) key_x4; } if (len & 3) { size_t i = len & 3; uint32_t k = 0; key = &key[i - 1]; do { k <<= 8; k |= *key--; } while (--i); k *= 0xcc9e2d51; k = (k << 15) | (k >> 17); k *= 0x1b873593; h ^= k; } h ^= len; h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16; return h; } inline uint32_t hashFNV (const void *arr, size_t sz) { // FNV-1a hash uint32_t rv = 0x811c9dc5; while(sz--) { rv = (rv*16777619) ^ *(uint8_t*)arr++; } return rv; } inline uint32_t spectrHash_hashVal (const void *val, spectrHash_t* hash) { return murmur3_32 (val, sizeof(permutInt_t)*SIZE_RING, hash->seed) % hash->sz; //return hashFNV (val, sizeof(permutInt_t)*SIZE_RING) % hash->sz; } int spectrHash_init (spectrHash_t* hash) { hash->size_cnt = 0; hash->sz = hashPrimes[hash->size_cnt]; hash->num = 0; #ifdef HEAP_PROFILE hash->collisions = 0; #endif // HEAP_PROFILE hash->seed = hash->sz-2; //< should actually be random hash->tbl = malloc ( hash->sz*sizeof(hash->tbl[0]) ); if (hash->tbl) { memset (hash->tbl, 0, hash->sz*sizeof(hash->tbl[0])); return 0; } else { return 1; } } /** * Tries to find a node with given value in the table. If the value is not in the table * insert it. * @returns a pointer to either the found or inserted value. */ spectrNode_t *spectrHash_findOrInsert (spectrHash_t *hash, permut_t val) { uint32_t valHash = spectrHash_hashVal (val, hash); spectrNode_t *node = hash->tbl [valHash], *last; if (node) { do { if (memcmp (val, node->spectVector, sizeof(*val)*SIZE_RING) == 0) { // we found the node; return ir return node; } last = node; node = node->next; } while (node); // we haven't found the node; insert to the last node last->next = malloc (sizeof(spectrNode_t)); node = last->next; #ifdef HEAP_PROFILE hash->collisions++; #endif // HEAP_PROFILE } else { // hash key is empty; insert directly to the table hash->tbl [valHash] = malloc (sizeof(spectrNode_t)); node = hash->tbl [valHash]; } assert (node); memcpy (node->spectVector, val, sizeof(*val)*SIZE_RING); node->next = 0; if (4*++hash->num > 3*hash->sz) { spectrHash_rehash (hash); } return node; } int spectrHash_rehash (spectrHash_t *hash) { hash->size_cnt++; size_t old_sz = hash->sz; if (hash->size_cnt < COUNT_OF(hashPrimes)-1) { hash->sz = hashPrimes[hash->size_cnt]; } else { hash->sz = old_sz<<1; } spectrNode_t **new_tbl = malloc (hash->sz*sizeof(new_tbl[0])); assert (new_tbl); memset (new_tbl, 0, hash->sz*sizeof(new_tbl[0])); #ifdef HEAP_PROFILE hash->collisions = 0; #endif // HEAP_PROFILE hash->seed = hash->sz-2; //< should be random for (size_t i=0; i<old_sz; i++) { for (spectrNode_t *node = hash->tbl[i], *next; node; node=next) { next = node->next; node->next = 0; uint32_t valHash = spectrHash_hashVal (node->spectVector, hash); if (!new_tbl[valHash]) { new_tbl[valHash] = node; } else { #ifdef HEAP_PROFILE hash->collisions++; #endif // HEAP_PROFILE spectrNode_t *tail; for (tail = new_tbl[valHash]; tail->next; tail=tail->next) {} tail->next = node; } } } free ( hash->tbl ); hash->tbl = new_tbl; return 0; } void countSpectVector(spectrHash_t *hash, permut_t perm) { permut_t intSpectr; int i = 0; permutInt_t diffTmp = 0, betta = 0; memset(intSpectr, 0, SIZE_RING * sizeof(intSpectr[0])); for (i = 0; i < SIZE_RING; i++){ betta = ring[i]; diffTmp = perm[( 1 + betta ) % SIZE_RING] - perm[betta]; if (diffTmp < 0) diffTmp += SIZE_RING; intSpectr[diffTmp]++; } spectrHash_findOrInsert(hash, intSpectr); } void generatePerms(permut_t arr, spectrHash_t *hash) { permut_t c; int i; i = 0; memset(c, 0, SIZE_RING * sizeof(c[0])); while (i < SIZE_RING) { if (c[i] < i) { if (i % 2 == 0) SWAP(arr[0], arr[i]); else{ SWAP(arr[c[i]], arr[i]); } countSpectVector(hash, arr); c[i] += 1; i = 0; } else { c[i] = 0; i += 1; } } } int main() { permut_t perm; spectrHash_t hash; spectrHash_init (&hash); for (size_t i = 0; i < SIZE_RING; i++) perm[i] = i; generatePerms(perm, &hash); #ifdef HEAP_PROFILE int collisionCounters[36]; memset (collisionCounters, 0, sizeof(collisionCounters)); for (size_t i=0; i<hash.sz; i++) { if(hash.tbl[i]) { size_t col = 0; for (spectrNode_t *head = hash.tbl[i]; head->next; head=head->next) { col++; } if(col) { switch(col) { case 0: case 1: case 2: case 3: case 4: collisionCounters[col-1]++; break; default: collisionCounters[LOG2l(col)+2]++; // smart-ass for sort array to the buckets there each bucket contains element <2^N break; } } } } printf("size = %zd, num = %zd collision = %zd\n", hash.sz, hash.num, hash.collisions); long i; bool is_print_started = false; for(i=COUNT_OF (collisionCounters)-1; i > 4; i--) { if (is_print_started || collisionCounters[i]) { printf ("%4d collisions with depth <%lu\n", collisionCounters[i], 1l<<(i-2)); is_print_started = 1; } } for(; i>=0; i--) { if (is_print_started || collisionCounters[i]) { printf ("%4d collisions with depth %ld\n", collisionCounters[i], i+1); is_print_started = 1; } } #endif // HEAP_PROFILE return 0; } extern inline uint32_t hashFNV (const void *arr, size_t sz); extern inline uint32_t murmur3_32(const uint8_t* key, size_t len, uint32_t seed); extern inline uint32_t spectrHash_hashVal (const void *val, spectrHash_t* hash);

Only a binary tree implemented in the forehead may also not be the best option: it is necessary to minimize the number of comparisons of the array ... the hash should work best (I don’t know why I wrote a bunch in the answer [fixed]).
I mean that, at least when I walked through a tree, I would compare the array not from the first element, but from which the previous two comparisons showed equality, although this may be premature ... I didn’t really understand how much are obtained scattered.
Unfortunately, the mathematical relationship between elements of the tree is not revealed :(

paralleling code with generating permutations in C, openmp

1 answer 1

An example of a hash implementation.

More articles: