Task: Parallelize the search for substrings in a string.
If you use a trivial algorithm (apply the pattern to all places of the line), then you can parallelize it by distributing the intervals among the threads (for example, 1 stream - apply to 1..4 positions, 2 stream - 5..8 positions, etc.). What other algorithms for searching substrings can also be parallelized? (I don't want to use the trivial method)
Development Tools: C, POSIX Threads.
#include <stdio.h> #include <pthread.h> #include <string.h> #include <stdlib.h> #include <time.h> #include <stdbool.h> char *pattern; struct args { int num_thread; char *pattern; char *line; long int x0; long int x1; }; char* read_pattern(char *filename) { FILE *fp; long int lSize; char *buffer; fp = fopen ( filename , "rb" ); if( !fp ) perror(filename),exit(1); fseek( fp , 0L , SEEK_END); lSize = ftell( fp ); rewind( fp ); printf("Size = %ld\n",lSize); /* allocate memory for entire content */ buffer = calloc( 1, lSize+1 ); if( !buffer ) fclose(fp),fputs("memory alloc fails",stderr),exit(1); /* copy the file into the buffer */ if( 1!=fread( buffer , lSize, 1 , fp) ) fclose(fp),free(buffer),fputs("entire read fails",stderr),exit(1); fclose(fp); return buffer; } void* seek_substring_KMP (void *ptr) { struct args * a = (struct args*)ptr; long int i, j, N, M; N = a->x1-a->x0; M = strlen(a->line); int *d =(int*)malloc(M*sizeof(int)); /* Π΄ΠΈΠ½Π°ΠΌΠΈΡΠ΅ΡΠΊΠΈΠΉ ΠΌΠ°ΡΡΠΈΠ² Π΄Π»ΠΈΠ½Ρ Π*/ /* ΠΡΡΠΈΡΠ»Π΅Π½ΠΈΠ΅ ΠΏΡΠ΅ΡΠΈΠΊΡ-ΡΡΠ½ΠΊΡΠΈΠΈ */ d[0]=0; for(i=1,j=0;i<M;i++) { while(j>0 && a->line[j]!=a->line[i]) j = d[j-1]; if(a->line[j]==a->line[i]) j++; d[i]=j; } /* ΠΏΠΎΠΈΡΠΊ */ for(i=a->x0,j=0;i<a->x1; i++) { while(j>0 && a->line[j]!=a->pattern[i]) j=d[j-1]; if(a->line[j]==a->pattern[i]) j++; if (j==M) { printf("Pos = %ld\n",i-j+1); } } free (d); /* ΠΎΡΠ²ΠΎΠ±ΠΎΠΆΠ΄Π΅Π½ΠΈΠ΅ ΠΏΠ°ΠΌΡΡΠΈ ΠΌΠ°ΡΡΠΈΠ²Π° d */ pthread_exit(NULL); } int main(int argc, char** argv) { if(argc!=4) return 0; pattern = read_pattern(argv[1]); char *line = read_pattern(argv[2]); int threads_count = atoi(argv[3]); printf("Threads count = %d\n",threads_count); int NUM =1+(strlen(pattern)/(threads_count)); struct args * a = (struct args*)malloc(threads_count*sizeof(struct args)); pthread_t *threads = (pthread_t*)malloc(threads_count*sizeof(pthread_t)); int error_code; for(int i=0;i<threads_count;i++) { a[i].num_thread = i; a[i].pattern = pattern; a[i].line = line; a[i].x0=i*NUM; if(i==threads_count-1) { a[i].x1 = strlen(pattern) - 1; } else { a[i].x1 = i*NUM + strlen(line) + NUM -1; } } clock_t t = clock(); for(int i=0;i<threads_count;i++) { error_code = pthread_create( &threads[i], NULL, seek_substring_KMP, (void*) &a[i]); if(error_code) { fprintf(stderr,"Error - pthread_create() return code: %d\n",error_code); exit(0); } //else printf("Thread %d is created\n",i); } for(int i = 0; i<threads_count;i++) { pthread_join(threads[i],NULL); } t = clock() - t; free(a); free(threads); printf("Work time = %f\n",((float)t)/CLOCKS_PER_SEC); return 0; }