pthreads in C
https://hpc-tutorials.llnl.gov/posix/
pthreads for C
Main program is main thread - waits for termination of additional threads.
Thread code = C functions that return error code or status information
<pthread.h>
header
pthread_
prefix for names and functions
gcc -Wall -o pthreadshello pthreadshello.c -pthread
Spawning
spawning thread
#include <pthread.h>
// Spawns a new thread that runs start_routine(arg).
// Returns an error code; the examples in these notes treat 0 as success.
int pthread_create(
pthread_t *thread, // pointer as thread ID (thread object, opaque)
const pthread_attr_t *attr, // attributes (the examples in these notes pass NULL)
void *(*start_routine)(void *), // function of thread
void *arg // arguments of function
);
#include <pthread.h>
// Initializes an attribute object of the type that pthread_create takes as `attr`.
int pthread_attr_init(pthread_attr_t *attr);
// Counterpart of pthread_attr_init: tears the attribute object down again.
int pthread_attr_destroy(pthread_attr_t *attr);
// Skeleton of a thread entry point: receives the generic pointer that was
// handed to pthread_create as `arg` and converts it back to its real type.
void *start_routine(void *genericargs) { // executed by thread
    myarg_t *realargs = (myarg_t *)genericargs;
    (void)realargs; // placeholder: the actual thread work would use realargs here
    // A function declared to return void * has to return something; the value
    // is the thread's status.
    return NULL;
}
struct { // the thread receives a pointer to this argument block; per these notes the spawning thread must have allocated it on the heap
// args
} *
Example for spawning
starting threads
example (with race condition)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// pthreads header
#include <pthread.h>

// global state; number of threads (bad practice)
int threads_glob;

// code for thread functions
// The rank is smuggled through the pointer value itself instead of pointing
// at real storage. Going through intptr_t keeps the pointer <-> integer
// conversion well-defined on platforms where int and void * differ in size.
// NOTE(review): because the rank is passed by value here, the threads do not
// actually share main()'s `i`; a race on `i` would need &i to be passed.
// Confirm which variant this example is meant to show. What does vary from
// run to run is the order of the output lines.
void *something(void *argument) {
    int rank = (int)(intptr_t)argument; // bad practice (casting)
    printf("Thread rank %d of %d responding\n", rank, threads_glob);
    pthread_exit(NULL);
}

// main: reads an optional "-t <n>" from the command line and forks n threads.
int main(int argc, char *argv[]) {
    int threads = 1; // default
    int i;
    pthread_t *handle;

    for (i = 1; i < argc && argv[i][0] == '-'; i++) {
        // only consume a value if there is one; argv[argc] is NULL
        if (argv[i][1] == 't' && i + 1 < argc) {
            i++;
            // get number of threads from commandline
            if (sscanf(argv[i], "%d", &threads) != 1 || threads < 1) {
                fprintf(stderr, "invalid thread count '%s'\n", argv[i]);
                return EXIT_FAILURE;
            }
        }
    }
    threads_glob = threads;

    handle = malloc((size_t)threads * sizeof *handle);
    if (handle == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }

    // fork the threads
    for (i = 0; i < threads; i++) {
        int errcode = pthread_create(&handle[i], NULL, something,
                                     (void *)(intptr_t)i); // bad practice (casting)
        if (errcode != 0) {
            fprintf(stderr, "pthread_create failed with %d\n", errcode);
            return EXIT_FAILURE;
        }
    }
    // ...
}
This program is non deterministic - race condition:
Variable
i
is read by many threads. Its value may already have been overwritten before a thread has copied it into a local variable.
Solution:
The spawning (“master”) thread puts the rank for each spawned thread into a separate location.
No race condition: Master thread writes each rank value once, before spawned thread starts and reads.
example (without race condition)
The spawning (“master”) thread puts the rank for each spawned thread into a separate location.
No race condition: Master thread writes each rank value once, before spawned thread starts and reads.
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
// pthreads header
#include <pthread.h>

// global state; number of threads (bad practice)
int threads_glob;

// code for thread functions
// `argument` points at this thread's own slot in main()'s rank array. The
// slot is written once by main() before the thread is created and never
// changed afterwards, so reading it here is race-free.
void *something(void *argument) {
    int rank = *(const int *)argument;
    printf("Thread rank %d of %d responding\n", rank, threads_glob);
    pthread_exit(NULL);
}

// main: reads an optional "-t <n>" from the command line, forks n threads and
// joins them again. Returns 0 on success, EXIT_FAILURE on bad input or when
// memory cannot be allocated.
int main(int argc, char *argv[]) {
    int threads = 1; // default
    int i;
    int errcode;
    int *rank;
    pthread_t *handle;

    for (i = 1; i < argc && argv[i][0] == '-'; i++) {
        // only consume a value if there is one; argv[argc] is NULL
        if (argv[i][1] == 't' && i + 1 < argc) {
            i++;
            // get number of threads from commandline
            if (sscanf(argv[i], "%d", &threads) != 1 || threads < 1) {
                fprintf(stderr, "invalid thread count '%s'\n", argv[i]);
                return EXIT_FAILURE;
            }
        }
    }
    threads_glob = threads;

    handle = malloc((size_t)threads * sizeof *handle);
    rank = malloc((size_t)threads * sizeof *rank);
    if (handle == NULL || rank == NULL) {
        perror("malloc");
        free(rank);
        free(handle);
        return EXIT_FAILURE;
    }

    // fork the threads
    for (i = 0; i < threads; i++) {
        rank[i] = i; // one private location per thread
        errcode = pthread_create(&handle[i], NULL, something, &rank[i]);
        assert(errcode == 0);
    }

    // join the threads again
    for (i = 0; i < threads; i++) {
        errcode = pthread_join(handle[i], NULL); // wait to terminate
        assert(errcode == 0);
    }

    free(rank);
    free(handle);
    return 0;
}
Performance is limited when we scale up because the sequential part is too big (Amdahl's law):
Thread creation is expensive.
// fork the threads for (i=0; i<threads; i++) { rank[i] = i; errcode = pthread_create(&handle[i], NULL, something, &rank[i]); assert(errcode==0); }// join the threads again for (i=0; i<threads; i++) { pthread_join(handle[i],NULL); // wait to terminate }
Solutions:
- Spawn recursively
- Avoid repeated thread creation overhead by keeping threads alive and only exiting at the very end (i.e. wait on a condition variable, broadcast wakeup, ...).
example (increased performance through recursive spawn)
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
// pthreads header
#include <pthread.h>

// global state (bad practice): number of threads, plus the handle and rank
// tables that every spawning thread has to reach. main() sets all three
// before the first thread is created.
int threads_glob;
static pthread_t *handle;
static int *rank;

// code for thread functions
// Recursive spawn: the ranks form a binary tree in which rank r creates
// ranks 2r+1 and 2r+2 (as far as they exist). Thread creation is spread
// over the threads instead of being done by one thread in a single
// sequential loop.
// `argument` points at this thread's own slot in the rank table; the parent
// wrote it before calling pthread_create, so reading it is race-free.
void *something(void *argument) {
    int my_rank = *(const int *)argument;
    size_t total = (size_t)threads_glob;
    size_t first_child = 2 * (size_t)my_rank + 1; // size_t: no signed overflow
    size_t child;
    int errcode;

    // fork the (up to two) children
    for (child = first_child; child < total && child <= first_child + 1; child++) {
        rank[child] = (int)child; // written once, before the child starts
        errcode = pthread_create(&handle[child], NULL, something, &rank[child]);
        assert(errcode == 0);
    }

    printf("Thread rank %d of %d responding\n", my_rank, threads_glob);

    // join the children again; each child has joined its own subtree first
    for (child = first_child; child < total && child <= first_child + 1; child++) {
        errcode = pthread_join(handle[child], NULL); // wait to terminate
        assert(errcode == 0);
    }
    pthread_exit(NULL);
}

// main: reads an optional "-t <n>" from the command line, starts the root of
// the spawn tree (rank 0) and waits for it. Returns 0 on success,
// EXIT_FAILURE on bad input or when memory cannot be allocated.
int main(int argc, char *argv[]) {
    int threads = 1; // default
    int i;
    int errcode;

    for (i = 1; i < argc && argv[i][0] == '-'; i++) {
        // only consume a value if there is one; argv[argc] is NULL
        if (argv[i][1] == 't' && i + 1 < argc) {
            i++;
            // get number of threads from commandline
            if (sscanf(argv[i], "%d", &threads) != 1 || threads < 1) {
                fprintf(stderr, "invalid thread count '%s'\n", argv[i]);
                return EXIT_FAILURE;
            }
        }
    }
    threads_glob = threads;

    handle = malloc((size_t)threads * sizeof *handle);
    rank = malloc((size_t)threads * sizeof *rank);
    if (handle == NULL || rank == NULL) {
        perror("malloc");
        free(rank);
        free(handle);
        return EXIT_FAILURE;
    }

    // fork only the root; it spawns the rest of the tree itself
    rank[0] = 0;
    errcode = pthread_create(&handle[0], NULL, something, &rank[0]);
    assert(errcode == 0);

    // joining the root waits for the whole tree
    errcode = pthread_join(handle[0], NULL);
    assert(errcode == 0);

    free(rank);
    free(handle);
    return 0;
}
Error codes
pthread error codes
Good practice: checking return value (
error
)
#include <pthread.h>
error = pthread_<any pthreads function>
// A non-zero return value signals failure; report it together with the id of
// the thread that saw it.
if (error!=0) {
    fprintf(stderr, "Thread %d in trouble with %d\n", myid, error);
    // ...
}
Finalizing
finalizing thread
#include <pthread.h>
// Ends the calling thread; the examples in these notes call it as the last
// statement of the thread function, passing NULL.
// `status` is presumably the value pthread_join hands back through its status parameter (confirm).
void pthread_exit(void *status);
#include <pthread.h>
// Waits for `thread` to terminate. Returns an error code like the other
// pthread_ functions; the examples in these notes pass NULL for `status`.
int pthread_join(pthread_t thread, void **status); // allows catching status
Binding threads to cores
Binding threads to cores
_np
non-portable, non-standard extension to pthreads.
// Has to come before the first #include: the _np functions are non-standard
// extensions.
#define _GNU_SOURCE
#include <pthread.h>

// Binds `thread` to the cores described by `cpuset`; `cpusetsize` is the size
// of the set that `cpuset` points to.
int pthread_setaffinity_np(pthread_t thread, size_t cpusetsize, const cpu_set_t *cpuset);
// Counterpart of pthread_setaffinity_np: fills `cpuset` for `thread`.
int pthread_getaffinity_np(pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset);