📎

pthreads in C

https://hpc-tutorials.llnl.gov/posix/

pthreads for C

Main program is main thread - waits for termination of additional threads.

Thread code = C functions that return error code or status information

<pthread.h> header

pthread_ prefix for names and functions

gcc -Wall -o pthreadshello pthreadshello.c -pthread

Spawning

spawning thread

#include <pthread.h>
// Prototype of the thread-spawning call: starts a new thread that executes
// start_routine(arg). The int result is the error code that these notes
// recommend checking (non-zero is treated as an error in the examples).
int pthread_create(
pthread_t *thread, // pointer as thread ID (thread object, opaque)
const pthread_attr_t *attr, // attributes (the examples in these notes pass NULL)
void *(*start_routine)(void *), // function of thread
void *arg // arguments of function
);
#include <pthread.h>
int pthread_attr_init(pthread_attr_t *attr);
int pthread_attr_destroy(pthread_attr_t *attr);
/*
 * Skeleton of a thread start routine; this is the code executed by the
 * spawned thread.
 *
 * genericargs: the untyped pointer that was handed to pthread_create as its
 *              last parameter; it is cast back to the real argument type.
 * Returns NULL as the thread's status, so that a pthread_join which catches
 * the status reads a defined value.
 */
void *start_routine(void *genericargs) { // executed by thread
    myarg_t *realargs = (myarg_t*)genericargs;
    // ... thread work using realargs ...
    return NULL;
}
struct { // pointer to arguments must have been allocated by spawning thread on heap
// args
} *

Example for spawning

Starting $p$ threads with ranks in $[0; p-1]$

  • example (with race condition)
    #include <stdio.h>
    #include <stdlib.h>
    // pthreads header
    #include <pthread.h>
    // global state; number of threads (bad practice)
    // set once in main before the threads are created, read by every thread
    int threads_glob;
    // code for thread functions
    /*
     * Thread start routine: reports this thread's rank and the total number
     * of threads on stdout.
     *
     * argument: not a real pointer - the rank value itself, carried inside
     *           the void* and converted straight back to int.
     * Does not return; the thread ends through pthread_exit(NULL).
     */
    void *something(void *argument){
        // bad practice (casting): pointer bits are reinterpreted as the rank
        const int my_rank = (int)argument;

        printf("Thread rank %d of %d responding\n", my_rank, threads_glob);
        pthread_exit(NULL);
    }
    // main
    /*
     * Entry point: reads the thread count from "-t <n>" (default 1), spawns
     * that many threads running something(), waits for all of them and
     * releases the handle array.
     *
     * Returns 0 on success, EXIT_FAILURE on a bad command line, a failed
     * allocation or a failed pthreads call.
     */
    int main(int argc, char *argv[]){
        int threads = 1; // default
        int i;
        int errcode;
        pthread_t *handle;

        for (i=1; i<argc && argv[i][0]=='-'; i++) {
            if (argv[i][1]=='t') {
                // "-t" must be followed by a value; argv[argc] is NULL
                if (++i>=argc) {
                    fprintf(stderr, "option -t needs a thread count\n");
                    return EXIT_FAILURE;
                }
                // get number of threads from commandline
                if (sscanf(argv[i], "%d", &threads)!=1 || threads<1) {
                    fprintf(stderr, "invalid thread count '%s'\n", argv[i]);
                    return EXIT_FAILURE;
                }
            }
        }
        threads_glob = threads;

        handle = malloc((size_t)threads*sizeof *handle);
        if (handle==NULL) {
            fprintf(stderr, "out of memory\n");
            return EXIT_FAILURE;
        }

        // fork the threads
        for (i=0; i<threads; i++) {
            // bad practice (casting): the rank travels by value inside the pointer
            errcode = pthread_create(&handle[i],NULL,something,(void*)i);
            if (errcode!=0) {
                fprintf(stderr, "pthread_create failed with %d\n", errcode);
                return EXIT_FAILURE;
            }
        }
        // ...

        // wait for the threads; returning from main earlier would end the
        // whole process before they had a chance to print
        for (i=0; i<threads; i++) {
            errcode = pthread_join(handle[i],NULL);
            if (errcode!=0) {
                fprintf(stderr, "pthread_join failed with %d\n", errcode);
                return EXIT_FAILURE;
            }
        }
        free(handle);
        return 0;
    }

    This program is non deterministic - race condition:

    Variable i read by many threads.

    The value may have been overwritten before a thread has copied it into a local variable.

    Solution:

    The spawning (“master”) thread puts the rank for each spawned thread into a separate location.

    No race condition: Master thread writes each rank value once, before spawned thread starts and reads.

  • example (without race condition)

    The spawning (“master”) thread puts the rank for each spawned thread into a separate location.

    No race condition: Master thread writes each rank value once, before spawned thread starts and reads.

    #include <stdio.h>
    #include <stdlib.h>
    #include <assert.h>
    // pthreads header
    #include <pthread.h>
    // global state; number of threads (bad practice)
    // set once in main before the threads are created, read by every thread
    int threads_glob;
    // code for thread functions
    /*
     * Thread start routine: reports this thread's rank and the total number
     * of threads on stdout.
     *
     * argument: pointer to an int holding this thread's rank (main passes
     *           &rank[i]); the int is only read, never written.
     * Does not return; the thread ends through pthread_exit(NULL).
     */
    void *something(void *argument){
        // dereference: the argument is the address of the rank, not the rank
        int rank = *(int *)argument;
        printf("Thread rank %d of %d responding\n", rank, threads_glob);
        pthread_exit(NULL);
    }
    // main
    /*
     * Entry point: reads the thread count from "-t <n>" (default 1), gives
     * every thread its own rank slot, spawns the threads running something(),
     * joins them and frees both arrays.
     *
     * Returns 0 on success, EXIT_FAILURE on a bad command line or a failed
     * allocation; pthreads errors are caught by assert.
     */
    int main(int argc, char *argv[]){
        int threads = 1; // default
        int i;
        int errcode;
        int *rank;          // rank[i] is the private argument of thread i
        pthread_t *handle;

        for (i=1; i<argc && argv[i][0]=='-'; i++) {
            if (argv[i][1]=='t') {
                // "-t" must be followed by a value; argv[argc] is NULL
                if (++i>=argc) {
                    fprintf(stderr, "option -t needs a thread count\n");
                    return EXIT_FAILURE;
                }
                // get number of threads from commandline
                if (sscanf(argv[i], "%d", &threads)!=1 || threads<1) {
                    fprintf(stderr, "invalid thread count '%s'\n", argv[i]);
                    return EXIT_FAILURE;
                }
            }
        }
        threads_glob = threads;

        handle = malloc((size_t)threads*sizeof *handle);
        rank = malloc((size_t)threads*sizeof *rank);
        if (handle==NULL || rank==NULL) {
            fprintf(stderr, "out of memory\n");
            free(rank);
            free(handle);
            return EXIT_FAILURE;
        }

        // fork the threads
        for (i=0; i<threads; i++) {
            // written once, before the thread that reads it exists
            rank[i] = i;
            errcode = pthread_create(&handle[i], NULL, something, &rank[i]);
            assert(errcode==0);
        }
        // join the threads again
        for (i=0; i<threads; i++) {
            errcode = pthread_join(handle[i],NULL); // wait to terminate
            assert(errcode==0);
        }
        // safe to free only now: the threads read rank[] until they end
        free(rank);
        free(handle);
        return 0;
    }

    The performance is limited when we scale up because the sequential part is too big (Amdahl's law):

    Thread creation is expensive.

    // Excerpt of main: the master thread alone issues all pthread_create
    // calls, one after the other.
    // fork the threads
    for (i=0; i<threads; i++) {
    rank[i] = i;
    errcode = pthread_create(&handle[i], NULL, something, &rank[i]);
    assert(errcode==0);
    }
    // join the threads again
    for (i=0; i<threads; i++) {
    pthread_join(handle[i],NULL); // wait to terminate
    }	

    Solutions:

    1. Spawn recursively
    2. Postpone thread creation overhead by keeping threads alive and only exiting at the very end (i.e. wait on a condition variable, broadcast wakeup, ...).

  • example (increased performance through recursive spawn)
    #include <stdio.h>
    #include <stdlib.h>
    #include <assert.h>
    // pthreads header
    #include <pthread.h>
    // global state; number of threads (bad practice)
    // set once in main before the threads are created, read by every thread
    int threads_glob;
    // code for thread functions
    /*
     * Thread start routine: reports this thread's rank and the total number
     * of threads on stdout.
     *
     * argument: pointer to an int holding this thread's rank (main passes
     *           &rank[i]); the int is only read, never written.
     * Does not return; the thread ends through pthread_exit(NULL).
     */
    void *something(void *argument){
        // dereference: the argument is the address of the rank, not the rank
        int rank = *(int *)argument;
        printf("Thread rank %d of %d responding\n", rank, threads_glob);
        pthread_exit(NULL);
    }
    // main
    /*
     * Entry point: reads the thread count from "-t <n>" (default 1), gives
     * every thread its own rank slot, spawns the threads running something(),
     * joins them and frees both arrays.
     *
     * NOTE(review): although this listing sits under the "recursive spawn"
     * heading, all threads are created by main in one flat loop; no thread
     * spawns further threads here.
     *
     * Returns 0 on success, EXIT_FAILURE on a bad command line or a failed
     * allocation; pthreads errors are caught by assert.
     */
    int main(int argc, char *argv[]){
        int threads = 1; // default
        int i;
        int errcode;
        int *rank;          // rank[i] is the private argument of thread i
        pthread_t *handle;

        for (i=1; i<argc && argv[i][0]=='-'; i++) {
            if (argv[i][1]=='t') {
                // "-t" must be followed by a value; argv[argc] is NULL
                if (++i>=argc) {
                    fprintf(stderr, "option -t needs a thread count\n");
                    return EXIT_FAILURE;
                }
                // get number of threads from commandline
                if (sscanf(argv[i], "%d", &threads)!=1 || threads<1) {
                    fprintf(stderr, "invalid thread count '%s'\n", argv[i]);
                    return EXIT_FAILURE;
                }
            }
        }
        threads_glob = threads;

        handle = malloc((size_t)threads*sizeof *handle);
        rank = malloc((size_t)threads*sizeof *rank);
        if (handle==NULL || rank==NULL) {
            fprintf(stderr, "out of memory\n");
            free(rank);
            free(handle);
            return EXIT_FAILURE;
        }

        // fork the threads
        for (i=0; i<threads; i++) {
            // written once, before the thread that reads it exists
            rank[i] = i;
            errcode = pthread_create(&handle[i], NULL, something, &rank[i]);
            assert(errcode==0);
        }
        // join the threads again
        for (i=0; i<threads; i++) {
            errcode = pthread_join(handle[i],NULL); // wait to terminate
            assert(errcode==0);
        }
        // safe to free only now: the threads read rank[] until they end
        free(rank);
        free(handle);
        return 0;
    }

Error codes

pthread error codes

Good practice: checking return value ( error )

#include <pthread.h>
error = pthread_<any pthreads function>
if (error!=0) {
fprintf(stderr, "Thread %d in trouble with %d\n", myid, error);
// ...
}

Finalizing

finalizing thread

#include <pthread.h>
void pthread_exit(void *status);
#include <pthread.h>
int pthread_join(pthread_t thread, void **status); // allows catching status

Binding threads to cores

Binding threads to cores

_np non-portable, non-standard extension to pthreads.

#define _GNU_SOURCE
#include <pthread.h>
int pthread_setaffinity_np(pthread_t thread, size_t cpusetsize, const cpu_set_t *cpuset);
int pthread_getaffinity_np(pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset);