Problem: I have to increment x1 and x2 variable which should be done by separate threads and next increment of both variables should not be called until pr
Instead of having a bunch of threads to do x1 things, pausing them, then having a bunch of threads do x2 things, consider a threadpool. A threadpool is a bunch of threads which sit idle until you have work for them to do, then they unpause and do the work.
An advantage of this system is that it uses condition variables and mutexes rather than semaphores. On many systems, mutexes are faster than semaphores (because they are more limited).
// A Task is an abstract unit of work ("something that can be done") that can
// be placed in a work queue and executed later by whichever thread picks it up.
class Task
{
public:
    // Virtual destructor: concrete tasks are handled through Task*, and
    // destroying a derived object through a base pointer is undefined
    // behaviour unless the base destructor is virtual.
    virtual ~Task() { }

    // Performs the work. Every concrete task must implement this.
    virtual void run() = 0;
};
// The work queue shared by the producer and the worker threads.
// Deliberately a plain struct for the sake of the example; it could be made
// more object oriented (private members, accessors) if desired.
//
// The mutex and the two condition variables are NOT initialised here: the
// owner must initialise them before starting any worker and destroy them after
// the workers have exited.
struct WorkQueue
{
    // Start in the "keep running" state. Without this the flag would hold an
    // indeterminate value the first time a worker tests it.
    WorkQueue() : finished(false) { }

    std::vector<Task*> queue;    // you must hold the mutex to access the queue
    bool finished;               // if this is set to true, threadpoolRun starts exiting
    pthread_mutex_t mutex;       // guards queue and finished
    pthread_cond_t hasWork;      // this condition is signaled if there may be more to do
    pthread_cond_t doneWithWork; // this condition is signaled if the work queue may be empty
};
// Worker-thread entry point for the thread pool.
//
// queuePtr is always a WorkQueue* (it arrives as void* because that is what
// pthread_create hands to a start routine). The worker repeatedly takes a task
// from the back of the queue and runs it with the mutex released. When the
// queue is empty it broadcasts doneWithWork and sleeps on hasWork. It leaves
// the loop once workQueue.finished is set.
//
// Returns void* (always NULL) because pthread_create requires a start routine
// of type void* (*)(void*).
void* threadpoolRun(void* queuePtr)
{
    // void* carries no type information, so static_cast is the correct
    // conversion; dynamic_cast cannot be applied to a void* operand.
    WorkQueue& workQueue = *static_cast<WorkQueue*>(queuePtr);

    pthread_mutex_lock(&workQueue.mutex);
    // invariant: the mutex is held every time the loop condition is evaluated
    while (!workQueue.finished) {
        if (workQueue.queue.empty()) {
            // Nothing to do. Tell the main thread that the queue may be empty,
            // then sleep until another thread signals that there may be work
            // (or that we should shut down).
            // NOTE(review): an empty queue only means every task has been
            // taken, not that tasks taken by other workers have finished.
            pthread_cond_broadcast(&workQueue.doneWithWork);
            pthread_cond_wait(&workQueue.hasWork, &workQueue.mutex);
        } else {
            // There is work to be done. Grab the task and release the mutex so
            // that other threads can get things from the work queue.
            Task* myTask = workQueue.queue.back();
            workQueue.queue.pop_back(); // no one else should start this task
            pthread_mutex_unlock(&workQueue.mutex);

            // now that other threads can look at the queue, take our time and
            // complete the task
            myTask->run();

            // re-acquire the mutex so the invariant holds at the top of the loop
            pthread_mutex_lock(&workQueue.mutex);
        }
    }
    // We still hold the mutex here. Release it before exiting, otherwise the
    // other workers can never re-acquire it inside pthread_cond_wait and the
    // pool cannot shut down.
    pthread_mutex_unlock(&workQueue.mutex);
    return NULL;
}
// From here on we define the concrete tasks for this particular problem.
//
// Task_x1a: one of the X1 work items. It keeps a pointer to the data it
// operates on but does not own it; whoever creates the task must keep that
// data alive for as long as the task may run.
class Task_x1a
    : public Task
{
public:
    // explicit: a bare int* should never silently convert into a task
    explicit Task_x1a(int* inData)
        : mData(inData)
    { }

    // Performs this task's share of the X1 work (placeholder body).
    virtual void run()
    {
        // do some calculations on mData
    }

private:
    int* mData; // not owned
};
// Remaining X1 tasks. Their bodies are elided ("...") in this example, so they
// must be filled in (and each definition terminated with ';') before the file
// will compile. Presumably each one mirrors Task_x1a: a constructor taking the
// int* data pointer plus a run() override. TODO confirm against the real
// workload.
class Task_x1b
: public Task
{ ... }
class Task_x1c
: public Task
{ ... }
class Task_x1d
: public Task
{ ... }
// X2 tasks. main() queues these only after it has waited on the X1 batch.
// Bodies are elided in the same way as the X1 placeholders.
class Task_x2a
: public Task
{ ... }
class Task_x2b
: public Task
{ ... }
class Task_x2c
: public Task
{ ... }
class Task_x2d
: public Task
{ ... }
int main()
{
// bet you thought you'd never get here!
static const int numberOfWorkers = 4; // this tends to be either the number of CPUs
// or CPUs * 2
WorkQueue workQueue; // create the workQueue shared by all threads
pthread_mutex_create(&workQueue.mutex);
pthread_cond_create(&workQueue.hasWork);
pthread_cond_create(&workQueue.doneWithWork);
pthread_t workers[numberOfWorkers];
int data[10];
for (int i = 0; i < numberOfWorkers; i++)
pthread_create(&pth1, NULL, &threadpoolRun, &workQueue);
// now all of the workers are sitting idle, ready to do work
// give them the X1 tasks to do
{
Task_x1a x1a(data);
Task_x1b x1b(data);
Task_x1c x1c(data);
Task_x1d x1d(data);
pthread_mutex_lock(&workQueue.mutex);
workQueue.queue.push_back(x1a);
workQueue.queue.push_back(x1b);
workQueue.queue.push_back(x1c);
workQueue.queue.push_back(x1d);
// now that we've queued up a bunch of work, we have to signal the
// workers that the work is available
pthread_cond_broadcast(&workQueue.hasWork);
// and now we wait until the workers finish
while(!workQueue.queue.empty())
pthread_cond_wait(&workQueue.doneWithWork);
pthread_mutex_unlock(&workQueue.mutex);
}
{
Task_x2a x2a(data);
Task_x2b x2b(data);
Task_x2c x2c(data);
Task_x2d x2d(data);
pthread_mutex_lock(&workQueue.mutex);
workQueue.queue.push_back(x2a);
workQueue.queue.push_back(x2b);
workQueue.queue.push_back(x2c);
workQueue.queue.push_back(x2d);
// now that we've queued up a bunch of work, we have to signal the
// workers that the work is available
pthread_cond_broadcast(&workQueue.hasWork);
// and now we wait until the workers finish
while(!workQueue.queue.empty())
pthread_cond_wait(&workQueue.doneWithWork);
pthread_mutex_unlock(&workQueue.mutex);
}
// at the end of all of the work, we want to signal the workers that they should
// stop. We do so by setting workQueue.finish to true, then signalling them
pthread_mutex_lock(&workQueue.mutex);
workQueue.finished = true;
pthread_cond_broadcast(&workQueue.hasWork);
pthread_mutex_unlock(&workQueue.mutex);
pthread_mutex_destroy(&workQueue.mutex);
pthread_cond_destroy(&workQueue.hasWork);
pthread_cond_destroy(&workQueue.doneWithWork);
return data[0];
}
Major notes:
The problem with your program is that you are synchronizing your threads to run in lockstep with each other. In each thread, at each iteration, a counter is incremented, and then two synchronization primitives are called. So, more than half the time in the loop body is spent on synchronization.
In your program, the counters have nothing to do with each other, so they should run independently. Each thread could then spend its iterations doing actual computation rather than mostly synchronizing.
For the output requirements, you can allow each thread to put each sub-calculation into an array that the main thread can read from. The main thread waits for each thread to completely finish, and can then read from each array to create your output.
// Thread entry point for the x1 counter.
// parm must point to an int array with room for at least 10 elements. The
// counter x1 (declared outside this function) is incremented ten times, and
// the value after each increment is stored in the next array slot.
// Always returns NULL.
void *threadfunc1(void *parm)
{
    int *const samples = static_cast<int *>(parm);
    int *const samplesEnd = samples + 10;
    for (int *slot = samples; slot != samplesEnd; ++slot) {
        ++x1;
        *slot = x1;
    }
    return NULL;
}
// Thread entry point for the x2 counter.
// parm must point to an int array with room for at least 10 elements. The
// counter x2 (declared outside this function) is incremented ten times, and
// the value after each increment is stored in the next array slot.
// Always returns NULL.
void *threadfunc2(void *parm)
{
    int *const samples = static_cast<int *>(parm);
    int *const samplesEnd = samples + 10;
    for (int *slot = samples; slot != samplesEnd; ++slot) {
        ++x2;
        *slot = x2;
    }
    return NULL;
}
// Runs the two counter threads to completion, then prints the values each of
// them recorded. Returns 0 on success and 1 if a thread could not be started.
// pth1 and pth2 are not declared in this function; presumably they are
// file-scope pthread_t variables (TODO confirm).
int main () {
    int out1[10];
    int out2[10];

    // The output arrays are only filled in if the threads actually run, so a
    // failed pthread_create must not fall through to the printing code.
    if (pthread_create(&pth1, NULL, threadfunc1, out1) != 0) {
        fprintf(stderr, "could not start thread 1\n");
        return 1;
    }
    if (pthread_create(&pth2, NULL, threadfunc2, out2) != 0) {
        fprintf(stderr, "could not start thread 2\n");
        pthread_join(pth1, NULL); // thread 1 writes into out1; let it finish first
        return 1;
    }

    // Both threads must have finished before their output is read.
    pthread_join(pth1, NULL);
    pthread_join(pth2, NULL);

    // Each printed line corresponds to one iteration step.
    // NOTE(review): this prints entries 1..9, so the first recorded value
    // (index 0) is never shown, and entry 9 is printed again as the result
    // below. Kept as is; confirm that this is the intended output.
    for (int step = 1; step <= 9; ++step) {
        printf("Final : x1 = %d, x2 = %d\n", out1[step], out2[step]);
    }
    printf("Result : x1 = %d, x2 = %d\n", out1[9], out2[9]);

    // 0 is the conventional "success" exit status; a non-zero status tells the
    // caller that the program failed.
    return 0;
}