C++11 alternative to OpenMP with clang - multithreading

Clang doesn't support OpenMP (yet), but is it possible to implement a "parallel for" with C++11?

OpenMP version :
// parallelfor_gcc.cpp
// g++ -O2 -Wall -std=c++11 -fopenmp parallelfor_gcc.cpp
#include <cmath>
#include <vector>
// Fills a 1e8-element vector with one period of a sine wave; the loop
// iterations are distributed over threads by OpenMP.
int main() {
    const unsigned int count = 1e8;
    std::vector<double> samples(count);
    const double total = static_cast<double>(count);

    #pragma omp parallel for
    for (unsigned int k = 0; k < count; ++k) {
        samples[k] = sin(2*M_PI*k/total);
    }
    return 0;
}
C++11 version:
// parallelfor_clang.cpp
// clang++ -O4 -Wall -std=c++11 -lpthread parallelfor_clang.cpp
#include <cmath>
#include <functional>
#include <thread>
#include <vector>
// Calls func(i) once for every i in [0, size), spreading the indices over
// worker threads in an interleaved fashion (worker t handles t, t+n, t+2n, ...
// where n is the number of workers). Returns after all workers have finished.
//
// size: number of indices to process; 0 means func is never called.
// func: callable invoked with each index; it is called from several threads
//       at once, so it must be safe to run concurrently for distinct indices.
void parallelFor(const unsigned int size,
                 std::function<void(const unsigned int)> func) {
    // hardware_concurrency() may return 0 when the value cannot be determined;
    // fall back to one worker so the loop body still runs.
    unsigned int nbThreads = std::thread::hardware_concurrency();
    if (nbThreads == 0) nbThreads = 1;
    // Never start more workers than there are indices.
    if (nbThreads > size) nbThreads = size;

    std::vector<std::thread> threads;
    threads.reserve(nbThreads);
    for (unsigned int idThread = 0; idThread < nbThreads; idThread++) {
        // func is captured by reference: every worker is joined before this
        // function returns, so the reference cannot dangle.
        threads.emplace_back([=, &func]() {
            for (unsigned int i = idThread; i < size; i += nbThreads) {
                func(i);
                // Stop before i + nbThreads could reach or pass size; this
                // also prevents unsigned wrap-around for very large sizes.
                if (size - i <= nbThreads) break;
            }
        });
    }
    for (auto & t : threads) t.join();
}
// Fills a 1e8-element vector with one period of a sine wave, handing the
// per-index work to parallelFor.
int main() {
    const unsigned int count = 1e8;
    std::vector<double> samples(count);
    // count is copied into the closure, the vector is shared by reference.
    parallelFor(count, [count, &samples](unsigned int k) {
        samples[k] = sin(2*M_PI*k/(double)count);
    });
    return 0;
}
OpenMP clauses (firstprivate...) can be implemented in the same way but it's (a little) more work...

Related

Pthread program returning expected declaration specifiers or ‘...’ before ‘&’ token

I am getting the same error multiple times when compiling the following pthreads program in linux using:
gcc -c -lpthread proj2_part1.c -lrt
#include <unistd.h>
#include <sys/types.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <semaphore.h>
#define BUFFER_SIZE 10
#define TRUE 1
/* Queue bookkeeping: `buffer` points at the element storage, `front` and
 * `rear` are plain ints maintained by the code that uses the queue. */
struct cQueue {
    int *buffer;
    int front;
    int rear;
};

/* File-scope pointer to the queue; this declaration gives it no target. */
struct cQueue *queue;
int count;
pthread_mutex_t mutex;
sem_t full, empty;
........
/*MAIN*/
/* Entry point: reads the producer/consumer thread counts from argv,
 * initialises the semaphores, the mutex and the queue, then starts p producer
 * threads and c consumer threads.
 * NOTE(review): this listing does not compile as written -- see the note on
 * the consumer loop below. */
int main(int argc,char* argv[])
{
/* NOTE(review): the local `count` shadows the file-scope `count`. */
int a=0, b=0, count =0, buff[BUFFER_SIZE];
/* NOTE(review): argv[1] and argv[2] are read before argc is checked below. */
int p = atoi(argv[1]), c = atoi(argv[2]);
/* NOTE(review): argv[0] is the program name, so this is probably not the
 * intended argument -- confirm. */
unsigned int s = atoi(argv[0]);
pthread_t prothread[p], conthread[c];
/*Initialize Semaphores*/
/* `full` starts at 0 and `empty` at BUFFER_SIZE; a failure is only printed,
 * execution continues. */
if (sem_init(&full,0,0) == -1)
printf("%s\n",strerror(errno));
if (sem_init(&empty,0,BUFFER_SIZE) == -1)
printf("%s\n",strerror(errno));
pthread_mutex_init(&mutex, NULL);
/*Initialize Circular Queue*/
/* NOTE(review): `queue` is a file-scope pointer that is not assigned anywhere
 * in the visible code before it is dereferenced here. */
queue->buffer=buff;
/* NOTE(review): front/rear are set from the (uninitialised) contents of
 * buff[0], not to an index -- confirm intent. */
queue->front = queue->buffer[0];
queue->rear = queue->buffer[0];
/* Usage check: exactly two arguments after the program name are accepted,
 * although the message lists three. */
if(argc!=3)
{
fprintf(stderr,"Syntax: ./a.out <int> <int> <int>");
return -1;
}
/* NOTE(review): s is unsigned, so s<0 can never be true. */
if(s<0)
{
fprintf(stderr,"Argument %d must be positive value\n",s);
return -1;
}
else
{
/*Create producer threads*/
int i;
for (i=0; i<p; i++)
{
b = pthread_create(&prothread[i], NULL, producerThread, (void*)argv[1]);
/* NOTE(review): pthread_create reports failure with a non-zero error number;
 * check whether a `<0` test can ever trigger. */
if (b<0)
{
printf("Error: unable to create thread, %d\n",b);
return -1;
}
}
/*Create consumer threads*/
int j;
for (j=0; j<c; j++)
/* NOTE(review): the next line opens the loop body with '(' where '{' is
 * needed; the parentheses and braces of this function are unbalanced from
 * here on. */
(
a = pthread_create(&conthread[j], NULL, consumerThread, (void*)argv[2]);
if (a<0)
{
printf("Error: unable to create thread, %d\n",a);
return -1;
}
}
/* Keeps the process alive for a number of seconds taken from argv[0]. */
sleep(atoi(argv[0]));
}
return 0;
}
I am receiving the following error. I think it has something to do with my semaphore declaration.
proj2_part1.c:147:81: error: expected ‘)’ before ‘;’ token
a = pthread_create(&conthread[j], NULL, consumerThread, (void*)argv[2]);
^
proj2_part1.c:153:6: error: expected ‘;’ before ‘}’ token
}
^
proj2_part1.c: At top level:
proj2_part1.c:156:6: error: expected identifier or ‘(’ before ‘return’
return 0;
^
proj2_part1.c:157:1: error: expected identifier or ‘(’ before ‘}’ token
}
You've used a ( where you meant {:
for (j=0; j<c; j++)
(

Linux - Syscall to Iterate over children threads

I am creating a syscall to return some information about a process and its children threads. I am testing this in Ubuntu 14.04. Here is the code that I have written:
#include <linux/list.h>
#include <linux/linkage.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#define NUM_THREADS 5
/* Result record that sys_threadinfo_prj fills in and copies to the caller. */
struct thread_info_prj {
int pid; /* pid of the calling task (current->pid) */
int nthreads; /* number of tid[] entries that were filled in */
int tid[NUM_THREADS]; /* pids collected by the syscall; capacity NUM_THREADS */
};
asmlinkage void sys_threadinfo_prj(void *ptr)
{
struct task_struct *task;
struct thread_info_prj t_info;
struct list_head *list;
int num_threads = 0;
t_info.pid = current->pid;
list_for_each(list, &current->children) {
//task = list_entry(&p_task->children, struct task_struct, sibling);
task = list_entry(list, struct task_struct, sibling);
t_info.tid[num_threads] = task->pid;
num_threads++;
}
t_info.nthreads = num_threads;
copy_to_user(ptr, &t_info, sizeof(struct thread_info_prj));
}
This code is built into the kernel as a syscall , and tested with the following program.
#include <pthread.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <errno.h>
#define NUM_THREADS 5
/* Buffer handed to the syscall; main() prints these fields after the call. */
struct thread_info_prj {
int pid; /* printed as "PID" */
int nthreads; /* printed as "Num Threads"; also bounds the tid[] print loop */
int tid[NUM_THREADS]; /* printed as "Thread ID", one per line */
};
/* Thread start routine: prints a greeting, sleeps for four seconds, then
 * terminates the calling thread. The argument is not used. */
void * thread_fn(void *ptr) {
    (void)ptr;
    puts("Im a thread!");
    sleep(4);
    pthread_exit(NULL);
}
int main() {
pthread_t threads[NUM_THREADS];
pthread_attr_t attr;
int i;
struct thread_info_prj t_info;
void *status;
pthread_attr_init(&attr);
//pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
for (i = 0; i < NUM_THREADS; i++)
pthread_create(&(threads[i]), &attr, thread_fn, NULL);
sleep(1);
syscall(352, &t_info);
//for (i = 0; i < NUM_THREADS; i++)
// pthread_join(threads[i], &status);
printf("PID: %d\n", t_info.pid);
printf("Num Threads: %d\n", t_info.nthreads);
for (i = 0; i < t_info.nthreads; i++) {
printf("Thread ID: %d\n", t_info.tid[i]);
}
printf("%s\n",strerror(errno));
return 0;
}
Unfortunately, the only output is the proper parent pid. Otherwise, it returns that there are no threads and does not print any additional PIDs. By adding a printk inside the for loop, I discovered that it never even enters the for loop. Any suggestions?
Thanks so much.

Separate the CUDA host code into a .cpp file

main.cpp
#include<iostream>
#include "cuda.h"
using namespace std;
void cuda_calculation();
// Program entry point: hands all work to cuda_calculation() and returns 0.
int main()
{
cuda_calculation();
return 0;
}
cu.h
void call(int , int ,float* , int );
cuda.cpp
#include <stdio.h>
#include <cuda.h>
#include "cu.h"
// Host-side driver: fills a 10-element float array with 0..9, copies it to the
// device, asks the launcher in cu.cu to square it, copies it back and prints
// one "index value" line per element.
// NOTE(review): none of the CUDA runtime calls or malloc are checked for errors.
void cuda_calculation()
{
float *a_h, *a_d; // Pointer to host & device arrays
const int N = 10; // Number of elements in arrays
size_t size = N * sizeof(float);
a_h = (float *)malloc(size); // Allocate array on host
cudaMalloc((void **) &a_d, size); // Allocate array on device
// Initialize host array and copy it to CUDA device
for (int i=0; i<N; i++) a_h[i] = (float)i;
cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
// Do calculation on device:
int block_size = 4;
// Ceiling division: enough blocks of block_size threads to cover N elements.
int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
// NOTE(review): the leading `void` makes the next line read as a declaration
// rather than a call, and &a_d is a float** while cu.h declares the third
// parameter as float*.
void call(n_blocks, block_size,&a_d, N);
/*square_array <<< n_blocks, block_size >>> (a_d, N);*/
// Retrieve result from device and store it in host array
cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
// Print results
for (int i=0; i<N; i++) printf("%d %f\n", i, a_h[i]);
// Cleanup
free(a_h); cudaFree(a_d);
}
cu.cu
#include <stdio.h>
#include "cu.h"
#include <cuda.h>
// Kernel that executes on the CUDA device
// Device kernel: every thread squares one element of `a` in place.
// Threads whose global index is N or larger leave the array untouched.
__global__ void square_array(float *a, int N)
{
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;
    if (gid >= N) return;
    a[gid] *= a[gid];
}
//}
// Host-side launcher for square_array, so that translation units compiled as
// plain C++ can start the kernel.
//   a: number of blocks in the grid
//   b: number of threads per block
//   c: device pointer to the array that is squared in place
//   d: number of elements in c
// The launch uses the caller-supplied configuration instead of a hard-coded
// <<<3,4>>>, which covered at most 12 elements regardless of d.
void call(int a,int b,float* c,int d)
{
    // A non-positive grid/block size is an invalid launch configuration and
    // an empty array needs no work.
    if (a <= 0 || b <= 0 || d <= 0) return;
    square_array <<< a, b >>> (c, d);
}
I tried to separate the kernel code and the host code, keeping the host code in a .cpp file, but I get the following error:
Error: 'cudaMemcpy': identifier not found (the other CUDA identifiers are not recognized either).
How can I use the CUDA identifiers in a .cpp file and call the kernel functions from it?
There are some errors: void cuda_calculation(); needs to be visible to main.cpp through a header file (cu.h).
Also make sure to compile your .cu files with nvcc and NOT as a standard C++ file. Use CUDA compilation rules to make this process easy (installed by default as part of CUDA toolkit)
After a long trial, I got the proper output.
To use the CUDA identifiers in a .cpp file, including cuda.h is not enough; cuda_runtime.h must be included as well.
cuda.cpp becomes:
#include <stdio.h>
#include <cuda.h>
#include<cuda_runtime.h>
#include "cu.h"
#include "cud.h"
//void call(int , int ,float * , int );
void cuda_calculation()
{
float *a_h, *a_d; // Pointer to host & device arrays
const int N = 10; // Number of elements in arrays
size_t size = N * sizeof(float);
a_h = (float *)malloc(size); // Allocate array on host
cudaMalloc((void **) &a_d, size); // Allocate array on device
// Initialize host array and copy it to CUDA device
for (int i=0; i<N; i++) a_h[i] = (float)i;
cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
// Do calculation on device:
int block_size = 4;
int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
call(n_blocks, block_size,a_d, N);
/*square_array <<< n_blocks, block_size >>> (a_d, N);*/
// Retrieve result from device and store it in host array
cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
// Print results
for (int i=0; i<N; i++) printf("%d %f\n", i, a_h[i]);
// Cleanup
free(a_h);
cudaFree(a_d);
}
so the others files are
main.cpp
#include<iostream>
#include "cud.h"
using namespace std;
// Program entry point: hands all work to cuda_calculation() (declared in
// cud.h) and returns 0.
int main()
{
cuda_calculation();
return 0;
}
cud.h
void cuda_calculation();
cu.h
void call(int , int ,float* , int );
cu.cu
#include <stdio.h>
#include "cu.h"
#include <cuda.h>
// Kernel that executes on the CUDA device
// Device kernel: every thread squares one element of `a` in place.
// Threads whose global index is N or larger leave the array untouched.
__global__ void square_array(float *a, int N)
{
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;
    if (gid >= N) return;
    a[gid] *= a[gid];
}
//}
// Host-side launcher for square_array, so that translation units compiled as
// plain C++ can start the kernel.
//   a: number of blocks in the grid
//   b: number of threads per block
//   c: device pointer to the array that is squared in place
//   d: number of elements in c
// The launch uses the caller-supplied configuration instead of a hard-coded
// <<<3,4>>>, which covered at most 12 elements regardless of d.
void call(int a,int b,float* c,int d)
{
    // A non-positive grid/block size is an invalid launch configuration and
    // an empty array needs no work.
    if (a <= 0 || b <= 0 || d <= 0) return;
    square_array <<< a, b >>> (c, d);
}

C++ 11 threading error

I have a C++11 program that gives me this error:
terminate called after throwing an instance of 'std::system_error'
what(): Operation not permitted
Code:
const int popSize=100;
// Initialises the population members with indices in [index1, index2).
// An empty or inverted range, a negative start, or an end beyond popSize
// prints a message and terminates the process with exit status 1.
void initializePop(mt3dSet mt3dPop[], int index1, int index2, std::string ssmName, std::string s1, std::string s2, std::string s3, std::string s4, std::string mt3dnam, std::string obf1, int iNSP, int iNRM, int iNCM, int iNLY, int iOPT, int iNPS, int iNWL, int iNRO, int ssmPosition, int obsPosition ){
    const bool rangeIsValid = index1 < index2 && index1 >= 0 && index2 <= popSize;
    if(!rangeIsValid){
        std::cout<<"\nInitializing population...\nIndex not valid..\nQuitting...\n";
        exit(1);
    }

    int member = index1;
    while(member < index2){
        mt3dSet& entry = mt3dPop[member];
        entry.setSSM(ssmName, iNSP, iNRM, iNCM, iNLY);
        entry.setNam(toString(s1,s3,member));
        entry.setObsName(toString(s1,s4,member));
        entry.setSsmName(toString(s1,s2,member));
        entry.getSSM().generateFl(toString(s1,s2,member),iOPT,iNPS);
        entry.generateNam(mt3dnam, ssmPosition, obsPosition);
        entry.setFitness(obf1, iNWL, iNRO);
        ++member;
    }
}
// Splits the population index range [0, popSize) into numThreads contiguous
// slices, initialises all but the last slice on worker threads and the last
// one on the calling thread, then waits for the workers.
void runPackage(ifstream& inFile){
    //all variables/function parameters for function call are read from inFile

    // hardware_concurrency() may return 0 when the value cannot be determined;
    // treat that as a single thread so numThreads-1 cannot wrap around.
    unsigned int numThreads = std::thread::hardware_concurrency();
    if (numThreads == 0) numThreads = 1;
    // With more threads than members some slices would be empty, and
    // initializePop terminates the process on an empty range.
    if (numThreads > static_cast<unsigned int>(popSize)) numThreads = popSize;

    const unsigned int numWorkers = numThreads - 1;
    std::vector<std::thread> vt;
    vt.reserve(numWorkers);
    for(unsigned int j=0; j<numWorkers; j++){
        // worker j handles [j*popSize/numThreads, (j+1)*popSize/numThreads)
        vt.emplace_back(initializePop,mt3dPop,j*popSize/numThreads, (j+1)*popSize/numThreads, ssmName, s1,s2, s3, s4, mt3dnam,obf1,iNSP, iNRM, iNCM, iNLY, iOPT, iNPS, iNWL, iNRO, ssmPosition, obsPosition );
    }
    //the remaining slice, up to popSize, runs in the calling thread
    initializePop(mt3dPop,(numThreads-1)*popSize/numThreads, popSize, ssmName, s1,s2, s3, s4, mt3dnam,obf1,iNSP, iNRM, iNCM, iNLY, iOPT, iNPS, iNWL, iNRO, ssmPosition, obsPosition);
    for(auto& worker : vt){
        worker.join();
    }
}
What does the error mean and how do I fix it?
You need to link correctly, and compile with -std=c++11 - see this example.
I'm guessing you had the same problem as me: I passed -pthread and -std=c++11 only when compiling, not when linking. (You still need -std=c++11 when compiling, as well as when linking.)
Probably you want to do something like this:
g++ -c <input_files> -std=c++11
then
g++ -o a.out <input_files> -std=c++11 -pthread
... at least I think that's right. (Someone to confirm?)
How to reproduce these errors:
#include <iostream>
#include <stdlib.h>
#include <string>
using namespace std;
void task1(std::string msg){ // writes "task1 says: " followed by msg to cout, no newline appended
cout << "task1 says: " << msg;
}
int main() { // runs task1("hello") on a separate thread and waits for it
std::thread t1(task1, "hello"); // std::thread is declared in <thread>, which this listing does not include
t1.join(); // blocks until task1 has returned
return 0;
}
Compile and run:
el#defiant ~/foo4/39_threading $ g++ -o s s.cpp
s.cpp: In function ‘int main()’:
s.cpp:9:3: error: ‘thread’ is not a member of ‘std’
s.cpp:9:15: error: expected ‘;’ before ‘t1’
You forgot to #include <thread>, include it and try again:
#include <iostream>
#include <stdlib.h>
#include <string>
#include <thread>
using namespace std;
// Writes "task1 says: " followed by msg to standard output; no newline is
// appended.
void task1(std::string msg){
    std::cout << "task1 says: ";
    std::cout << msg;
}
// Starts task1 on its own thread with the argument "hello", then waits for
// that thread to finish before returning 0.
int main() {
    std::thread worker(task1, "hello");
    worker.join();
    return 0;
}
Compile and run:
el#defiant ~/foo4/39_threading $ g++ -o s s.cpp -std=c++11
el#defiant ~/foo4/39_threading $ ./s
terminate called after throwing an instance of 'std::system_error'
what(): Operation not permitted
Aborted (core dumped)
This is the error from the question; it occurs because -pthread was not passed to g++. Add it:
el#defiant ~/foo4/39_threading $ g++ -o s s.cpp -pthread -std=c++11
el#defiant ~/foo4/39_threading $ ./s
task1 says: hello
Now it works.

Why semaphore object is not initialized?

I'm learning to use semaphore objects, but I can't initialize one.
The sem_init function always returns -1, no matter what I do.
According to my reference, a return value of -1 indicates that the first argument is not a valid pointer.
But I can't find a mistake in my code. I compiled it with Xcode on OS X.
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
void * thread_snd(void *arg);
void * thread_rcv(void* arg);
sem_t bin_sem;
int number = 0;
char thread1[] = "A thread";
char thread2[] = "B thread";
char thread3[] = "C thread";
/* Initialises bin_sem to 0, runs one sender and two receiver threads to
 * completion, prints the final value of `number` and destroys the semaphore.
 * Exits with status 1 if the semaphore cannot be initialised. */
int main(int argc, char** argv)
{
    pthread_t workers[3];
    void *(*const entry[3])(void *) = { thread_snd, thread_rcv, thread_rcv };
    void *const label[3] = { &thread1, &thread2, &thread3 };
    void *exit_value;
    int k;

    if (sem_init(&bin_sem, 0, 0) != 0) {
        puts("fail to initialize semaphore");
        exit(1);
    }

    /* Same start order as the labels: A (sender), B and C (receivers). */
    for (k = 0; k < 3; k++)
        pthread_create(&workers[k], NULL, entry[k], label[k]);
    for (k = 0; k < 3; k++)
        pthread_join(workers[k], &exit_value);

    printf("final number : %d \n", number);
    sem_destroy(&bin_sem);
    return 0;
}
/* Sender thread: four times, waits (polling once per second) until `number`
 * is 0, increments it, prints a progress line and posts bin_sem.
 * arg: NUL-terminated label that is printed in the progress line.
 * Returns NULL. main() hands the result to pthread_join, so the function must
 * return a value rather than fall off its end. */
void * thread_snd(void * arg)
{
    int i;
    for(i = 0 ; i < 4; i++)
    {
        /* NOTE(review): `number` is read here without any lock while the
         * receiver threads modify it -- confirm this is acceptable. */
        while(number != 0)
            sleep(1);
        number++;
        printf("execution : %s, number : %d \n", (char*) arg, number);
        sem_post(&bin_sem);
    }
    return NULL;
}
/* Receiver thread: twice, waits on bin_sem, decrements `number` and prints a
 * progress line.
 * arg: NUL-terminated label that is printed in the progress line.
 * Returns NULL. main() hands the result to pthread_join, so the function must
 * return a value rather than fall off its end. */
void * thread_rcv(void* arg)
{
    int i;
    for(i = 0 ; i < 2; i++)
    {
        sem_wait(&bin_sem);
        number--;
        printf("execution : %s number : %d \n", (char*)arg, number);
    }
    return NULL;
}
On Mac OS X (10.6.8) there is no sem_init() and sem_destroy().
Use sem_open() and sem_unlink() instead.
/*
cat semaphore_test.c
source:
"Why semaphore object is not initialized?",
https://stackoverflow.com/questions/13834367/why-semaphore-object-is-not-initialized
compiled on Mac OS X 10.6.8 with:
gcc -ansi -pedantic -std=gnu99 -Os -Wall -Wextra -Wshadow -Wpointer-arith -Wcast-qual -Wstrict-prototypes \
-Wmissing-prototypes -Wformat=2 -Wreturn-type -Wunreachable-code -finline -l pthread -o semaphore_test semaphore_test.c
./semaphore_test
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
void * thread_snd(void *arg);
void * thread_rcv(void* arg);
//sem_t bin_sem;
static sem_t *bin_sem;
static const char *semname = "Semaphore";
static int number = 0;
char thread1[] = "A thread";
char thread2[] = "B thread";
char thread3[] = "C thread";
/* Creates the named semaphore with an initial count of 0, runs one sender and
 * two receiver threads to completion, prints the final value of `number` and
 * releases the semaphore. sem_open()/sem_unlink() take the place of
 * sem_init()/sem_destroy(), which are not available on Mac OS X.
 * Exits with EXIT_FAILURE if the semaphore cannot be created. */
int main(void)
{
    pthread_t t1, t2 ,t3;
    void *thread_result;

    /* A semaphore of this name left behind by an earlier run would be opened
     * with its old count; remove it so the new one really starts at 0. */
    sem_unlink(semname);
    bin_sem = sem_open(semname, O_CREAT | O_EXCL, 0600, 0);
    if (bin_sem == SEM_FAILED)
    {
        perror("ERROR creating semaphore");
        exit(EXIT_FAILURE);
    }

    pthread_create(&t1, NULL, thread_snd, &thread1);
    pthread_create(&t2, NULL, thread_rcv, &thread2);
    pthread_create(&t3, NULL, thread_rcv, &thread3);
    pthread_join(t1, &thread_result);
    pthread_join(t2, &thread_result);
    pthread_join(t3, &thread_result);
    printf("final number : %d \n", number);

    /* Close this process's handle, then remove the name. */
    sem_close(bin_sem);
    sem_unlink(semname);
    return 0;
}
/* Sender thread: four times, waits (polling once per second) until `number`
 * is 0, increments it, prints a progress line and posts bin_sem.
 * arg: NUL-terminated label that is printed in the progress line.
 * Returns NULL. main() hands the result to pthread_join, so the function must
 * return a value rather than fall off its end. */
void * thread_snd(void * arg)
{
    int i;
    for(i = 0 ; i < 4; i++)
    {
        /* NOTE(review): `number` is read here without any lock while the
         * receiver threads modify it -- confirm this is acceptable. */
        while(number != 0)
            sleep(1);
        number++;
        printf("snd execution : %s, number : %d \n", (char*) arg, number);
        /* bin_sem is already a sem_t* here, so it is passed without '&'. */
        sem_post(bin_sem);
    }
    return NULL;
}
/* Receiver thread: twice, waits on bin_sem, decrements `number` and prints a
 * progress line.
 * arg: NUL-terminated label that is printed in the progress line.
 * Returns NULL. main() hands the result to pthread_join, so the function must
 * return a value rather than fall off its end. */
void * thread_rcv(void* arg)
{
    int i;
    for(i = 0 ; i < 2; i++)
    {
        /* bin_sem is already a sem_t* here, so it is passed without '&'. */
        sem_wait(bin_sem);
        number--;
        printf("rcv execution : %s number : %d \n", (char*)arg, number);
    }
    return NULL;
}
See also:
sem_init on OS X
Program using Semaphores runs fine on Linux... unexpected results on Mac OS X

Resources