I want to scan one array and get the scanning result .
In My code , I make a disorder array with the shuffle function. then scan it get some number ( which is bigger than 60000 ). I split the array into threadnum-part ,every thread get one part to deal with. It seems that there are no shared memory between different thread.
So, why the two parallel code is so slow, since it is too slow ,I think the padding things maybe not the main reason. Could anyone give me some tips ? I am a beginner in parallel programming ,thank you.
here is my testing code with three part: serialization/Pthread/OpenMP. you can copy it and test on your own machine.
serialization code :
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#define N (65532)
#define threadnum 2
typedef struct{
int *mydata;
int *myres;
int val;
int datalen;
int reslen;
int tid;
}arg_t;
int randint(int i, int k){
int ret;
if(i> k){
int t= i;i=k;k=t;
}
ret = i + rand()%(k-i+1);
return ret;
}
void shuffle(int *org,int n){
int j=0,i;
for(i = n-1;i !=0;i--){
j = randint(0,i);
int t = org[j];org[j] = org[i];org[i] =t;
}
}
void scan(arg_t *arg){
int i;
arg->reslen=0;
for(i=0;i<arg->datalen;i++){
//if(arg->mydata[i] > arg->val){
arg->myres[arg->reslen] = arg->mydata[i];
arg->reslen +=(arg->mydata[i] > arg->val);
//}
}
}
int main(){
struct timeval begin,end;
int i,A[N],*res,reslen,perthread;
double diff_usec;
arg_t args[threadnum];
for(i=0;i<N;i++){
A[i] = i+1;
}
shuffle(A,N);
gettimeofday(&begin,NULL);
res = malloc(sizeof(int)*688);
reslen = 0;
for(i=0;i<N;i++){
//if(arg->mydata[i] > arg->val){
res[reslen] =A[i];
reslen +=(A[i] > 60000);
//}
}
gettimeofday(&end,NULL);
diff_usec = (((end).tv_sec*1000000L + (end).tv_usec)- ((begin).tv_sec*1000000L+(begin).tv_usec));
printf("\n%.4lf %d\n",diff_usec,reslen);
return 0;
}
Pthread code :
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <pthread.h>
#define N 65532
#define threadnum 2
typedef struct{
int *mydata;
int *myres;
int val;
int datalen;
int reslen;
int tid;
}arg_t;
int randint(int i, int k){
int ret;
if(i> k){
int t= i;i=k;k=t;
}
ret = i + rand()%(k-i+1);
return ret;
}
void shuffle(int *org,int n){
int j=0,i;
for(i = n-1;i !=0;i--){
j = randint(0,i);
int t = org[j];org[j] = org[i];org[i] =t;
}
}
void scan(arg_t *arg){
int i;
arg->reslen=0;
for(i=0;i<arg->datalen;i++){
//if(arg->mydata[i] > arg->val){
arg->myres[arg->reslen] = arg->mydata[i];
arg->reslen +=(arg->mydata[i] > arg->val);
//}
}
}
int get_time(struct timespec *begin , struct timespec *end){
return 1000 * (end -> tv_sec - begin -> tv_sec) + (end -> tv_sec - begin->tv_sec)/1000000;
}
int main(){
struct timeval begin,end;
//struct timespec begin,end;
int i,A[N],*res,reslen,perthread;
arg_t args[threadnum];
double diff_usec;
pthread_t tid[threadnum];
for(i=0;i<N;i++){
A[i] = i+1;
}
shuffle(A,N);
gettimeofday(&begin,NULL);
//clock_gettime(CLOCK_REALTIME,&begin);
perthread = N/threadnum;
for(i=0;i<threadnum;i++){
args[i].mydata = A+i*perthread;
args[i].myres = malloc(sizeof(int)*perthread);
args[i].datalen = (i == threadnum-1)?(N-(threadnum-1)*perthread):perthread;
args[i].val = 60000;
args[i].tid = i;
pthread_create(&tid[i],NULL,scan,(void*)&args[i]);
}
reslen =0;
for(i=0;i<threadnum;i++){
pthread_join(tid[i],NULL);
reslen += args[i].reslen;
}
res = malloc(sizeof(int)*reslen);
reslen=0;
for(i=0;i<threadnum;i++){
memcpy(res+reslen,args[i].myres,args[i].reslen);
reslen += args[i].reslen;
}
//clock_gettime(CLOCK_REALTIME,&end);
gettimeofday(&end,NULL);
diff_usec = (((end).tv_sec*1000000L + (end).tv_usec)- ((begin).tv_sec*1000000L+(begin).tv_usec));
//printf("\n%dms %d\n",get_time(&begin,&end),reslen);
printf("\n%.4lfms %d\n",diff_usec,reslen);
return 0;
}
OpenMP code :
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include <time.h>
#define N 65532
#define threadnum 16
typedef struct{
int *mydata;
int *myres;
int val;
int datalen;
int reslen;
}arg_t;
int get_time(struct timespec *begin , struct timespec *end){
return 1000 * (end -> tv_sec - begin -> tv_sec) + (end -> tv_sec - begin->tv_sec)/1000000;
}
int randint(int i, int k){
int ret;
if(i> k){
int t= i;i=k;k=t;
}
ret = i + rand()%(k-i+1);
return ret;
}
void shuffle(int *org,int n){
int j=0,i;
for(i = n-1;i !=0;i--){
j = randint(0,i);
int t = org[j];org[j] = org[i];org[i] =t;
}
}
void scan(arg_t *arg){
int i;
arg->reslen=0;
for(i=0;i<arg->datalen;i++){
//if(arg->mydata[i] > arg->val){
arg->myres[arg->reslen] = arg->mydata[i];
arg->reslen +=(arg->mydata[i] > arg->val);
//}
}
}
int main(){
struct timeval begin,end;
//struct timespec begin,end;
int i,A[N],*res,reslen,perthread;
double diff_usec;
arg_t args[threadnum];
for(i=0;i<N;i++){
A[i] = i+1;
}
shuffle(A,N);
gettimeofday(&begin,NULL);
//clock_gettime(CLOCK_REALTIME,&begin);
perthread = N/threadnum;
for(i=0;i<threadnum;i++){
args[i].mydata = A+i*perthread;
args[i].myres = malloc(sizeof(int)*perthread);
args[i].datalen = (i == threadnum-1)?(N-(threadnum-1)*perthread):perthread;
args[i].val = 60000;
}
omp_set_num_threads(threadnum);
#pragma omp parallel
{
scan(&args[omp_get_thread_num()]);
}
reslen =0;
for(i=0;i<threadnum;i++){
reslen += args[i].reslen;
}
res = malloc(sizeof(int)*reslen);
reslen=0;
for(i=0;i<threadnum;i++){
memcpy(res+reslen,args[i].myres,args[i].reslen);
reslen += args[i].reslen;
}
gettimeofday(&end,NULL);
//clock_gettime(CLOCK_REALTIME,&end);
diff_usec = (((end).tv_sec*1000000L + (end).tv_usec)- ((begin).tv_sec*1000000L+(begin).tv_usec));
printf("\n%.4lfms %d\n",diff_usec,reslen);
return 0;
}
Related
The question essentially is how to correctly apply gain to an audio sample?
I'm programming on FreeBSD and OSS, but manipulate volume in audio sample is probably the same for other OS and applications.
I'm studying others' applications internals like ecasound (in C++) and SoX (in C) but I don't know whats wrong when I read a sample and apply gain to it : it becomes distorted and noisy. My point is to understand why it is not working to turn the volume down (gain lesser than 1).
I'm working with stereo 16 bit LE samples. Without applying gain, it works perfectly (recording and playback).
I thought that I should convert an integer sample to float; multiply by a gain factor and restore it to integer. But it is not working. And it seems to be the exact same approach for SoX in src/vol.c in function static int flow.
Below is my code (no additional libs used). The function playback is where I'm applying gain.
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "/usr/include/sys/soundcard.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/stat.h> //man 2 chmod
#include <signal.h>
#define DEBUG 1
#define log(msg) if (DEBUG) printf("[LOG] %s\n",msg)
#define err(msg) {printf("[ERR] %s\n",msg); exit(1); }
const char *device = "/dev/dsp3.1"; //Audio device
char *rawFile = "/tmp/raw-file.wav"; //Raw file to record and playback
int fragmentSize = 256;
int b_continue = 1;
void signalHandler(int sigNum){
log("Signal captured");
b_continue = 0;
}
void configDevice(int fdDsp){
int ossCapabilities = 0;
if(fdDsp == -1)
err("can't open device");
if( ioctl(fdDsp, SNDCTL_DSP_GETCAPS, &ossCapabilities) == -1)
err("unsupported: SNDCTL_DSP_GETCAPS");
/*
* http://www.opensound.com/pguide/audio2.html
*/
if(ossCapabilities & DSP_CAP_TRIGGER != DSP_CAP_TRIGGER){
err("Triggering of recording/playback is not possible with this OSS device.");
}
if(ossCapabilities & DSP_CAP_REALTIME != DSP_CAP_REALTIME){
err("No DSP_CAP_REALTIME.");
}
if(ioctl(fdDsp, SNDCTL_DSP_SETDUPLEX, &ossCapabilities) == -1)
err("can't SNDCTL_DSP_SETDUPLEX");
if(ossCapabilities & DSP_CAP_DUPLEX != DSP_CAP_DUPLEX)
err("can't DSP_CAP_DUPLEX");
int format = AFMT_S16_LE; //set format
if(ioctl(fdDsp, SNDCTL_DSP_SETFMT, &format ) == -1){
err("Error setting format.");
}
int channels = 1; //mono=0 stereo=1
if(ioctl(fdDsp, SNDCTL_DSP_STEREO, &channels ) == -1){
err("Error setting channels." );
}
// FREQUENCY RATE
int speed = 44100;
if(ioctl(fdDsp, SNDCTL_DSP_SPEED, &speed ) == -1){
err("Error setting speed.");
}
// FRAGMENT SIZE
if(ioctl(fdDsp, SNDCTL_DSP_SETBLKSIZE, &fragmentSize) == -1){ //normalmente 2048 bits
err("Cannot SNDCTL_DSP_SETBLKSIZE.");
}
}
void record(){
int fdDsp = open(device, O_RDONLY);
configDevice(fdDsp);
//create file for writing
const int fdOutput = open(rawFile, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR);
if(fdOutput ==-1)
err("can't open file to write");
log("Recording...");
do{
// Triggers recording
int enableBits = PCM_ENABLE_INPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1)
err("Can't record: SNDCTL_DSP_SETTRIGGER");
int *buf[fragmentSize];
read(fdDsp, buf, fragmentSize);
write(fdOutput, buf, fragmentSize);
} while(b_continue == 1);
close(fdOutput);
close(fdDsp);
}
void playback(){
log("Opening file:");
log(rawFile);
log("On device:");
log(device);
int fdDsp = open(device, O_WRONLY);
configDevice(fdDsp);
const int fdInput = open(rawFile, O_RDONLY);
if(fdInput ==-1)
err("can't open file");
log("Playing...");
int eof = 0;
do{
// TRIGGERs PLAYBACK
int enableBits = PCM_ENABLE_OUTPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1){
err("Cannot SNDCTL_DSP_SETTRIGGER.");
}
int buf[fragmentSize];
eof = read(fdInput, buf, fragmentSize); //bytes read or -1 if EOF
// audio processing:
for(int i=0;i<fragmentSize;i++){
// learning how to get left and right channels from buffer
int l = (buf)[i] & 0xffff;
int r = ((buf)[i] >> 16) & 0xffff ;
// FIXME: it is causing distortion:
float fl = l;
float fr = r;
fl *= 1.0;
fr *= 0.3; //if different than 1, sounds distorted and noisy
l = fl;
r = fr;
// OK: unite Left and Right channels again
int lr = (l ) | (r << 16);
// OK: other options to mix these two channels:
int lleft = l; //Just the left channel
int rright = (r << 16); //Just the right channel
int lmono = (l << 16) | l; //Left ch. on both channels
int rmono = (r << 16) | r; //Right ch. on both channels
// the output:
(buf)[i] = lr;
}
write(fdDsp, buf, fragmentSize);
if(b_continue == 0) break;
} while(eof > 0);
close(fdInput);
close(fdDsp);
}
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
log("Ctrl^C to stop recording/playback");
record();
b_continue = 1; playback();
log("Stopped.");
return 0;
}
UPDATE:
As pointed out by CL, I was using the wrong type and the last parameter of read()/write() is greater than the size of the buffer.
So, in FreeBSD I changed the buffer type to int16_t (short) defined in #include <stdint.h> .
Now I can correctly apply a gain as desired:
float fl = l;
float fr = r;
fl *= 1.0f;
fr *= 1.5f;
l = fl;
r = fr;
I'll accept CL's answer.
Now the audio processing loop is working with one sample per time (left and right interleaved).
Updated code:
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include "/usr/include/sys/soundcard.h"
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/stat.h> //man 2 chmod
#include <signal.h>
#include <stdint.h> //has type int16_t (short)
#define DEBUG 1
#define log(msg) if (DEBUG) printf("[LOG] %s\n",msg)
#define err(msg) {printf("[ERR] %s\n",msg); exit(1); }
const char *device = "/dev/dsp3.1"; //Audio device
char *rawFile = "/tmp/stereo.wav"; //Raw file to record and playback
int fragmentSize = 256;
int b_continue = 1;
void signalHandler(int sigNum){
log("Signal captured");
b_continue = 0;
}
void configDevice(int fdDsp){
int ossCapabilities = 0;
if(fdDsp == -1)
err("can't open device");
if( ioctl(fdDsp, SNDCTL_DSP_GETCAPS, &ossCapabilities) == -1)
err("unsupported: SNDCTL_DSP_GETCAPS");
/*
* http://www.opensound.com/pguide/audio2.html
*/
if(ossCapabilities & DSP_CAP_TRIGGER != DSP_CAP_TRIGGER){
err("Triggering of recording/playback is not possible with this OSS device.");
}
if(ossCapabilities & DSP_CAP_REALTIME != DSP_CAP_REALTIME){
err("No DSP_CAP_REALTIME.");
}
if(ioctl(fdDsp, SNDCTL_DSP_SETDUPLEX, &ossCapabilities) == -1)
err("can't SNDCTL_DSP_SETDUPLEX");
if(ossCapabilities & DSP_CAP_DUPLEX != DSP_CAP_DUPLEX)
err("can't DSP_CAP_DUPLEX");
int format = AFMT_S16_LE; //set format
if(ioctl(fdDsp, SNDCTL_DSP_SETFMT, &format ) == -1){
err("Error setting format.");
}
int channels = 1; //mono=0 stereo=1
if(ioctl(fdDsp, SNDCTL_DSP_STEREO, &channels ) == -1){
err("Error setting channels." );
}
// FREQUENCY RATE
int speed = 44100;
if(ioctl(fdDsp, SNDCTL_DSP_SPEED, &speed ) == -1){
err("Error setting speed.");
}
// FRAGMENT SIZE
if(ioctl(fdDsp, SNDCTL_DSP_SETBLKSIZE, &fragmentSize) == -1){ //normalmente 2048 bits
err("Cannot SNDCTL_DSP_SETBLKSIZE.");
}
}
void record(){
int fdDsp = open(device, O_RDONLY);
configDevice(fdDsp);
//create file for writing
const int fdOutput = open(rawFile, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR);
if(fdOutput ==-1)
err("can't open file to write");
log("Recording...");
do{
// Triggers recording
int enableBits = PCM_ENABLE_INPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1)
err("Can't record: SNDCTL_DSP_SETTRIGGER");
// Wrong:
// int *buf[fragmentSize];
// read(fdDsp, buf, fragmentSize);
// write(fdOutput, buf, fragmentSize);
int16_t *buf[fragmentSize/sizeof (int16_t)];
read(fdDsp, buf, fragmentSize/sizeof (int16_t));
write(fdOutput, buf, fragmentSize/sizeof (int16_t));
} while(b_continue == 1);
close(fdOutput);
close(fdDsp);
}
void playback(){
log("Opening file:");
log(rawFile);
log("On device:");
log(device);
int fdDsp = open(device, O_WRONLY);
configDevice(fdDsp);
const int fdInput = open(rawFile, O_RDONLY);
if(fdInput ==-1)
err("can't open file");
log("Playing...");
int eof = 0;
do{
// TRIGGERs PLAYBACK
int enableBits = PCM_ENABLE_OUTPUT;
if(ioctl(fdDsp, SNDCTL_DSP_SETTRIGGER, &enableBits) == -1){
err("Cannot SNDCTL_DSP_SETTRIGGER.");
}
//Wrong buffer type (too large) and wrong last parameter for read():
// int buf[fragmentSize];
// eof = read(fdInput, buf, fragmentSize);
int16_t buf[fragmentSize/sizeof (int16_t)];
eof = read(fdInput, buf, fragmentSize/sizeof (int16_t));
// audio processing:
for(int i=0;i<fragmentSize/sizeof (int16_t);i++){
int16_t l = buf[i];
int16_t r = buf[i+1];
// Using int16_t (short) buffer, gain works but stereo is inverted with factor >= 1.4f
float fl = l;
float fr = r;
fl *= 2.0f;
fr *= 3.0f;
l = fl;
r = fr;
// the output:
(buf)[i] = l;
i++;
(buf)[i] = r;
}
// write(fdDsp, buf, fragmentSize); //wrong
write(fdDsp, buf, fragmentSize/sizeof (int16_t));
if(b_continue == 0) break;
} while(eof > 0);
close(fdInput);
close(fdDsp);
}
int main(int argc, char *argv[])
{
signal(SIGINT, signalHandler);
log("Ctrl^C to stop recording/playback");
record();
b_continue = 1; playback();
log("Stopped.");
return 0;
}
Thanks,
The last parameter of read()/write() is the number of bytes, but an entry in buf[] has more than one byte.
In the two's complement representation of binary numbers, negative values are (or must be) sign extended, i.e., the most significant bits are ones. In this code, neither extracting L/R channels nor combining them works correctly for negative samples.
The easiest way of handling negative samples would be to use one array entry per sample, i.e., short int.
Why? I didn't do anything on the other peer!
I was using massive threads to get data from the server. When the thread count is small, it's ok. But when the thread count is very large, recv() return -1 and errno indicates "Connection reset by peer".
Here is an example to reproduce the issue:
server.cc
#include <arpa/inet.h>
#include <assert.h>
#include <netinet/in.h>
#include <pthread.h>
#include <unistd.h>
char buffer[4096];
inline int send_all(int socket_fd, const char* data, size_t size, int flags)
{
int result;
const char* pos = data;
while (size > 0)
{
result = send(socket_fd, pos, size, flags);
assert(result > 0);
pos += result;
size -= result;
}
return 0;
}
inline int recv_all(int socket_fd, void* data, size_t size, int flags)
{
int result = recv(socket_fd, data, size, flags | MSG_WAITALL);
assert(((size_t) result) == size);
return 0;
}
void* server_thread(void* arg)
{
int socket_fd = (int) ((long) arg);
// recv some info first
recv_all(socket_fd, buffer, 1, 0);
// simulate some computation
for (int i = 0; i < 0xffff; i ++)
for (int j = 0; j < 0xffff; j ++);
// send data
for (int i = 0; i < 4096; i ++)
send_all(socket_fd, buffer, sizeof(buffer), 0);
// the peer is closed, return 0
recv(socket_fd, buffer, 1, MSG_WAITALL);
close(socket_fd);
pthread_detach(pthread_self());
return NULL;
}
int main(void)
{
int listen_socket = socket(AF_INET, SOCK_STREAM, 0);
assert(listen_socket != -1);
struct sockaddr_in listen_address;
listen_address.sin_family = AF_INET;
listen_address.sin_port = htons(11282);
listen_address.sin_addr.s_addr = INADDR_ANY;
int result = bind(listen_socket,
(struct sockaddr*) &listen_address,
sizeof(listen_address));
assert(result != -1);
result = listen(listen_socket, 5);
assert(result != -1);
while (true)
{
int server_socket = accept(listen_socket, NULL, NULL);
assert(server_socket != -1);
pthread_t tid;
result = pthread_create(&tid,
NULL,
server_thread,
(void *) ((long) server_socket));
assert(result != -1);
}
return 0;
}
client.cc
#include <arpa/inet.h>
#include <assert.h>
#include <netinet/in.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
pthread_t threads[4096];
char buffer[4096];
inline int send_all(int socket_fd, const char* data, size_t size, int flags)
{
int result;
const char* pos = data;
while (size > 0)
{
result = send(socket_fd, pos, size, flags);
assert(result > 0);
pos += result;
size -= result;
}
return 0;
}
inline int recv_all(int socket_fd, void* data, size_t size, int flags)
{
int result = recv(socket_fd, data, size, flags | MSG_WAITALL);
assert(((size_t) result) == size);
return 0;
}
void* client_thread(void* arg)
{
int socket_fd = socket(AF_INET, SOCK_STREAM, 0);
assert(socket_fd != -1);
struct sockaddr_in server_address;
server_address.sin_family = AF_INET;
server_address.sin_port = htons(11282);
server_address.sin_addr.s_addr = inet_addr("127.0.0.1");
int result = connect(socket_fd,
(struct sockaddr *) &server_address,
sizeof(server_address));
assert(result != -1);
// send some info first
send_all(socket_fd, buffer, 1, 0);
// recv the reply data
for (int i = 0; i < 4096; i ++)
recv_all(socket_fd, buffer, sizeof(buffer), 0);
close(socket_fd);
return NULL;
}
int main(int argc, char* argv[])
{
assert(argc == 2);
// get client thread count
int thread_count = atoi(argv[1]);
assert(thread_count <= 4096);
for (int i = 0; i < thread_count; i ++)
{
int result = pthread_create(&threads[i], NULL, client_thread, NULL);
assert(result != -1);
}
for (int i = 0; i < thread_count; i ++)
pthread_join(threads[i], NULL);
return 0;
}
Usage:
./server
./client [thread_count]
I was using 480 as the thread_count, sometime I could reproduce the issue.
This is the Oculus street view project.....just i am trying to understand and run but i could not do it......please help me
// dllmain.cpp : Defines the entry point for the DLL application.
include "stdafx.h"
BOOL APIENTRY DllMain( HMODULE hModule,
DWORD ul_reason_for_call,
LPVOID lpReserved
)
{
switch (ul_reason_for_call)
{
case DLL_PROCESS_ATTACH:
case DLL_THREAD_ATTACH:
case DLL_THREAD_DETACH:
case DLL_PROCESS_DETACH:
break;
}
return TRUE;
}
include "stdafx.h"
#include "libovrwrapper.h"
#include <OVR.h>
using namespace OVR;
// Ptr<> AddRef'ed, AutoCleaned
bool bInited = false;
Ptr<DeviceManager> pManager;
Ptr<HMDDevice> pHMD;
Ptr<SensorDevice> pSensor;
SensorFusion pSensorFusion;
LIBOVRWRAPPER_API int OVR_Init()
{
bInited = false;
System::Init(Log::ConfigureDefaultLog(LogMask_Regular));
if (System::IsInitialized())
{
int stage = -1;
while (++stage > -1 && !bInited)
{
switch (stage)
{
case 0:
pManager = *DeviceManager::Create();
if (pManager == NULL)
return bInited;
break;
case 1:
pHMD = *pManager->EnumerateDevices<HMDDevice>().CreateDevice();
if (pHMD == NULL)
return bInited;
break;
case 2:
pSensor = *pHMD->GetSensor();
if (pSensor == NULL)
return bInited;
break;
default:
bInited = true;
break;
};
}
}
pSensorFusion.AttachToSensor(pSensor);
return (bInited?1:0);
}
LIBOVRWRAPPER_API void OVR_Exit()
{
if (bInited)
{
System::Destroy();
}
}
LIBOVRWRAPPER_API int OVR_QueryHMD(OVR_HMDInfo* refHmdInfo)
{
if (!bInited)
{
return 0;
}
HMDInfo src;
if (pHMD->GetDeviceInfo(&src))
{
refHmdInfo->HResolution = src.HResolution;
refHmdInfo->VResolution = src.VResolution;
refHmdInfo->HScreenSize = src.HScreenSize;
refHmdInfo->VScreenSize = src.VScreenSize;
refHmdInfo->VScreenCenter = src.VScreenCenter;
refHmdInfo->EyeToScreenDistance = src.EyeToScreenDistance;
refHmdInfo->LensSeparationDistance = src.LensSeparationDistance;
refHmdInfo->InterpupillaryDistance = src.InterpupillaryDistance;
refHmdInfo->DistortionK[0] = src.DistortionK[0];
refHmdInfo->DistortionK[1] = src.DistortionK[1];
refHmdInfo->DistortionK[2] = src.DistortionK[2];
refHmdInfo->DistortionK[3] = src.DistortionK[3];
refHmdInfo->DesktopX = src.DesktopX;
refHmdInfo->DesktopY = src.DesktopY;
memcpy(refHmdInfo->DisplayDeviceName, src.DisplayDeviceName, sizeof(refHmdInfo->DisplayDeviceName));
}
return 1;
}
LIBOVRWRAPPER_API int OVR_PeekYPL(float* yaw, float* pitch, float* roll)
{
if (!bInited)
{
return 0;
}
Quatf hmdOrient = pSensorFusion.GetOrientation();
hmdOrient.GetEulerAngles<Axis_Y, Axis_X, Axis_Z>(yaw, pitch, roll);
return 1;
}
LIBOVRWRAPPER_API int OVR_Peek(float* w, float* x, float* y,float * z)
{
if (!bInited)
{
return 0;
}
Quatf hmdOrient = pSensorFusion.GetOrientation();
*w = hmdOrient.w;
*x = hmdOrient.x;
*y = hmdOrient.y;
*z = hmdOrient.z;
//hmdOrient.GetEulerAngles<Axis_Y, Axis_X, Axis_Z>(yaw, pitch, roll);
return 1;
}
#ifdef LIBOVRWRAPPER_EXPORTS
#if defined __cplusplus
#define LIBOVRWRAPPER_API extern "C" __declspec(dllexport)
#else
#define LIBOVRWRAPPER_API __declspec(dllexport)
#endif
#else
#if defined __cplusplus
#define LIBOVRWRAPPER_API extern "C" __declspec(dllimport)
#else
#define LIBOVRWRAPPER_API __declspec(dllimport)
#endif
#endif
struct OVR_HMDInfo
{
unsigned HResolution;
unsigned VResolution;
float HScreenSize;
float VScreenSize;
float VScreenCenter;
float EyeToScreenDistance;
float LensSeparationDistance;
float InterpupillaryDistance;
float DistortionK[4];
int DesktopX;
int DesktopY;
char DisplayDeviceName[32];
};
LIBOVRWRAPPER_API int OVR_Init();
LIBOVRWRAPPER_API void OVR_Exit();
LIBOVRWRAPPER_API int OVR_QueryHMD(struct OVR_HMDInfo* refHmdInfo);
LIBOVRWRAPPER_API int OVR_Peek(float* w, float* x, float* y,float * z);
LIBOVRWRAPPER_API int OVR_PeekYPL(float* yaw, float* pitch, float* roll);
Your question is very difficult to understand, as it doesn't have any real details about what you've tried, what isn't working, etc.
It looks like you are trying to "run" a DLL, which isn't really possible. A DLL has to be loaded by another application. So you would have an executable of some kind that would load this DLL, and call into its OVR_Init and OVR_Exit functions.
I have a bit of a problem in using IPC (inter-process communication) program below.
Please let me explain:
I want to pass Linux commands such as "ls" or "wc file.txt"
from a parent to a child to execute using the message queue, and
then have the child returning the command outputs back to
the parent process using shared memory method.
But this is what I got: The parent process always got the output 1 step behind;
in the following fashion:
Step1) ls file.txt
(Nothing showed up.)
Step2) wc file.txt
(Output of earlier command "ls file.txt" showed up here instead.)
Step 3) cat file.txt
(Output of earlier command "wc file.txt" showed up instead.)
Any help is appreciated.
To compile: gcc -o program ./program.c
To run: -./program -v
Code:
#define BUFSZ 512
#define ERRBUFSZ 512
#define TIMEOUT_TIMEDIO 20
#define SHM_SIZE 5120
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <setjmp.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
static sigjmp_buf jmpbuf;
int timed_io(char* buf, int len, FILE* rfp, int sec);
static void sigalrm_handler(int signo);
void do_cmd(char *buf, int len, int linenum, char *errbuf);
int parse_cmd(char *buf, char **vbuf, char *errbuf);
int process_cmd_ipc(char *argv, int linenum, char *errbuf);
struct my_msgbuf {
long mtype;
char mtext[256];
};
static void sigalrm_handler(int signo)
{
siglongjmp(jmpbuf, 1);
}
int timed_io(char* buf, int len, FILE* rfp, int sec)
{
struct sigaction nsigaction[1];
struct sigaction osigaction[1];
int prev_alrm;
int st = 0;
if(sigsetjmp(jmpbuf, 1) == 0)
{
nsigaction->sa_handler = sigalrm_handler;
sigemptyset(&nsigaction->sa_mask);
nsigaction->sa_flags = SA_RESTART;
prev_alrm = alarm(0);
sigaction(SIGALRM, nsigaction, osigaction);
alarm(sec);
if (fgets(buf, len, rfp) == NULL)
st = -1; // EOF
buf[strlen(buf) - 1] = 0;
}
else { st = -2; } // Time-out
alarm(0); // Reset old alarm and handler
sigaction(SIGALRM, osigaction, 0);
return st;
}
int process_cmd_ipc(char *argv, int linenum, char* errbuf)
{
struct my_msgbuf buf;
int msqid, msqid_parent, st, shmid, str_len;
key_t key, key_shm;
char* shared_buf;
FILE *fd;
// create key for shared memory segment
if ((key_shm = ftok("shm_key.txt", 'R')) == -1) {
perror("ftok");
exit(1);
}
// Connect to shared memory segment
if ((shmid = shmget(key_shm, SHM_SIZE, 0644 | IPC_CREAT)) == -1)
{
perror("shmget");
exit(1);
}
// Attach to shared memory segment
shared_buf = shmat(shmid, (void *) 0, 0);
if (shared_buf == (char *) (-1)) {
perror("shmat");
exit(1);
}
// End of shared memory section` //
// Begin: message queue section
pid_t cpid=fork();
if (cpid<0) {
fprintf(stderr,"ERR: \"fork\" error! (Line=%d)\n", linenum);
exit (-1);
} else if (cpid==0) // child process
{ // Begin: message queue
if ((key = ftok("mysh.c", 'B')) == -1) {
perror("ftok");
exit(1);
}
if ((msqid = msgget(key, 0644)) == -1) {
perror("msgget from child");
exit(1);
}
memset(buf.mtext, 0, sizeof(buf.mtext)); // Clear buffer
if(msgrcv(msqid, (struct msgbuf*) &buf, sizeof(buf), 0,0) == -1)
{
perror("msgrcv");
exit(1);
}
// End: message queue
// begin: shared memory segment
memset(shared_buf, 0, SHM_SIZE); // zeroize shared_buf
fd = popen(buf.mtext, "r");
str_len = 0;
while(fgets(shared_buf + str_len, SHM_SIZE, fd) != NULL)
{ str_len = strlen(shared_buf); }
pclose(fd);
// end: shared memory segment
}
else { // parent
// Begin - message queue
if ((key = ftok("mysh.c", 'B')) == -1) {
perror("ftok");
exit(1);
}
if ((msqid_parent = msgget(key, 0644 | IPC_CREAT)) == -1) {
perror("msgget from parent");
exit(1);
}
buf.mtype = 1;
strncpy(buf.mtext, argv, strlen(argv));
if(msgsnd(msqid_parent, (struct my_msgbuf*) &buf, strlen(buf.mtext), 0) == -1)
perror("msgsnd");
// End - message queue
// Begin - shared memory
// usleep(10000);
printf("%s", shared_buf);
// End - shared memory
} // if-else fork
}
int parse_cmd(char *buf, char **vbuf, char *errbuf)
{
int i=0;
char *delim=" ,\t\n";
char *tok;
tok=strtok(buf,delim);
while (tok) {
vbuf[i]=(char *)malloc(BUFSZ*sizeof(char));
strcpy(vbuf[i],tok);
tok=strtok(NULL,delim);
i++;
}
vbuf[i]=0;
return i;
}
void do_cmd(char *buf, int len, int linenum, char *errbuf) {
int i=0; int numargs;
char *vbuf[128];
char* copy = (char *) malloc(strlen(buf) + 1);
int maxargs=sizeof(vbuf)/sizeof(char *);
strcpy(copy, buf);
numargs = parse_cmd(copy,vbuf,errbuf);
process_cmd_ipc(buf,linenum, errbuf);
for (i=0;i<numargs; i++) { free(vbuf[i]); }
free(copy);
copy = NULL;
return;
}
int main(int argc, char **argv)
{
int i; int st; int linenum=0;
char *buf=(char *)malloc(BUFSZ*sizeof(char));
char *errbuf=(char *)malloc(ERRBUFSZ*sizeof(char));
char *mysh = "";
FILE *rfp=stdin;
if (isatty(fileno(rfp))) {
mysh = "mysh (Ctrl-C to exit)>";
fprintf(stderr,"%s",mysh);
}
while(1)
{
st = timed_io(buf, BUFSZ, stdin, TIMEOUT_TIMEDIO);
if (st != 0)
{
fprintf(stderr, "ERR: No input %s (Status=%d)\n", errbuf, st);
return -1;
}
else
{
linenum++;
if (*buf)
{ do_cmd(buf, BUFSZ, linenum,errbuf); }
if (mysh)
fprintf(stderr,"%s",mysh);
}
}
}
main.cpp
#include<iostream>
#include "cuda.h"
using namespace std;
void cuda_calculation();
int main()
{
cuda_calculation();
return 0;
}
cu.h
void call(int , int ,float* , int );
cuda.cpp
#include <stdio.h>
#include <cuda.h>
#include "cu.h"
void cuda_calculation()
{
float *a_h, *a_d; // Pointer to host & device arrays
const int N = 10; // Number of elements in arrays
size_t size = N * sizeof(float);
a_h = (float *)malloc(size); // Allocate array on host
cudaMalloc((void **) &a_d, size); // Allocate array on device
// Initialize host array and copy it to CUDA device
for (int i=0; i<N; i++) a_h[i] = (float)i;
cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
// Do calculation on device:
int block_size = 4;
int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
void call(n_blocks, block_size,&a_d, N);
/*square_array <<< n_blocks, block_size >>> (a_d, N);*/
// Retrieve result from device and store it in host array
cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
// Print results
for (int i=0; i<N; i++) printf("%d %f\n", i, a_h[i]);
// Cleanup
free(a_h); cudaFree(a_d);
}
cu.cu
#include <stdio.h>
#include "cu.h"
#include <cuda.h>
// Kernel that executes on the CUDA device
__global__ void square_array(float *a, int N)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx<N) a[idx] = a[idx] * a[idx];
}
//}
void call(int a,int b,float* c,int d)
{
square_array <<< 3,4 >>> (c,d);
}
I tried to seperate the kernal code and host code in a cpp file, however the following error prevails:
Error 'cudaMemcpy': identifier not found and the other cuda related identifier is not identified.
how to use the cuda related identifier in cpp file and call the kernal functions
There are some errors: void cuda_calculation(); needs to be visible to main.cpp through a header file (cu.h).
Also make sure to compile your .cu files with nvcc and NOT as a standard C++ file. Use CUDA compilation rules to make this process easy (installed by default as part of CUDA toolkit)
after a long trial ,I came with the proper output,
to include the cuda identifier in the cpp files we not only need to include cuda.h but also we need to include cuda_runtime.h as
cuda.cpp as
#include <stdio.h>
#include <cuda.h>
#include<cuda_runtime.h>
#include "cu.h"
#include "cud.h"
//void call(int , int ,float * , int );
void cuda_calculation()
{
float *a_h, *a_d; // Pointer to host & device arrays
const int N = 10; // Number of elements in arrays
size_t size = N * sizeof(float);
a_h = (float *)malloc(size); // Allocate array on host
cudaMalloc((void **) &a_d, size); // Allocate array on device
// Initialize host array and copy it to CUDA device
for (int i=0; i<N; i++) a_h[i] = (float)i;
cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
// Do calculation on device:
int block_size = 4;
int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
call(n_blocks, block_size,a_d, N);
/*square_array <<< n_blocks, block_size >>> (a_d, N);*/
// Retrieve result from device and store it in host array
cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
// Print results
for (int i=0; i<N; i++) printf("%d %f\n", i, a_h[i]);
// Cleanup
free(a_h);
cudaFree(a_d);
}
so the others files are
main.cpp
#include<iostream>
#include "cud.h"
using namespace std;
int main()
{
cuda_calculation();
return 0;
}
cud.h
void cuda_calculation();
cu.h
void call(int , int ,float* , int );
cu.cu
#include <stdio.h>
#include "cu.h"
#include <cuda.h>
// Kernel that executes on the CUDA device
__global__ void square_array(float *a, int N)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx<N) a[idx] = a[idx] * a[idx];
}
//}
void call(int a,int b,float* c,int d)
{
square_array <<< 3,4 >>> (c,d);
}