Pointer to device array inside host struct - struct

I'm trying to create a struct that will hold both the host and device arrays in one place and should reside on the host. I later intend expand it to be a element of a linked list. The basic struct looks like this:
typedef struct Data{
double *h;
double *d;
} Data;
Where *h points to an array of doubles on the host and *d points to an array of doubles on the device.
There are various answers on SO about copying whole structs to the the device (CUDA cudaMemcpy Struct of Arrays) but none of them quite do what I need. I have the following code but keep getting illegal memory access errors.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "cuda.h"
/*
* CUDA Error stuff
*/
static void HandleError( cudaError_t err,
const char *file,
int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ),
file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
#define HANDLE_NULL( a ) {if (a == NULL) { \
printf( "Host memory failed in %s at line %d\n", \
__FILE__, __LINE__ ); \
exit( EXIT_FAILURE );}}
//malloc error code
int errMsg(const char *message, int errorCode)
{
printf("%s\n", message);
return errorCode;
}
typedef struct Data{
double *h;
double *d;
} Data;
__global__ void kernel(Data *d)
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid<100){
d->d[tid] = 2;
}
}
int main()
{
Data *d;
d = (Data*)malloc(sizeof(Data));
d->h = (double*)malloc(sizeof(double)*100);
HANDLE_ERROR( cudaMalloc((void**) &(d->d), 100*sizeof(double)) );
for(int i=0; i<100; i++){
d->h[i] = i;
}
HANDLE_ERROR( cudaMemcpy(d->d, d->h, 100*sizeof(double), cudaMemcpyHostToDevice) );
printf("%f\n", d->h[1]);
kernel<<<1, 102>>>(d);
printf("done\n");
{
cudaError_t cudaerr = cudaDeviceSynchronize();
if (cudaerr != cudaSuccess)
printf("kernel launch failed with error \"%s\"->\n",
cudaGetErrorString(cudaerr));
}
HANDLE_ERROR( cudaMemcpy(d->h, d->d, 100*sizeof(double), cudaMemcpyDeviceToHost) );
printf("%f\n", d->h[99]);
return 0;
}
The output I get is:
1.000000
done
kernel launch failed with error "an illegal memory access was encountered"->
an illegal memory access was encountered in linkedListGPU.cu at line 77
I suspect I have just messed up my pointers a bit. The error handling code is from the Wiley introduction to CUDA book, if there code is not allowed on here I'll remove it.
Thanks.

The problem is d itself is a pointer to a host-allocated struct (where the d and h pointers are contained. When you pass the d struct pointer to the kernel like so:
kernel<<<1, 102>>>(d);
^
this is a pointer to memory on the host
and then attempt to dereference that pointer in device code here:
d->...;
^
This operator dereferences the pointer to the left of it
you get an illegal memory access.
There are at least 2 obvious ways to fix this:
Pass the struct by value instead of by pointer.
Here is an example:
$ cat t1311.cu
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "cuda.h"
/*
* CUDA Error stuff
*/
static void HandleError( cudaError_t err,
const char *file,
int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ),
file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
#define HANDLE_NULL( a ) {if (a == NULL) { \
printf( "Host memory failed in %s at line %d\n", \
__FILE__, __LINE__ ); \
exit( EXIT_FAILURE );}}
//malloc error code
int errMsg(const char *message, int errorCode)
{
printf("%s\n", message);
return errorCode;
}
typedef struct Data{
double *h;
double *d;
} Data;
__global__ void kernel(Data d)
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid<100){
d.d[tid] = 2;
}
}
int main()
{
Data d;
d.h = (double*)malloc(sizeof(double)*100);
HANDLE_ERROR( cudaMalloc((void**) &(d.d), 100*sizeof(double)) );
for(int i=0; i<100; i++){
d.h[i] = i;
}
HANDLE_ERROR( cudaMemcpy(d.d, d.h, 100*sizeof(double), cudaMemcpyHostToDevice) );
printf("%f\n", d.h[1]);
kernel<<<1, 102>>>(d);
printf("done\n");
{
cudaError_t cudaerr = cudaDeviceSynchronize();
if (cudaerr != cudaSuccess)
printf("kernel launch failed with error \"%s\"->\n",
cudaGetErrorString(cudaerr));
}
HANDLE_ERROR( cudaMemcpy(d.h, d.d, 100*sizeof(double), cudaMemcpyDeviceToHost) );
printf("%f\n", d.h[99]);
return 0;
}
$ nvcc -arch=sm_35 -o t1311 t1311.cu
$ cuda-memcheck ./t1311
========= CUDA-MEMCHECK
1.000000
done
2.000000
========= ERROR SUMMARY: 0 errors
$
Make a device copy of the struct that the d host pointer points to:
Here is an example:
$ cat t1311.cu
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "cuda.h"
/*
* CUDA Error stuff
*/
static void HandleError( cudaError_t err,
const char *file,
int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ),
file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
#define HANDLE_NULL( a ) {if (a == NULL) { \
printf( "Host memory failed in %s at line %d\n", \
__FILE__, __LINE__ ); \
exit( EXIT_FAILURE );}}
//malloc error code
int errMsg(const char *message, int errorCode)
{
printf("%s\n", message);
return errorCode;
}
typedef struct Data{
double *h;
double *d;
} Data;
__global__ void kernel(Data *d)
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid<100){
d->d[tid] = 2;
}
}
int main()
{
Data *d, *dev_d;
d = (Data*)malloc(sizeof(Data));
HANDLE_ERROR(cudaMalloc(&dev_d, sizeof(Data)));
d->h = (double*)malloc(sizeof(double)*100);
HANDLE_ERROR( cudaMalloc((void**) &(d->d), 100*sizeof(double)) );
for(int i=0; i<100; i++){
d->h[i] = i;
}
HANDLE_ERROR( cudaMemcpy(d->d, d->h, 100*sizeof(double), cudaMemcpyHostToDevice) );
HANDLE_ERROR(cudaMemcpy(dev_d, d, sizeof(Data), cudaMemcpyHostToDevice));
printf("%f\n", d->h[1]);
kernel<<<1, 102>>>(dev_d);
printf("done\n");
{
cudaError_t cudaerr = cudaDeviceSynchronize();
if (cudaerr != cudaSuccess)
printf("kernel launch failed with error \"%s\"->\n",
cudaGetErrorString(cudaerr));
}
HANDLE_ERROR( cudaMemcpy(d->h, d->d, 100*sizeof(double), cudaMemcpyDeviceToHost) );
printf("%f\n", d->h[99]);
return 0;
}
$ nvcc -arch=sm_35 -o t1311 t1311.cu
$ cuda-memcheck ./t1311
========= CUDA-MEMCHECK
1.000000
done
2.000000
========= ERROR SUMMARY: 0 errors
$
As an aside, you can follow the method outlined here to carry your debug process a bit farther.

Related

Using eBPF to measure CPU mode switch overhead incured by making system call

As title, but the measurement result is unreasonable. Let me describe the current status.
I'm using syscall getuid as measurement target, I started by measureing the complete overhead with two clock_gettime bounded around, then measure the entry (what SYSCALL instruction does before executing the actual getuid code) and leaving overhead saparately (with eBPF program hook onto the entry and leaving point).
The result for the complete overhead is ~65ns, and regarding to the entry and leaving overhead, it's ~77ns and ~70ns respectively.
It's obvious that my measurement has some additional overhead except the typical overhead. However, it's weird that since clock_gettime is a vDSO syscall, it should barely have noticeable overhead. And BPF, which is a lightweight instrumental tool (JIT-ed and etc.) these day in Linux, shouldn't have noticeable overhead too.
Is there anyone have idea what additional overhead my measurement incurs?
Following is my measurement code:
userland (measuring the return-from-kernel overhead):
#define _GNU_SOURCE
#include <bpf.h>
#include <libbpf.h>
#include <stdlib.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <string.h>
#include <asm/errno.h>
#include <linux/if_link.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>
#include <asm/unistd.h>
#include <time.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sched.h>
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
#define TEST_CNT 1000000
#define BPF_FILE_NAME "mkern.o"
#define BPF_MAP_NAME "msys"
static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
int cpu, int group_fd,
unsigned long flags)
{
attr->size = sizeof(*attr);
return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}
static int attach_kprobe(int prog_fd)
{
int err, fd, id;
char buf[32];
struct perf_event_attr attr = {};
err = system("echo 'r:kp_sys_batch __x64_sys_getuid' > /sys/kernel/debug/tracing/kprobe_events");
if (err < 0) {
fprintf(stderr, "Failed to create kprobe, error '%s'\n", strerror(errno));
return -1;
}
fd = open("/sys/kernel/debug/tracing/events/kprobes/kp_sys_batch/id", O_RDONLY, 0);
if (fd < 0) {
fprintf(stderr, "Failed to open event %s\n", "sys_batch");
return -1;
}
err = read(fd, buf, sizeof(buf));
if (err < 0 || err >= sizeof(buf)) {
fprintf(stderr, "read from '%s' failed '%s'\n", "sys_batch", strerror(errno));
return -1;
}
close(fd);
buf[err] = 0;
id = atoi(buf);
attr.config = id;
attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_type = PERF_SAMPLE_RAW;
attr.sample_period = 1;
attr.wakeup_events = 1;
fd = sys_perf_event_open(&attr, 0/*this process*/, -1/*any cpu*/, -1/*group leader*/, 0);
if (fd < 0) {
perror("sys_perf_event_open");
fprintf(stderr, "Failed to open perf_event (id: %llu)\n", attr.config);
return -1;
}
err = ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
if (err < 0) {
fprintf(stderr, "ioctl PERF_EVENT_IOC_ENABLE failed err %s\n",
strerror(errno));
return -1;
}
err = ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
if (err < 0) {
fprintf(stderr, "ioctl PERF_EVENT_IOC_SET_BPF failed: %s\n",
strerror(errno));
return -1;
}
return 0;
}
static void maxi_memlock_rlimit(void)
{
struct rlimit rlim_new = {
.rlim_cur = RLIM_INFINITY,
.rlim_max = RLIM_INFINITY,
};
if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) {
fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
exit(-1);
}
}
static int find_map_fd(struct bpf_object *bpf_obj, const char *mapname)
{
struct bpf_map *map;
int map_fd = -1;
map = bpf_object__find_map_by_name(bpf_obj, mapname);
if (!map) {
fprintf(stderr, "Failed finding map by name: %s\n", mapname);
exit(-1);
}
map_fd = bpf_map__fd(map);
return map_fd;
}
int main(int argc, char **argv)
{
int bpf_map_fd;
int bpf_prog_fd = -1;
int err;
int key = 0;
struct timespec tp;
struct bpf_object *bpf_obj;
struct reals map;
struct bpf_prog_load_attr xattr = {
.prog_type = BPF_PROG_TYPE_KPROBE,
.file = BPF_FILE_NAME,
};
maxi_memlock_rlimit();
err = bpf_prog_load_xattr(&xattr, &bpf_obj, &bpf_prog_fd);
if (err) {
fprintf(stderr, "Failed loading bpf object file\n");
exit(-1);
}
if (attach_kprobe(bpf_prog_fd)) {
fprintf(stderr, "Failed attaching kprobe\n");
exit(-1);
}
bpf_map_fd = find_map_fd(bpf_obj, BPF_MAP_NAME);
if (find_map_fd < 0) {
fprintf(stderr, "Failed finding map fd\n");
exit(-1);
}
/* warm up */
for (int i = 0; i < TEST_CNT; i++) {
syscall(__NR_getuid); /* dummy call */
clock_gettime(CLOCK_MONOTONIC, &tp);
if (unlikely(bpf_map_lookup_elem(bpf_map_fd, &key, &map))) {
fprintf(stderr, "Failed to lookup map element\n");
perror("lookup");
exit(-1);
}
}
uint64_t delta = 0;
for (int i = 0; i < TEST_CNT; i++) {
syscall(__NR_getuid); /* dummy call */
clock_gettime(CLOCK_MONOTONIC, &tp);
if (unlikely(bpf_map_lookup_elem(bpf_map_fd, &key, &map))) {
fprintf(stderr, "Failed to lookup map element\n");
perror("lookup");
exit(-1);
}
delta += (1000000000 * tp.tv_sec + tp.tv_nsec) - map.ts;
}
printf("avg: %fns\n", (double) delta / TEST_CNT);
return 0;
}
user land (measuring the enter-kernel overhead, almost same as the above, except what I pointed out):
err = system("echo 'p:kp_sys_batch sys_batch' > /sys/kernel/debug/tracing/kprobe_events");
...
clock_gettime(CLOCK_MONOTONIC, &tp);
syscall(__NR_getuid); /* dummy call */
...
delta += map.ts - (1000000000 * tp.tv_sec + tp.tv_nsec);
kernel land:
SEC("getuid")
int kp_sys_batch(struct pt_regs *ctx)
{
__u32 i = 0;
struct reals *r;
r = bpf_map_lookup_elem(&reals, &i);
if (!r)
return 1;
r->ts = bpf_ktime_get_ns();
return 0;
}
Except the additional overhead I mentioned above, inside the return-from-kernel measurement code, if the echo 'r:kp_sys_batch sys_batch' is changed to echo 'p:kp_sys_batch sys_batch' (which means that the measurement would take the syscall execution overhead into account), the result would be ~48ns, this means that the result includes overhead of syscall execution and return-from-kernel. Any idea why this could be only ~48ns?
Thanks!

write_proc is not invoked when written from userspace

I am trying to understand procfs for communication between userspace and kernel module. My module has basic two functions for procfs write_proc, driver_mmap.
I call multiple times write_proc by calling fputs("123456789",fd). where fd is file descriptor to procfs entry in /proc directory. But I don't see write_proc called multiple time. Code is attached by here.
<code>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <sound/core.h>
#include <sound/initval.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/mm.h> /* mmap related stuff */
#define BUF_SIZE 64 * 1024
int *MmapBuffer;
int Factor = 1;
static int write_proc(struct file *filp, int *buf, size_t count, loff_t *offp)
{
int rc,i;
printk("in Write \n");
for (i = 1; i <= 16*1024 ; i++)
MmapBuffer[i-1] = (i+1)*Factor;
Factor++;
return count;
}
static int driver_mmap(struct file *file, struct vm_area_struct *vma)
{
int ret;
vma->vm_flags |= VM_LOCKED|VM_SHARED;
ret = remap_pfn_range(vma, vma->vm_start,
virt_to_phys(MmapBuffer) >> PAGE_SHIFT,
vma->vm_end-vma->vm_start, vma->vm_page_prot);
if(ret != 0)
printk("MMAP Failed \n");
SetPageReserved(virt_to_page(MmapBuffer));
printk("MMAP Succeeded \n");
return 0;
}
// file operations
struct file_operations proc_fops =
{
.write = write_proc,
.mmap = driver_mmap,
};
// init module
int init_module_test(void)
{
printk("<1>Hello world\n");
MmapBuffer = kzalloc(BUF_SIZE,__GFP_COLD|GFP_DMA);
if(MmapBuffer == NULL)
printk("Kzalloc failed. reduce buffer size \n");
proc_create ("Test_fs",0,NULL, &proc_fops);
return 0;
}
// exit module
void cleanup_module_test(void)
{
kfree(MmapBuffer);
remove_proc_entry ("Test_fs", NULL);
printk("Goodbye world\n");
}
module_init(init_module_test);
module_exit(cleanup_module_test);
MODULE_LICENSE("GPL");
</code>
Application code
<code>
#include<stdio.h>
#include<stdlib.h>
#include<sys/mman.h>
#include<errno.h>
#include <fcntl.h>
int main(void)
{
int fd;
int i,j;
int *msg ;
printf("Allocation started \n ");
msg=(int*)malloc(64*1024);
if(msg == NULL)
printf("Allocation failed \n");
//unsigned int *addr;
printf("Starting opening \n ");
if((fd=open("/proc/Test_fs", O_RDONLY ))<0)
{
printf("File not opened ");
}
printf("Starting mapping \n ");
msg = mmap(NULL, 64*1024, PROT_READ, MAP_SHARED , fd, 0);
printf("done from module \n ");
if(msg == MAP_FAILED)
{
printf("MAP failed and error is %s", strerror(errno));
return 0;
}
close(fd);
printf("Successful mapping");
FILE *f;
f=fopen("/proc/Test_fs", "wr");
if(!f)
{
printf("File not opened ");
}
for (j = 0; j < 10 ; j++)
{
if(fputs("1234567890,",f) <= 0)
printf("write failed, ");
for (i = 0; i < 16*1024 ; i++)
printf("%d, ", msg[i]);
printf("\n \n done \n \n ");
}
fclose(f);
return 0;
}
</code>

Produser Consumer wthout semaphore

I am trying to solve producer consumer problem using threads without semaphoere.In my client i create 4 threads 2 for producer and 2 for consumer, each of them send M produce/consume messages. Here is my client code.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <pthread.h>
#define BUFSIZE 4096
#define N 4
/*
** Client
*/
int M = 10;
pthread_t threads[N];
char buf[BUFSIZE];
char *service;
char *host = "localhost";
int cc;
int csock;
int consumed,produced;
void *connect_and_handle(void *msg){
/* Create the socket to the controller */
if ( ( csock = connectsock( host, service, "tcp" )) == 0 ) {
fprintf( stderr, "Cannot connect to server.\n" );
exit( -1 );
}
printf( "The server is ready, please start entering commands.\n" );
fflush( stdout );
// Start the loop
int k;
//char msg[50];
for (k=0;k<M;k++){
strcpy(buf, msg);
// Send to the server
if ( write( csock, buf, strlen(buf) ) < 0 ) {
fprintf( stderr, "client write failed: %s\n", strerror(errno) );
exit( -1 );
}
if ( (cc = read( csock, buf, BUFSIZE )) <= 0 )
break;
buf[cc] = 0;
printf( "Server replied: %s\n", buf );
}
close( csock );
// exit thread
pthread_exit(NULL);
}
int main( int argc, char *argv[] ){
char *msg, *msg2;
switch( argc ) {
case 2:
service = argv[1];
break;
case 3:
host = argv[1];
service = argv[2];
break;
default:
fprintf( stderr, "usage: chat [host] port\n" );
exit(-1);
}
// thread code goes here
int i, n = N;
for (i=0;i<N;i++){
msg = (char*)malloc(32*sizeof(char));
msg2 = (char*)malloc(32*sizeof(char));
sprintf(msg,"PRODUCE This is the item #%i", i);
sprintf(msg2, "CONSUME");
//producer thread
produced = pthread_create( &threads[i], NULL, connect_and_handle, (void *) msg );
if ( produced != 0 ) { printf( "Error: pthread_create returned code %d.\n", produced); exit( -1 );}
//consumer thread
/*
i++;
consumed = pthread_create( &threads[i], NULL, connect_and_handle, (void *) msg2 );
if ( consumed != 0 ){ printf( "Error: pthread_create returned code %d.\n", consumed ); exit( -1 );}
*/
}
}
and server :
// This server implements part of the 333 protocol
// NUMBER - number of clients served
// NAMES - developers
// GOODBYE - close connection
// ADD - increment
// SUBTRACT- decrement
//
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <semaphore.h>
#define QLEN 5
#define BUFSIZE 4096
#define MAX 100
/*
** This server ... is threaded
*/
// Function prototypes
int passivesock( char *, char *, int, int * );
void *handle_a_client( void *arg );
// Global variables are shared by the threads
int clients = 0;
char *buffer[MAX];
char *maloc_buf;
int count=0;
int full_count=0;
int empty_count=MAX;
int main( int argc, char *argv[] ) {
char *service;
struct sockaddr_in fsin;
int alen;
int msock;
int ssock;
int rport = 0;
switch (argc) {
case 1:
// No args? let the OS choose a port and tell the user
rport = 1;
break;
case 2:
// User provides a port? then use it
service = argv[1];
break;
default:
fprintf( stderr, "usage: server [port]\n" );
exit(-1);
}
msock = passivesock( service, "tcp", QLEN, &rport );
if (rport) {
// Tell the user the selected port
printf( "server: port %d\n", rport );
fflush( stdout );
}
// Keep accepting clients until you are killed
for (;;) {
int ssock;
pthread_t pid;
alen = sizeof(fsin);
ssock = accept( msock, (struct sockaddr *)&fsin, &alen );
if (ssock < 0) {
fprintf( stderr, "accept: %s\n", strerror(errno) );
exit(-1);
}
clients++;
printf("connected , %i", clients);
// Launch a thread to manage this client
// YES, pid is getting overwritten each time, but it is unused
pthread_create( &pid, NULL, handle_a_client, (void *) ssock );
}
}
void *handle_a_client( void *arg ) {
char requestbuf[BUFSIZE];
char replybuf[BUFSIZE];
int ssock = (int) arg;
int cc;
for (;;) {
if ( (cc = read( ssock, requestbuf, BUFSIZE )) <= 0 ) {
printf( "The client has gone.\n");
(void) close(ssock);
pthread_exit(0);
break;
}
else {
// Remove the newline and null-terminate the string
requestbuf[cc] = '\0';
int size = cc-7;
printf( "The client on %d says: %s\n", ssock, requestbuf );
if ( strncasecmp( requestbuf, "goodbye", 7 ) == 0 ) {
close( ssock );
break;
}
else if ( strncasecmp( requestbuf, "PRODUCE", 7 ) == 0 ) {
if (full_count == MAX){
strcpy(replybuf,"FULL\n");
write(ssock,replybuf,strlen(replybuf));
}
else {
maloc_buf=(char*) malloc((size)*sizeof(char));
strcpy(maloc_buf, (requestbuf+8));
buffer[full_count]=maloc_buf;
int num=full_count+1;
sprintf(replybuf, "Client produced item no%i: %s",full_count, buffer[full_count]);
full_count++;
empty_count--;
}
}
else if ( strncasecmp( requestbuf, "CONSUME", 7 ) == 0 ) {
if (empty_count == MAX) {
strcpy(replybuf,"EMPTY\n");
write( ssock, replybuf, strlen(replybuf) );
}
else {
sprintf(replybuf,"OK %s", buffer
[full_count]);
free(buffer[full_count]);
full_count--;
empty_count++;
}
}
}
}
}
When i run my server and then try to connect to it in client, nothing happens. Debugging showed (i am not sure) that in client code after
if ( ( csock = connectsock( host, service, "tcp" )) == 0 ) {
i am exiting, nothing is printed to console both in client and server.

Message queue/shared memory method

I have a bit of a problem in using IPC (inter-process communication) program below.
Please let me explain:
I want to pass Linux commands such as "ls" or "wc file.txt"
from a parent to a child to execute using the message queue, and
then have the child returning the command outputs back to
the parent process using shared memory method.
But this is what I got: The parent process always got the output 1 step behind;
in the following fashion:
Step1) ls file.txt
(Nothing showed up.)
Step2) wc file.txt
(Output of earlier command "ls file.txt" showed up here instead.)
Step 3) cat file.txt
(Output of earlier command "wc file.txt" showed up instead.)
Any help is appreciated.
To compile: gcc -o program ./program.c
To run: -./program -v
Code:
#define BUFSZ 512
#define ERRBUFSZ 512
#define TIMEOUT_TIMEDIO 20
#define SHM_SIZE 5120
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <setjmp.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
static sigjmp_buf jmpbuf;
int timed_io(char* buf, int len, FILE* rfp, int sec);
static void sigalrm_handler(int signo);
void do_cmd(char *buf, int len, int linenum, char *errbuf);
int parse_cmd(char *buf, char **vbuf, char *errbuf);
int process_cmd_ipc(char *argv, int linenum, char *errbuf);
struct my_msgbuf {
long mtype;
char mtext[256];
};
static void sigalrm_handler(int signo)
{
siglongjmp(jmpbuf, 1);
}
int timed_io(char* buf, int len, FILE* rfp, int sec)
{
struct sigaction nsigaction[1];
struct sigaction osigaction[1];
int prev_alrm;
int st = 0;
if(sigsetjmp(jmpbuf, 1) == 0)
{
nsigaction->sa_handler = sigalrm_handler;
sigemptyset(&nsigaction->sa_mask);
nsigaction->sa_flags = SA_RESTART;
prev_alrm = alarm(0);
sigaction(SIGALRM, nsigaction, osigaction);
alarm(sec);
if (fgets(buf, len, rfp) == NULL)
st = -1; // EOF
buf[strlen(buf) - 1] = 0;
}
else { st = -2; } // Time-out
alarm(0); // Reset old alarm and handler
sigaction(SIGALRM, osigaction, 0);
return st;
}
int process_cmd_ipc(char *argv, int linenum, char* errbuf)
{
struct my_msgbuf buf;
int msqid, msqid_parent, st, shmid, str_len;
key_t key, key_shm;
char* shared_buf;
FILE *fd;
// create key for shared memory segment
if ((key_shm = ftok("shm_key.txt", 'R')) == -1) {
perror("ftok");
exit(1);
}
// Connect to shared memory segment
if ((shmid = shmget(key_shm, SHM_SIZE, 0644 | IPC_CREAT)) == -1)
{
perror("shmget");
exit(1);
}
// Attach to shared memory segment
shared_buf = shmat(shmid, (void *) 0, 0);
if (shared_buf == (char *) (-1)) {
perror("shmat");
exit(1);
}
// End of shared memory section` //
// Begin: message queue section
pid_t cpid=fork();
if (cpid<0) {
fprintf(stderr,"ERR: \"fork\" error! (Line=%d)\n", linenum);
exit (-1);
} else if (cpid==0) // child process
{ // Begin: message queue
if ((key = ftok("mysh.c", 'B')) == -1) {
perror("ftok");
exit(1);
}
if ((msqid = msgget(key, 0644)) == -1) {
perror("msgget from child");
exit(1);
}
memset(buf.mtext, 0, sizeof(buf.mtext)); // Clear buffer
if(msgrcv(msqid, (struct msgbuf*) &buf, sizeof(buf), 0,0) == -1)
{
perror("msgrcv");
exit(1);
}
// End: message queue
// begin: shared memory segment
memset(shared_buf, 0, SHM_SIZE); // zeroize shared_buf
fd = popen(buf.mtext, "r");
str_len = 0;
while(fgets(shared_buf + str_len, SHM_SIZE, fd) != NULL)
{ str_len = strlen(shared_buf); }
pclose(fd);
// end: shared memory segment
}
else { // parent
// Begin - message queue
if ((key = ftok("mysh.c", 'B')) == -1) {
perror("ftok");
exit(1);
}
if ((msqid_parent = msgget(key, 0644 | IPC_CREAT)) == -1) {
perror("msgget from parent");
exit(1);
}
buf.mtype = 1;
strncpy(buf.mtext, argv, strlen(argv));
if(msgsnd(msqid_parent, (struct my_msgbuf*) &buf, strlen(buf.mtext), 0) == -1)
perror("msgsnd");
// End - message queue
// Begin - shared memory
// usleep(10000);
printf("%s", shared_buf);
// End - shared memory
} // if-else fork
}
int parse_cmd(char *buf, char **vbuf, char *errbuf)
{
int i=0;
char *delim=" ,\t\n";
char *tok;
tok=strtok(buf,delim);
while (tok) {
vbuf[i]=(char *)malloc(BUFSZ*sizeof(char));
strcpy(vbuf[i],tok);
tok=strtok(NULL,delim);
i++;
}
vbuf[i]=0;
return i;
}
void do_cmd(char *buf, int len, int linenum, char *errbuf) {
int i=0; int numargs;
char *vbuf[128];
char* copy = (char *) malloc(strlen(buf) + 1);
int maxargs=sizeof(vbuf)/sizeof(char *);
strcpy(copy, buf);
numargs = parse_cmd(copy,vbuf,errbuf);
process_cmd_ipc(buf,linenum, errbuf);
for (i=0;i<numargs; i++) { free(vbuf[i]); }
free(copy);
copy = NULL;
return;
}
int main(int argc, char **argv)
{
int i; int st; int linenum=0;
char *buf=(char *)malloc(BUFSZ*sizeof(char));
char *errbuf=(char *)malloc(ERRBUFSZ*sizeof(char));
char *mysh = "";
FILE *rfp=stdin;
if (isatty(fileno(rfp))) {
mysh = "mysh (Ctrl-C to exit)>";
fprintf(stderr,"%s",mysh);
}
while(1)
{
st = timed_io(buf, BUFSZ, stdin, TIMEOUT_TIMEDIO);
if (st != 0)
{
fprintf(stderr, "ERR: No input %s (Status=%d)\n", errbuf, st);
return -1;
}
else
{
linenum++;
if (*buf)
{ do_cmd(buf, BUFSZ, linenum,errbuf); }
if (mysh)
fprintf(stderr,"%s",mysh);
}
}
}

Why semaphore object is not initialized?

I'm learning to use semaphore object. But I can't initialize it.
A sem_init function always return value -1 rain or shine.
return value -1 indicates first argument is not valid pointer, say my reference.
But I can't find miss print in my code. I compiled my code in Xcode on OS X.
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
void * thread_snd(void *arg);
void * thread_rcv(void* arg);
sem_t bin_sem;
int number = 0;
char thread1[] = "A thread";
char thread2[] = "B thread";
char thread3[] = "C thread";
int main(int argc, char** argv)
{
pthread_t t1, t2 ,t3;
void *thread_result;
int state;
state = sem_init(&bin_sem, 0, 0);
if(state != 0)
{
puts("fail to initialize semaphore");
exit(1);
}
pthread_create(&t1, NULL, thread_snd, &thread1);
pthread_create(&t2, NULL, thread_rcv, &thread2);
pthread_create(&t3, NULL, thread_rcv, &thread3);
pthread_join(t1, &thread_result);
pthread_join(t2, &thread_result);
pthread_join(t3, &thread_result);
printf("final number : %d \n", number);
sem_destroy(&bin_sem);
return 0;
}
void * thread_snd(void * arg)
{
int i;
for(i = 0 ; i < 4; i++)
{
while(number != 0)
sleep(1);
number++;
printf("execution : %s, number : %d \n", (char*) arg, number);
sem_post(&bin_sem);
}
}
void * thread_rcv(void* arg)
{
int i;
for(i = 0 ; i < 2; i++)
{
sem_wait(&bin_sem);
number--;
printf("execution : %s number : %d \n", (char*)arg, number);
}
}
On Mac OS X (10.6.8) there is no sem_init() and sem_destroy().
Use sem_open() and sem_unlink() instead.
/*
cat semaphore_test.c
source:
"Why semaphore object is not initialized?",
https://stackoverflow.com/questions/13834367/why-semaphore-object-is-not-initialized
compiled on Mac OS X 10.6.8 with:
gcc -ansi -pedantic -std=gnu99 -Os -Wall -Wextra -Wshadow -Wpointer-arith -Wcast-qual -Wstrict-prototypes \
-Wmissing-prototypes -Wformat=2 -Wreturn-type -Wunreachable-code -finline -l pthread -o semaphore_test semaphore_test.c
./semaphore_test
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
void * thread_snd(void *arg);
void * thread_rcv(void* arg);
//sem_t bin_sem;
static sem_t *bin_sem;
static const char *semname = "Semaphore";
static int number = 0;
char thread1[] = "A thread";
char thread2[] = "B thread";
char thread3[] = "C thread";
int main(void)
{
pthread_t t1, t2 ,t3;
void *thread_result;
int state;
/*
state = sem_init(&bin_sem, 0, 0);
if(state != 0)
{
puts("fail to initialize semaphore");
exit(1);
}
*/
bin_sem = sem_open(semname, O_CREAT, 0777, 0);
if (bin_sem == SEM_FAILED)
{
fprintf(stderr, "%s\n", "ERROR creating semaphore semname");
exit(EXIT_FAILURE);
}
pthread_create(&t1, NULL, thread_snd, &thread1);
pthread_create(&t2, NULL, thread_rcv, &thread2);
pthread_create(&t3, NULL, thread_rcv, &thread3);
pthread_join(t1, &thread_result);
pthread_join(t2, &thread_result);
pthread_join(t3, &thread_result);
printf("final number : %d \n", number);
//sem_destroy(&bin_sem);
sem_unlink(semname);
return 0;
}
void * thread_snd(void * arg)
{
int i;
for(i = 0 ; i < 4; i++)
{
while(number != 0)
sleep(1);
number++;
printf("snd execution : %s, number : %d \n", (char*) arg, number);
//sem_post(&bin_sem);
sem_post(bin_sem);
}
}
void * thread_rcv(void* arg)
{
int i;
for(i = 0 ; i < 2; i++)
{
//sem_wait(&bin_sem);
sem_wait(bin_sem);
number--;
printf("rcv execution : %s number : %d \n", (char*)arg, number);
}
}
See also:
sem_init on OS X
Program using Semaphores runs fine on Linux... unexpected results on Mac OS X

Resources