Shared memory across processes on Linux/x86_64 - linux

I have a few questions on using shared memory with processes. I looked at several previous posts and couldn't glean the answers precisely enough. Thanks in advance for your help.
I'm using shm_open + mmap like below. This code works as intended with parent and child alternating to increment g_shared->count (the synchronization is not portable; it works only for certain memory models, but good enough for my case for now). However, when I change MAP_SHARED to MAP_ANONYMOUS | MAP_SHARED, the memory isn't shared and the program hangs since the 'flag' doesn't get flipped. Removing the flag confirms what's happening with each process counting from 0 to 10 (implying that each has its own copy of the structure and hence the 'count' field). Is this the expected behavior? I don't want the memory to be backed by a file; I really want to emulate what might happen if these were threads instead of processes (they need to be processes for other reasons).
Do I really need shm_open? Since the processes belong to the same hierarchy, can I just use mmap alone instead? I understand this would be fairly straightforward if there wasn't an 'exec,' but how do I get it to work when there is an 'exec' following the 'fork?'
I'm using kernel version 3.2.0-23 on x86_64 (Intel i7-2600). For this implementation, does mmap give the same behavior (correctness as well as performance) as shared memory with pthreads sharing the same global object? For example, does the MMU map the segment with 'cacheable' MTRR/TLB attributes?
Is the cleanup_shared() code correct? Is it leaking any memory? How could I check? For example, is there an equivalent of System V's 'ipcs?'
thanks,
/Doobs
shmem.h:
#ifndef __SHMEM_H__
#define __SHMEM_H__
//includes
#define LEN 1000
#define ITERS 10
#define SHM_FNAME "/myshm"
typedef struct shmem_obj {
int count;
char buff[LEN];
volatile int flag;
} shmem_t;
extern shmem_t* g_shared;
extern char proc_name[100];
extern int fd;
void cleanup_shared() {
munmap(g_shared, sizeof(shmem_t));
close(fd);
shm_unlink(SHM_FNAME);
}
static inline
void init_shared() {
int oflag;
if (!strcmp(proc_name, "parent")) {
oflag = O_CREAT | O_RDWR;
} else {
oflag = O_RDWR;
}
fd = shm_open(SHM_FNAME, oflag, (S_IREAD | S_IWRITE));
if (fd == -1) {
perror("shm_open");
exit(EXIT_FAILURE);
}
if (ftruncate(fd, sizeof(shmem_t)) == -1) {
perror("ftruncate");
shm_unlink(SHM_FNAME);
exit(EXIT_FAILURE);
}
g_shared = mmap(NULL, sizeof(shmem_t),
(PROT_WRITE | PROT_READ),
MAP_SHARED, fd, 0);
if (g_shared == MAP_FAILED) {
perror("mmap");
cleanup_shared();
exit(EXIT_FAILURE);
}
}
static inline
void proc_write(const char* s) {
fprintf(stderr, "[%s] %s\n", proc_name, s);
}
#endif // __SHMEM_H__
shmem1.c (parent process):
#include "shmem.h"
int fd;
shmem_t* g_shared;
char proc_name[100];
void work() {
int i;
for (i = 0; i &lt ITERS; ++i) {
while (g_shared->flag);
++g_shared->count;
sprintf(g_shared->buff, "%s: %d", proc_name, g_shared->count);
proc_write(g_shared->buff);
g_shared->flag = !g_shared->flag;
}
}
int main(int argc, char* argv[], char* envp[]) {
int status, child;
strcpy(proc_name, "parent");
init_shared(argv);
fprintf(stderr, "Map address is: %p\n", g_shared);
if (child = fork()) {
work();
waitpid(child, &status, 0);
cleanup_shared();
fprintf(stderr, "Parent finished!\n");
} else { /* child executes shmem2 */
execvpe("./shmem2", argv + 2, envp);
}
}
shmem2.c (child process):
#include "shmem.h"
int fd;
shmem_t* g_shared;
char proc_name[100];
void work() {
int i;
for (i = 0; i &lt ITERS; ++i) {
while (!g_shared->flag);
++g_shared->count;
sprintf(g_shared->buff, "%s: %d", proc_name, g_shared->count);
proc_write(g_shared->buff);
g_shared->flag = !g_shared->flag;
}
}
int main(int argc, char* argv[], char* envp[]) {
int status;
strcpy(proc_name, "child");
init_shared(argv);
fprintf(stderr, "Map address is: %p\n", g_shared);
work();
cleanup_shared();
return 0;
}

Passing MAP_ANONYMOUS causes the kernel to ignore your file descriptor argument and give you a private mapping instead. That's not what you want.
Yes, you can create an anonymous shared mapping in a parent process, fork, and have the child process inherit the mapping, sharing the memory with the parent and any other children. That obvoiusly doesn't survive an exec() though.
I don't understand this question; pthreads doesn't allocate memory. The cacheability will depend on the file descriptor you mapped. If it's a disk file or anonymous mapping, then it's cacheable memory. If it's a video framebuffer device, it's probably not.
That's the right way to call munmap(), but I didn't verify the logic beyond that. All processes need to unmap, only one should call unlink.

2b) as a middle-ground of a sort, it is possible to call:
int const shm_fd = shm_open(fn,...);
shm_unlink(fn);
in a parent process, and then pass fd to a child process created by fork()/execve() via argp or envp. since open file descriptors of this type will survive the fork()/execve(), you can mmap the fd in both the parent process and any dervied processes. here's a more complete code example copied and simplified/sanitized from code i ran successfully under Ubuntu 12.04 / linux kernel 3.13 / glibc 2.15:
int create_shm_fd( void ) {
int oflags = O_RDWR | O_CREAT | O_TRUNC;
string const fn = "/some_shm_fn_maybe_with_pid";
int fd;
neg_one_fail( fd = shm_open( fn.c_str(), oflags, S_IRUSR | S_IWUSR ), "shm_open" );
if( fd == -1 ) { rt_err( strprintf( "shm_open() failed with errno=%s", str(errno).c_str() ) ); }
// for now, we'll just pass the open fd to our child process, so
// we don't need the file/name/link anymore, and by unlinking it
// here we can try to minimize the chance / amount of OS-level shm
// leakage.
neg_one_fail( shm_unlink( fn.c_str() ), "shm_unlink" );
// by default, the fd returned from shm_open() has FD_CLOEXEC
// set. it seems okay to remove it so that it will stay open
// across execve.
int fd_flags = 0;
neg_one_fail( fd_flags = fcntl( fd, F_GETFD ), "fcntl" );
fd_flags &= ~FD_CLOEXEC;
neg_one_fail( fcntl( fd, F_SETFD, fd_flags ), "fcntl" );
// resize the shm segment for later mapping via mmap()
neg_one_fail( ftruncate( fd, 1024*1024*4 ), "ftruncate" );
return fd;
}
it's not 100% clear to me if it's okay spec-wise to remove the FD_CLOEXEC and/or assume that after doing so the fd really will survive the exec. the man page for exec is unclear; it says: "POSIX shared memory regions are unmapped", but to me that's redundant with the general comments earlier that mapping are not preserved, and doesn't say that shm_open()'d fd will be closed. any of course there's the fact that, as i mentioned, the code does seem to work in at least one case.
the reason i might use this approach is that it would seem to reduce the chance of leaking the shared memory segment / filename, and it makes it clear that i don't need persistence of the memory segment.

Related

Is it possible to update the flags of an existing memory mapping?

I am writing a small program in x64 assembly that will spawn children, all sharing their memory mappings so they can modify each other's code. For this, since the argument CLONE_VM of sys_clone seems to place the program into undefined behaviour, I plan to use mmap's MAP_SHARED argument.
However, I would also need the children to modify the code of the father. One option is to also allocate a MAP_SHARED mapping and give it to the father, but I'd like to avoid this if possible (only for elegance reasons).
Since the base mapping (the 0x400000 one on 64-bits Linux) of the program will not have the MAP_SHARED flag by default, is it possible to update it using a syscall to set this flag? munmap then mmap will not do and cause a SIGSEGV, and mprotect can only change the RWX permissions.
You can't change whether an existing mapping is private or shared, but you can map a new shared mapping over an existing private mapping. You can even do so in C, like this:
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
int main(void) {
FILE *stream = fopen("/proc/self/maps", "rb");
if(!stream) {
perror("fopen");
return 1;
}
char *text_start, *text_end;
do {
if(fscanf(stream, " %p-%p%*[^\n]", &text_start, &text_end) != 2) {
perror("scanf");
return 1;
}
} while(!(text_start <= main && main < text_end));
if(fclose(stream)) {
perror("fclose");
return 1;
}
size_t text_len = text_end - text_start;
char *mem = mmap(NULL, text_len, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
if(mem == MAP_FAILED) {
perror("mmap");
return 1;
}
memcpy(mem, text_start, text_len);
__builtin___clear_cache(mem, mem + text_len);
if(mremap(mem, text_len, text_len, MREMAP_MAYMOVE|MREMAP_FIXED, text_start) == MAP_FAILED) {
perror("mremap");
return 1;
}
/* you can check /proc/PID/maps now to see the new mapping */
getchar();
}
As a bonus, this program supports ASLR and doesn't require that the text section start at 0x400000.

How to handle more than one SIGSEGV occurrence in linux?

I have written a program to scan kernel memory for a pattern from user space. I run it from root. I expect that it will generate SIGSEGVs when it hits pages that aren't accessible; I would like to ignore those faults and just jump to the next page to continue the search. I have set up a signal handler that works fine for the first occurrence, and it continues onward as expected. However, when a second SIGSEGV occurs, the handler is ignored (it was reregistered after the first occurrence) and the program terminates. The relevant portions of the code are:
jmp_buf restore_point;
void segv_handler(int sig, siginfo_t* info, void* ucontext)
{
longjmp(restore_point, SIGSEGV);
}
void setup_segv_handler()
{
struct sigaction sa;
sa.sa_flags = SA_SIGINFO|SA_RESTART|SA_RESETHAND;
sigemptyset (&sa.sa_mask);
sa.sa_sigaction = &segv_handler;
if (sigaction(SIGSEGV, &sa, NULL) == -1) {
fprintf(stderr, "failed to setup SIGSEGV handler\n");
}
}
unsigned long search_kernel_memory_area(unsigned long start_address, size_t area_len, const void* pattern, size_t pattern_len)
{
int fd;
char* kernel_mem;
fd = open("/dev/kmem", O_RDONLY);
if (fd < 0)
{
perror("open /dev/kmem failed");
return -1;
}
unsigned long page_size = sysconf(_SC_PAGESIZE);
unsigned long page_aligned_offset = (start_address/page_size)*page_size;
unsigned long area_pages = area_len/page_size + (area_len%page_size ? 1 : 0);
kernel_mem =
mmap(0, area_pages,
PROT_READ, MAP_SHARED,
fd, page_aligned_offset);
if (kernel_mem == MAP_FAILED)
{
perror("mmap failed");
return -1;
}
if (!mlock((const void*)kernel_mem,area_len))
{
perror("mlock failed");
return -1;
}
unsigned long offset_into_page = start_address-page_aligned_offset;
unsigned long start_area_address = (unsigned long)kernel_mem + offset_into_page;
unsigned long end_area_address = start_area_address+area_len-pattern_len+1;
unsigned long addr;
setup_segv_handler();
for (addr = start_area_address; addr < end_area_address;addr++)
{
unsigned char* kmp = (unsigned char*)addr;
unsigned char* pmp = (unsigned char*)pattern;
size_t index = 0;
for (index = 0; index < pattern_len; index++)
{
if (setjmp(restore_point) == 0)
{
unsigned char p = *pmp;
unsigned char k = *kmp;
if (k != p)
{
break;
}
pmp++;
kmp++;
}
else
{
addr += page_size -1;
setup_segv_handler();
break;
}
}
if (index >= pattern_len)
{
return addr;
}
}
munmap(kernel_mem,area_pages);
close(fd);
return 0;
}
I realize I can use functions like memcmp to avoid programming the matching part directly (I did this initially), but I subsequently wanted to insure the finest grained control for recovering from the faults so I could see exactly what was happening.
I scoured the Internet to find information about this behavior, and came up empty. The linux system I am running this under is arm 3.12.30.
If what I am trying to do is not possible under linux, is there some way I can get the current state of the kernel pages from user space (which would allow me to avoid trying to search pages that are inaccessible.) I searched for calls that might provide such information, but also came up empty.
Thanks for your help!
While longjmp is perfectly allowed to be used in the signal handler (the function is known as async-signal-safe, see man signal-safety) and effectively exits from the signal handling, it doesn't restore signal mask. The mask is automatically modified at the time when signal handler is called to block new SIGSEGV signal to interrupt the handler.
While one may restore signal mask manually, it is better (and simpler) to use siglongjmp function instead: aside from the effect of longjmp, it also restores the signal mask. Of course, in that case sigsetjmp function should be used instead of setjmp:
// ... in main() function
if(sigsetjmp(restore_point, 1)) // Aside from other things, store signal mask
// ...
// ... in the signal handler
siglongjmp(restore_point); // Also restore signal mask as it was at sigsetjmp() call

IOCTL Method - Linux

I have an exam question and I can't quite see how to solve it.
A driver that needs the ioctl method to be implemented and tested.
I have to write the ioctl() method, the associated test program as well as the common IOCTL definitions.
The ioctl() method should only handle one command. In this command, I need to transmit a data structure from user space to kernel space.
Below is the structure shown:
struct data
{
     char label [10];
     int value;
}
The driver must print the IOCTL command data, using printk();
Device name is "/dev/mydevice"
The test program must validate driver mode using an initialized data structure.
Hope there are some that can help
thanks in advance
My suggestion:
static int f_on_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
unsigned long arg)
{
int ret;
switch (cmd)
{
case PASS_STRUCT:
struct data pass_data;
ret = copy_from_user(&pass_data, arg, sizeof(*pass_data));
if(ret < 0)
{
printk("PASS_STRUCT\n");
return -1;
}
printk(KERN ALERT "Message PASS_STRUCT : %d and %c\n",pass_data.value, pass_data.label);
break;
default:
return ENOTTY;
}
return 0;
}
Definitions:
Common.h
#define SYSLED_IOC_MAGIC 'k'
#define PASS_STRUCT _IOW(SYSLED_IOC_MAGIC, 1, struct data)
The test program:
int main()
{
int fd = open("/dev/mydevice", O_RDWR);
data data_pass;
data_pass.value = 2;
data_pass.label = "hej";
ioctl(fd, PASS_STRUCT, &data_pass);
close(fd);
return 0;
}
Is this completely wrong??

Direct access to linux framebuffer - copyarea

I want to move very quickly a rectangle over a framebuffer in an embedded linux application. I have found that the function cfb_copyarea may be useful. But I cannot find any ioctl over the /dev/fb device to call the function. Or can this function be called directly?
Here is a code to init and close FrameBuffer
class CFrameBuffer
{
void* m_FrameBuffer;
struct fb_fix_screeninfo m_FixInfo;
struct fb_var_screeninfo m_VarInfo;
int m_FBFD;
int InitFB()
{
int iFrameBufferSize;
/* Open the framebuffer device in read write */
m_FBFD = open(FB_NAME, O_RDWR);
if (m_FBFD < 0) {
printf("Unable to open %s.\n", FB_NAME);
return 1;
}
/* Do Ioctl. Retrieve fixed screen info. */
if (ioctl(m_FBFD, FBIOGET_FSCREENINFO, &m_FixInfo) < 0) {
printf("get fixed screen info failed: %s\n",
strerror(errno));
close(m_FBFD);
return 1;
}
/* Do Ioctl. Get the variable screen info. */
if (ioctl(m_FBFD, FBIOGET_VSCREENINFO, &m_VarInfo) < 0) {
printf("Unable to retrieve variable screen info: %s\n",
strerror(errno));
close(m_FBFD);
return 1;
}
/* Calculate the size to mmap */
iFrameBufferSize = m_FixInfo.line_length * m_VarInfo.yres;
printf("Line length %d\n", m_FixInfo.line_length);
/* Now mmap the framebuffer. */
m_FrameBuffer = mmap(NULL, iFrameBufferSize, PROT_READ | PROT_WRITE,
MAP_SHARED, m_FBFD,0);
if (m_FrameBuffer == NULL) {
printf("mmap failed:\n");
close(m_FBFD);
return 1;
}
return 0;
}
void CloseFB()
{
munmap(m_FrameBuffer,0);
close(m_FBFD);
}
};
Note that this code is not entirely correct, although it will work on many linux devices, on some it won't. To calculate the framebuffer size, do not do this:
iFrameBufferSize = m_FixInfo.line_length * m_VarInfo.yres;
Instead, do this:
iFrameBufferSize = m_FixInfo.smem_len;
And your code will be more portable.
As far as I know after a few days of research, there is no ioctl for invoking this function. I have to write my own system call preferrably in a kernel module. Or copy the algorithm the from kernel source and use it in the user space via nmaped memory.

msemaphore on linux?

AIX (and HPUX if anyone cares) have a nice little feature called msemaphores that make it easy to synchronize granular pieces (e.g. records) of memory-mapped files shared by multiple processes. Is anyone aware of something comparable in linux?
To be clear, the msemaphore functions are described by following the related links here.
POSIX semaphores can be placed in memory shared between processes, if the second argument to sem_init(3), "pshared", is true. This seems to be the same as what msem does.
#include <semaphore.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
int main() {
void *shared;
sem_t *sem;
int counter, *data;
pid_t pid;
srand(time(NULL));
shared = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_SHARED, -1, 0);
sem_init(sem = shared, 1, 1);
data = shared + sizeof(sem_t);
counter = *data = 0;
pid = fork();
while (1) {
sem_wait(sem);
if (pid)
printf("ping>%d %d\n", data[0] = rand(), data[1] = rand());
else if (counter != data[0]) {
printf("pong<%d", counter = data[0]);
sleep(2);
printf(" %d\n", data[1]);
}
sem_post(sem);
if (pid) sleep(1);
}
}
This is a pretty dumb test, but it works:
$ cc -o test -lrt test.c
$ ./test
ping>2098529942 315244699
pong<2098529942 315244699
pong<1195826161 424832009
ping>1195826161 424832009
pong<1858302907 1740879454
ping>1858302907 1740879454
ping>568318608 566229809
pong<568318608 566229809
ping>1469118213 999421338
pong<1469118213 999421338
ping>1247594672 1837310825
pong<1247594672 1837310825
ping>478016018 1861977274
pong<478016018 1861977274
ping>1022490459 935101133
pong<1022490459 935101133
...
Because the semaphore is shared between the two processes, the pongs don't get interleaved data from the pings despite the sleeps.
This can be done using POSIX shared-memory mutexes:
pthread_mutexattr_t attr;
int pshared = PTHREAD_PROCESS_SHARED;
pthread_mutexattr_init(&attr);
pthread_mutexattr_setpshared(&attr, &pshared);
pthread_mutex_init(&some_shared_mmap_structure.mutex, &attr);
pthread_mutexattr_destroy(&attr);
Now you can unlock and lock &some_shared_mmap_structure.mutex using ordinary pthread_mutex_lock() etc calls, from multiple processes that have it mapped.
Indeed, you can even implement the msem API in terms of this: (untested)
struct msemaphore {
pthread_mutex_t mut;
};
#define MSEM_LOCKED 1
#define MSEM_UNLOCKED 0
#define MSEM_IF_NOWAIT 1
msemaphore *msem_init(msemaphore *msem_p, int initialvalue) {
pthread_mutex_attr_t attr;
int pshared = PTHREAD_PROCESS_SHARED;
assert((unsigned long)msem_p & 7 == 0); // check alignment
pthread_mutexattr_init(&attr);
pthread_mutexattr_setpshared(&attr, &pshared); // might fail, you should probably check
pthread_mutex_init(&msem_p->mut, &attr); // never fails
pthread_mutexattr_destroy(&attr);
if (initialvalue)
pthread_mutex_lock(&attr);
return msem_p;
}
int msem_remove(msemaphore *msem) {
return pthread_mutex_destroy(&msem->mut) ? -1 : 0;
}
int msem_lock(msemaphore *msem, int cond) {
int ret;
if (cond == MSEM_IF_NOWAIT)
ret = pthread_mutex_trylock(&msem->mut);
else
ret = pthread_mutex_lock(&msem->mut);
return ret ? -1 : 0;
}
int msem_unlock(msemaphore *msem, int cond) {
// pthreads does not allow us to directly ascertain whether there are
// waiters. However, a unlock/trylock with no contention is -very- fast
// using linux's pthreads implementation, so just do that instead if
// you care.
//
// nb, only fails if the mutex is not initialized
return pthread_mutex_unlock(&msem->mut) ? -1 : 0;
}
Under Linux, you may be able to achieve what you want with SysV shared memory; quick googling turned up this (rather old) guide that may be of help.

Resources