Thread management in a cloned process with CLONE_VM flag set

Thread management in a cloned process with CLONE_VM flag set - linux

I need help with thread management inside cloned process.
The point is that I need to create and destroy threads inside a cloned process asynchronously (PTHREAD_CANCEL_ASYNCHRONOUS is set). Everything is fine if CLONE_VM is not set, but I need it because main and child process must share the same address space.
Below is an example that demonstrate the question. Being compiled with CLONE_VM flag the program hangs at pthread_join while without CLONE_VM everything is fine.
Are there any ways to deal with this problem?
P.S. Tried at Debian GNU/Linux versions 7,8 and unstable (kernels 3.2, 3.16, 4.0).
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
#include <sched.h>
#include <string.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
/* Compile with # gcc -o pthread_test -lpthread pthread_test.c */
#define CLONE 1
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
void *thread_test(void *unused)
{
pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
pthread_mutex_unlock(&mutex);
pause();
return NULL;
}
int test(void *unused)
{
void *status;
int r;
pthread_t thread;
pthread_mutex_lock(&mutex);
if ((r = pthread_create(&thread, NULL, thread_test, NULL)) != 0) {
printf("pthread_create returned %d\n", r);
exit(1);
}
pthread_mutex_lock(&mutex);
printf("cancelling thread\n");
r = pthread_cancel(thread);
printf("pthread cancel returned : %s(%d)\n", strerror(r), r);
printf("joining thread\n");
r = pthread_join(thread, &status);
printf("pthread_join returned : %s\n", strerror(r));
if (status == PTHREAD_CANCELED)
printf("child was canceled\n");
else
printf("child exit status is %u\n", (unsigned)status);
return 0;
}
int main(void)
{
#if CLONE
void *stack = (void *)malloc(16384);
if (clone(test, stack + 16383, CLONE_VM | CLONE_FS | CLONE_FILES, NULL) < 0) {
printf("clone failed\n");
exit(1);
}
waitpid(0, NULL, __WCLONE);
return 0;
#else
void *stack = (void *)malloc(16384);
if (clone(test, stack + 16383, /*CLONE_VM |*/ CLONE_FS | CLONE_FILES, NULL) < 0) {
printf("clone failed\n");
exit(1);
}
waitpid(0, NULL, __WCLONE);
return 0;
#endif
}

Related

robust_list not calling FUTEX_WAKE

The Linux robust_list mechanism is a tool used by robust mutexes to support automatic unlocking in the event that the lock owner fails to unlock before terminating, maybe due to unexpected death. According to man set_robust_list:
The purpose of the robust futex list is to ensure that if a thread accidentally fails to unlock a futex before terminating or calling execve(2), another thread that is waiting on that futex is notified that the former owner of the futex has died. This notification consists of two pieces: the FUTEX_OWNER_DIED bit is set in the futex word, and the kernel performs a futex(2) FUTEX_WAKE operation on one of the threads waiting on the futex.
This is not the behavior I'm seeing.
I'm seeing the futex replaced with FUTEX_OWNER_DIED, not ored with.
And I'm not getting the FUTEX_WAKE call.
#include <chrono>
#include <thread>
#include <linux/futex.h>
#include <stdint.h>
#include <stdio.h>
#include <syscall.h>
#include <unistd.h>
using ftx_t = uint32_t;
struct mtx_t {
mtx_t* next;
mtx_t* prev;
ftx_t ftx;
};
thread_local robust_list_head robust_head;
void robust_init() {
robust_head.list.next = &robust_head.list;
robust_head.futex_offset = offsetof(mtx_t, ftx);
robust_head.list_op_pending = NULL;
syscall(SYS_set_robust_list, &robust_head.list, sizeof(robust_head));
}
void robust_op_start(mtx_t* mtx) {
robust_head.list_op_pending = (robust_list*)mtx;
__sync_synchronize();
}
void robust_op_end() {
__sync_synchronize();
robust_head.list_op_pending = NULL;
}
void robust_op_add(mtx_t* mtx) {
mtx_t* old_first = (mtx_t*)robust_head.list.next;
mtx->prev = (mtx_t*)&robust_head;
mtx->next = old_first;
__sync_synchronize();
robust_head.list.next = (robust_list*)mtx;
if (old_first != (mtx_t*)&robust_head) {
old_first->prev = mtx;
}
}
int futex(ftx_t* uaddr,
int futex_op,
int val,
uintptr_t timeout_or_val2,
ftx_t* uaddr2,
int val3) {
return syscall(SYS_futex, uaddr, futex_op, val, timeout_or_val2, uaddr2, val3);
}
int ftx_wait(ftx_t* ftx, int confirm_val) {
return futex(ftx, FUTEX_WAIT, confirm_val, 0, NULL, 0);
}
int main() {
mtx_t mtx = {0};
std::thread t0{[&]() {
fprintf(stderr, "t0 start\n");
ftx_wait(&mtx.ftx, 0);
fprintf(stderr, "t0 done\n");
}};
std::this_thread::sleep_for(std::chrono::milliseconds(100));
std::thread t1{[&]() {
fprintf(stderr, "t1 start\n");
robust_init();
robust_op_start(&mtx);
__sync_bool_compare_and_swap(&mtx.ftx, 0, syscall(SYS_gettid));
robust_op_add(&mtx);
robust_op_end();
fprintf(stderr, "t1 ftx: %x\n", mtx.ftx);
fprintf(stderr, "t1 done\n");
}};
t1.join();
std::this_thread::sleep_for(std::chrono::milliseconds(100));
fprintf(stderr, "ftx: %x\n", mtx.ftx);
t0.join();
}
Running
g++ -o ./example ~/example.cpp -lpthread && ./example
prints something like:
t0 start
t1 start
t1 ftx: 12ea65
t1 done
ftx: 40000000
and hangs.
I would expect the final value of the futex to be 4012ea65 and for thread 0 to unblock after thread 1 completes.

Ptrace prevents signal from interrupting pselect() in traced process

I'm trying to monitor syscalls for a binary using ptrace. The binary sleeps in pselect() and without ptrace, a SIGQUIT makes it return from pselect. The mask of blocked signals passed to pselect includes SIGQUIT.
When executed with ptrace, it exits from sys_pselect6 but not all the way out of glibc's pselect. What am I doing that prevents sys_pselect6 from exiting out to user code ?
Tracer:
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/reg.h>
#include <err.h>
#include <wait.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
int pid = fork(), sys_in = 1, status;
if (pid == 0) {
if (ptrace(PTRACE_TRACEME, getppid(), NULL, NULL) < 0)
err(1, "TRACEME()");
execl("./child", "./child", NULL);
err(1, "execl()");
}
if (waitpid(pid, &status, 0) != pid) err(1, "wait()");
for (;; sys_in ^= 1) {
if (ptrace(PTRACE_SYSCALL, pid, NULL, NULL) < 0) err(1, "SYSCALL");
if (waitpid(pid, &status, 0) != pid) err(1, "wait()");
if (sys_in) {
long long sys_no = ptrace(PTRACE_PEEKUSER, pid, 8 * ORIG_RAX, NULL);
printf("syscall entry %lld\n", sys_no);
}
else printf("syscall exit\n");
}
return 0;
}
Child:
#include <stdio.h>
#include <sys/select.h>
#include <signal.h>
#include <err.h>
void handle_sigquit(int sig, siginfo_t* info, void *ctx)
{
}
int main()
{
sigset_t mask;
sigset_t orig_mask;
struct sigaction sa = {};
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = handle_sigquit;
sigaction(SIGQUIT, &sa, NULL);
sigemptyset(&mask);
sigaddset(&mask, SIGQUIT);
if (sigprocmask(SIG_BLOCK, &mask, &orig_mask) < 0) err(1, "sigprocmask()");
pselect(0, NULL, NULL, NULL, NULL, &orig_mask);
warn("pselect()");
return 0;
}

ptrace(PTRACE_SYSCALL, pid, NULL, NULL)
Whenever your debugger gets a notification, you just assume that that notification is about a system call, and handle it accordingly. That is not the case.
Some of the notifications you receive using wait are for signals that your debugee has received. When those happen, the last NULL in your PTRACE_SYSCALL call eliminates, effectively masks, the signal from arriving at the debugee process.
When processing ptrace results, you need to check the signal that caused your debugger to wake up. At the very least, check if it's a SIGTRAP or something else. If it is something else, the best bet is to pass it on to the debugee process.
Check out this small program to see a simple way of doing it.

trying to use pipe(2) with the sort unix tool but not working

I have been struggling to find what I'm doing wrong and I can't seem to find the issue. When I compile the code below, I get an I/O error.
e.g: /usr/bin/sort: read failed: -: Input/output error
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
int main(int argc, char **argv, char **envp)
{
int fd[2];
pid_t pid;
pipe(fd);
pid = fork();
if (pid == -1) {
exit(EXIT_FAILURE);
}
if (pid == 0) { /* child */
char *exe[]= { "/usr/bin/sort", NULL };
close(fd[0]);
execve("/usr/bin/sort", exe, envp);
}
else {
char *a[] = { "zilda", "andrew", "bartholomeu", NULL };
int i;
close(fd[1]);
for (i = 0; a[i]; i++)
printf("%s\n", a[i]);
}
return 0;
}

dup2(fd[0], 0) in the child.
dup2(fd[1], 1) in the parent.
close the other fd.

pclose() returns SIGPIPE intermittently

When the following C program is executed, and SIGUSR1 is sent to the running process repeatedly, the pclose() call will sometimes return 13. 13 corresponds to SIGPIPE on my system.
Why does this happen?
I am using while true; do kill -SIGUSR1 <process-id>; done to send SIGUSR1 to the program. The program is executed on Ubuntu 14.04.
#include <pthread.h>
#include <signal.h>
#include <unistd.h>
#include <stdio.h>
void handler(int i) {}
void* task(void*)
{
FILE *s;
char b [BUFSIZ];
while (1) {
if ((s = popen("echo hello", "r")) == NULL) {
printf("popen() failed\n");
}
while (fgets(b, BUFSIZ, s) != NULL) ;
if (int r = pclose(s)) {
printf("pclose() failed (%d)\n", r);
}
}
return 0;
}
int main(int argc, char **argv)
{
struct sigaction action;
action.sa_handler = handler;
sigemptyset(&action.sa_mask);
action.sa_flags = 0;
sigaction(SIGUSR1, &action, NULL);
pthread_t tid;
pthread_create(&tid, 0, task, NULL);
pthread_join(tid, NULL);
}

This happens when fgets gets interrupted by the signal. The program doesn't read the pipe to the end and closes it. The other program then SIGPIPEs.
The correct pipe reading operation is:
do {
while (fgets(b, BUFSIZ, s) != NULL) ;
} while (errno == EINTR);

How to read the valgrind return value from child processes?

i am running valgrind in a bash script to use it for automated testing. I already added the option to return an exit code on error and to trace children.
/usr/bin/valgrind --error-exitcode=1 --trace-children=yes ./test_prog
My programm forks other processes and I can see the output of valgrind running the different processes in the terminal. The problem is, that the exit code option only seems to work when there is an error in the parent process. Because even though there is an error (SIGSEGV) in one of the child processes the exit code of valgrind is still 0, which means it is useless for the automated testing of several processes.
So is there any option, that would make the parent valgrind catch the error in the child and return it? I already looked into the man page. Maybe there would be another solution to this problem, like grepping the output of the children to the terminal for any error messages?
Thanks in advance.

it's important to implement a proper error handling in the code. Compare following two pieces of code.
A:
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <signal.h>
#define BUFSIZE 1024
int incr=0;
int loop=1;
void runTicks(const char *n) {
time_t t;
char buf[BUFSIZE+1];
pid_t pid;
int counter;
pid=getpid();
counter=0;
while(loop) {
sleep(1);
t=time(NULL);
strftime(buf,BUFSIZE,"%Y.%m.%d %H:%M:%S",localtime(&t));
printf("%s[%d] %s\n",n,pid,buf);
counter+=incr;
if(counter>5) memcpy((void *)1,buf,1); /* this line is for causing SEGV */
}
}
void handler(int s) {
if(s==SIGCHLD) {
printf("Received SIGCHLD\n");
loop=0;
}
}
void setHandler() {
struct sigaction sa;
sa.sa_handler=handler;
sigemptyset(&sa.sa_mask);
sa.sa_flags=SA_NOCLDSTOP;
if(sigaction(SIGCHLD,&sa,NULL)!=0) {
printf("Cannot set signal handler, there is no purpose in running the test\n");
exit(0);
}
}
int main() {
pid_t pid;
printf("start\n");
pid=fork();
if(pid==-1) {
printf("fork failed\n");
exit(10);
}
if(pid==0) {
printf("child\n");
incr=1;
usleep(500000);
runTicks("C");
exit(1);
} else {
printf("parent spawned child pid=%d\n",pid);
setHandler();
runTicks("P");
exit(0);
}
}
B:
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <signal.h>
#include <sys/wait.h>
#define BUFSIZE 1024
int incr=0;
int loop=1;
void runTicks(const char *n) {
time_t t;
char buf[BUFSIZE+1];
pid_t pid;
int counter;
pid=getpid();
counter=0;
while(loop) {
sleep(1);
t=time(NULL);
strftime(buf,BUFSIZE,"%Y.%m.%d %H:%M:%S",localtime(&t));
printf("%s[%d] %s\n",n,pid,buf);
counter+=incr;
if(counter>5) memcpy((void *)1,buf,1); /* this line is for causing SEGV */
}
}
void handler(int s) {
if(s==SIGCHLD) {
int status;
printf("Received SIGCHLD\n");
wait(&status);
printf("Exit code from child: %d\n",status);
if(status!=0) exit(status);
loop=0;
}
}
void setHandler() {
struct sigaction sa;
sa.sa_handler=handler;
sigemptyset(&sa.sa_mask);
sa.sa_flags=SA_NOCLDSTOP;
if(sigaction(SIGCHLD,&sa,NULL)!=0) {
printf("Cannot set signal handler, there is no purpose in running the test\n");
exit(0);
}
}
int main() {
pid_t pid;
printf("start\n");
pid=fork();
if(pid==-1) {
printf("fork failed\n");
exit(10);
}
if(pid==0) {
printf("child\n");
incr=1;
usleep(500000);
runTicks("C");
exit(1);
} else {
printf("parent spawned child pid=%d\n",pid);
setHandler();
runTicks("P");
exit(0);
}
}
Run those first without valgrind and compare the exit code of both programs. Then run them under valgrind and enjoy.
Using such construction you even don't need to run it under valgrind, to catch segfaults from child processes.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Thread management in a cloned process with CLONE_VM flag set - linux

Related

robust_list not calling FUTEX_WAKE

Ptrace prevents signal from interrupting pselect() in traced process

trying to use pipe(2) with the sort unix tool but not working

pclose() returns SIGPIPE intermittently

How to read the valgrind return value from child processes?

Categories

Resources