ptrace'ing of parent process - linux

Can child process use the ptrace system call to trace its parent?
Os is linux 2.6
Thanks.
upd1:
I want to trace process1 from "itself". It is impossible, so I do fork and try to do ptrace(process1_pid, PTRACE_ATTACH) from child process. But I can't, there is a strange error, like kernel prohibits child from tracing their parent processes
UPD2: such tracing can be prohibited by security policies. Which polices do this? Where is the checking code in the kernel?
UPD3: on my embedded linux I have no errors with PEEKDATA, but not with GETREGS:
child: getregs parent: -1
errno is 1, strerror is Operation not permitted
errno = EPERM

This question really interested me. So I wrote some code to try it out.
Firstly keep in mind, that when tracing a process, the tracing process becomes a parent for most purposes, except in name (i.e. getppid()). Firstly, a snippet of the PTRACE_ATTACH section of the manual is helpful:
PTRACE_ATTACH
Attaches to the process specified in pid, making it a traced
"child" of the calling process; the behavior of the child is as
if it had done a PTRACE_TRACEME. The calling process actually
becomes the parent of the child process for most purposes (e.g.,
it will receive notification of child events and appears in
ps(1) output as the child's parent), but a getppid(2) by the
child will still return the PID of the original parent. The
child is sent a SIGSTOP, but will not necessarily have stopped
by the completion of this call; use wait(2) to wait for the
child to stop. (addr and data are ignored.)
Now here is the code I wrote to test and verify that you can in fact ptrace() your parent (you can build this by dumping it in a file named blah.c and running make blah:
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ptrace.h>
int main()
{
pid_t pid = fork();
assert(pid != -1);
int status;
long readme = 0;
if (pid)
{
readme = 42;
printf("parent: child pid is %d\n", pid);
assert(pid == wait(&status));
printf("parent: child terminated?\n");
assert(0 == status);
}
else
{
pid_t tracee = getppid();
printf("child: parent pid is %d\n", tracee);
sleep(1); // give parent time to set readme
assert(0 == ptrace(PTRACE_ATTACH, tracee));
assert(tracee == waitpid(tracee, &status, 0));
printf("child: parent should be stopped\n");
printf("child: peeking at parent: %ld\n", ptrace(PTRACE_PEEKDATA, tracee, &readme));
}
return 0;
}
Note that I'm exploiting the replication of the parent's virtual address space to know where to look. Also note that when the child then terminates, I suspect there's an implicit detach which must allow the parent to continue, I didn't investigate further.

Yes it is possible...
Even GETREGS works.
Checked on x86
(based on Matt Joiner code, thanks him)
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>
int main()
{
pid_t pid = fork();
// assert(pid != -1);
int status;
long readme = 0;
struct user_regs_struct regs;
if (pid)
{
readme = 42;
printf("parent: child pid is %d\n", pid);
assert(pid == wait(&status));
printf("parent: child terminated?\n");
assert(0 == status);
}
else
{
pid_t tracee = getppid();
printf("child: parent pid is %d\n", tracee);
sleep(1); // give parent time to set readme
assert(0 == ptrace(PTRACE_ATTACH, tracee));
assert(tracee == waitpid(tracee, &status, 0));
printf("child: parent should be stopped\n");
printf("child: peeking at parent: %ld\n", ptrace(PTRACE_PEEKDATA, tracee, &readme, NULL));
printf("Regs was %p, %p, %p, %p; &status is %p \n", regs.eax, regs.ebx, regs.ecx, regs.edx, &status);
printf("child: getregs parent: %ld\n", ptrace(PTRACE_GETREGS, tracee, NULL, &regs));
printf("Regs is %p, %p, %p, %p; &status is %p \n", regs.eax, regs.ebx, regs.ecx, regs.edx, &status);
}
return 0;
}
result:
child: parent pid is 1188
parent: child pid is 1189
child: parent should be stopped
child: peeking at parent: 42
Regs was (nil), (nil), (nil), (nil); &status is 0xbfffea50
child: getregs parent: 0
Regs is 0xfffffe00, 0xffffffff, 0xbfffea50, (nil); &status is 0xbfffea50
parent: child terminated?

Related

Is this a bug in linux kernel concerning write to /proc/self/loginuid?

There is a possibility that i found a bug in linux kernel. Let's consider application that write to /proc/self/loginuid from main thread and one auxiliary thread. The code is below:
#include <stdio.h>
#include <pthread.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
void write_loginuid(char *str)
{
int fd;
printf("%s\n", str);
fd = open("/proc/self/loginuid", O_RDWR);
if (fd < 0) {
perror(str);
return;
}
if (write(fd, "0", 2) != 2) {
printf("write\n");
perror(str);
}
close(fd);
}
void *thread_function(void *arg)
{
fprintf(stderr, "Hello from thread! my pid = %u, tid = %u, parent pid = %u\n", getpid(), syscall(SYS_gettid), getppid());
write_loginuid("thread");
return NULL;
}
int main()
{
pthread_t thread;
pthread_create(&thread, NULL, thread_function, NULL);
write_loginuid("main process");
fprintf(stderr, "test my pid = %u, tid = %u, parent pid = %u\n", getpid(), syscall(SYS_gettid), getppid());
pthread_join(thread, NULL);
return 0;
}
After executing this application we get:
main process
test my pid = 3487, tid = 3487, parent pid = 3283
Hello from thread! my pid = 3487, tid = 3488, parent pid = 3283
thread
write
thread: Operation not permitted
That tells us the thread write failed by -EPERM.
Looking at the kernel file fs/proc/base.c and function proc_loginuid_write() we see at the beginning check:
static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
struct inode * inode = file_inode(file);
uid_t loginuid;
kuid_t kloginuid;
int rv;
/* this is the probably buggy check */
rcu_read_lock();
if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
rcu_read_unlock();
return -EPERM;
}
rcu_read_unlock();
So, looking at the code above we see that only for exact PID (checked by me with printks) we pass through.Thread doesn't satisfy the condition, because compared pids differs.
So my question is: is this a bug ? Why to not allow thread's of particular process to change the loginuid? I encountered this in login application that spawned another thread for PAM login.
Whether this is bug or not i written a fix that extends writing permission to this file by threads:
rcu_read_lock();
/*
* I changed the condition that it checks now the tgid as returned in sys_getpid()
* rather than task_struct pointers
*/
if (task_tgid_vnr(current) != task_tgid_vnr(pid_task(proc_pid(inode), PIDTYPE_PID))) {
rcu_read_unlock();
return -EPERM;
}
rcu_read_unlock();
What do you think about it? Does it affects security?

How to safely `waitpid()` in a plugin with `SIGCHLD` handler calling `wait()` setup in the main program

I am writing a module for a toolkit which need to execute some sub processes and read their output. However, the main program that uses the toolkit may also spawn some sub processes and set up a signal handler for SIGCHLD which calls wait(NULL) to get rid of zombie processes. As a result, if the subprocess I create exit inside my waitpid(), the child process is handled before the signal handler is called and therefore the wait() in the signal handler will wait for the next process to end (which could take for ever). This behavior is described in the man page of waitpid (See grantee 2) since the linux implementation doesn't seem to allow the wait() family to handle SIGCHLD. I have tried popen() and posix_spawn() and both of them have the same problem. I have also tried to use double fork() so that the direct child exist immediately but I still cannot garentee that waitpid() is called after SIGCHLD is recieved.
My question is, if other part of the program sets up a signal handler which calls wait() (maybe it should rather call waidpid but that is not sth I can control), is there a way to safely execute child processes without overwriting the SIGCHLD handler (since it might do sth useful in some programs) or any zombie processes.
A small program which shows the problem is here (Noted that the main program only exit after the long run child exit, instead of the short one which is what it is directly waiting for with waitpid()):
#include <signal.h>
#include <sys/wait.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
static void
signalHandler(int sig)
{
printf("%s: %d\n", __func__, sig);
int status;
int ret = waitpid(-1, &status, 0);
printf("%s, ret: %d, status: %d\n", __func__, ret, status);
}
int
main()
{
struct sigaction sig_act;
memset(&sig_act, 0, sizeof(sig_act));
sig_act.sa_handler = signalHandler;
sigaction(SIGCHLD, &sig_act, NULL);
if (!fork()) {
sleep(20);
printf("%s: long run child %d exit.\n", __func__, getpid());
_exit(0);
}
pid_t pid = fork();
if (!pid) {
sleep(4);
printf("%s: %d exit.\n", __func__, getpid());
_exit(0);
}
printf("%s: %d -> %d\n", __func__, getpid(), pid);
sleep(1);
printf("%s, start waiting for %d\n", __func__, pid);
int status;
int ret = waitpid(pid, &status, 0);
printf("%s, ret: %d, pid: %d, status: %d\n", __func__, ret, pid, status);
return 0;
}
If the process is single-threaded, you can block the CHLD signal temporarily (using sigprocmask), fork/waitpid, then unblock again.
Do not forget to unblock the signal in the forked child - although POSIX states the signal mask is undefined when a process starts, most existing programs expect it to be completely unset.

Why couldn't I intercept segfault signal of a child process?

I'm trying to monitor the child process for segment fault errors, but that doesn't work.
I always receive ABRT signal.
I see gdb can catch segment fault, so what's wrong with my code?
pid_t child;
int wstatus, signum;
struct user_regs_struct regs;
child = fork();
if (child == 0)
{
ptrace(PTRACE_TRACEME, 0, NULL, NULL);
char buf[10];
// make it always crash
strcpy (buf, "aaaaaaabbbbbbbbbbbaaaaaaaaaaaaaaaa");
printf ("Buf is %s\n", buf);
exit(0);
}
while(1)
{
wait(&wstatus);
if (WIFEXITED(wstatus) || WIFSIGNALED(wstatus))
break;
signum = WSTOPSIG(wstatus);
ptrace(PTRACE_GETREGS, child, NULL, &regs);
printf ("signal: %d, eip: 0x%08lx\n", signum, regs.eip);
ptrace(PTRACE_CONT, child, NULL, signum);
}
what's wrong with my code
Your code breaks out of the loop when child is signalled (WIFSIGNALED). Since you are expecting to catch a signal (most likely SIGSEGV), perhaps you shouldn't break out of the loop when the child is signalled?
I've looked at your code some more. It is not clear why your child is crashing at all. Perhaps you are building it with -fstack-protector or some such?
Here is a complete compilable test case (which you should have put into your question), that does crash (note: removed exit from child):
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <wait.h>
#include <sys/ptrace.h>
#include <sys/user.h>
int main()
{
pid_t child;
int wstatus, signum;
struct user_regs_struct regs;
child = fork();
if (child == 0)
{
ptrace(PTRACE_TRACEME, 0, NULL, NULL);
char buf[10];
// make it always crash
strcpy (buf, "aaaaaaabbbbbbbbbbbaaaaaaaaaaaaaaaa");
printf ("Buf is %s\n", buf);
}
while(1)
{
wait(&wstatus);
if (WIFEXITED(wstatus))
break;
signum = WSTOPSIG(wstatus);
ptrace(PTRACE_GETREGS, child, NULL, &regs);
printf ("signal: %d, eip: 0x%08lx\n", signum, regs.eip);
ptrace(PTRACE_CONT, child, NULL, signum);
}
return wstatus;
}
And got infinite loop
You normally should get an infinite loop: you are resuming the child, which re-executes its current instruction, which should trigger the exact same signal again.
That's not what is happening with the above program on my system though, and I currently can't explain what I am observing:
$ ./a.out
Buf is aaaaaaabbbbbbbbbbbaaaaaaaaaaaaaaaa
signal: 159, eip: 0x08049ff4
signal: 159, eip: 0x08049ff4
...
signal: 159, eip: 0x08049ff4
*** stack smashing detected ***: ./a.out terminated
signal: 11, eip: 0xf759fb19
signal: 0, eip: 0xf759fb19
signal: 0, eip: 0xf759fb19
...

Reading a child process's /proc/pid/mem file from the parent

In the program below, I am trying to cause the following to happen:
Process A assigns a value to a stack variable a.
Process A (parent) creates process B (child) with PID child_pid.
Process B calls function func1, passing a pointer to a.
Process B changes the value of variable a through the pointer.
Process B opens its /proc/self/mem file, seeks to the page containing a, and prints the new value of a.
Process A (at the same time) opens /proc/child_pid/mem, seeks to the right page, and prints the new value of a.
The problem is that, in step 6, the parent only sees the old value of a in /proc/child_pid/mem, while the child can indeed see the new value in its /proc/self/mem. Why is this the case? Is there any way that I can get the parent to to see the child's changes to its address space through the /proc filesystem?
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <unistd.h>
#define PAGE_SIZE 0x1000
#define LOG_PAGE_SIZE 0xc
#define PAGE_ROUND_DOWN(v) ((v) & (~(PAGE_SIZE - 1)))
#define PAGE_ROUND_UP(v) (((v) + PAGE_SIZE - 1) & (~(PAGE_SIZE - 1)))
#define OFFSET_IN_PAGE(v) ((v) & (PAGE_SIZE - 1))
# if defined ARCH && ARCH == 32
#define BP "ebp"
#define SP "esp"
#else
#define BP "rbp"
#define SP "rsp"
#endif
typedef struct arg_t {
int a;
} arg_t;
void func1(void * data) {
arg_t * arg_ptr = (arg_t *)data;
printf("func1: old value: %d\n", arg_ptr->a);
arg_ptr->a = 53;
printf("func1: address: %p\n", &arg_ptr->a);
printf("func1: new value: %d\n", arg_ptr->a);
}
void expore_proc_mem(void (*fn)(void *), void * data) {
off_t frame_pointer, stack_start;
char buffer[PAGE_SIZE];
const char * path = "/proc/self/mem";
int child_pid, status;
int parent_to_child[2];
int child_to_parent[2];
arg_t * arg_ptr;
off_t child_offset;
asm volatile ("mov %%"BP", %0" : "=m" (frame_pointer));
stack_start = PAGE_ROUND_DOWN(frame_pointer);
printf("Stack_start: %lx\n",
(unsigned long)stack_start);
arg_ptr = (arg_t *)data;
child_offset =
OFFSET_IN_PAGE((off_t)&arg_ptr->a);
printf("Address of arg_ptr->a: %p\n",
&arg_ptr->a);
pipe(parent_to_child);
pipe(child_to_parent);
bool msg;
int child_mem_fd;
char child_path[0x20];
child_pid = fork();
if (child_pid == -1) {
perror("fork");
exit(EXIT_FAILURE);
}
if (!child_pid) {
close(child_to_parent[0]);
close(parent_to_child[1]);
printf("CHILD (pid %d, parent pid %d).\n",
getpid(), getppid());
fn(data);
msg = true;
write(child_to_parent[1], &msg, 1);
child_mem_fd = open("/proc/self/mem", O_RDONLY);
if (child_mem_fd == -1) {
perror("open (child)");
exit(EXIT_FAILURE);
}
printf("CHILD: child_mem_fd: %d\n", child_mem_fd);
if (lseek(child_mem_fd, stack_start, SEEK_SET) == (off_t)-1) {
perror("lseek");
exit(EXIT_FAILURE);
}
if (read(child_mem_fd, buffer, sizeof(buffer))
!= sizeof(buffer)) {
perror("read");
exit(EXIT_FAILURE);
}
printf("CHILD: new value %d\n",
*(int *)(buffer + child_offset));
read(parent_to_child[0], &msg, 1);
exit(EXIT_SUCCESS);
}
else {
printf("PARENT (pid %d, child pid %d)\n",
getpid(), child_pid);
printf("PARENT: child_offset: %lx\n",
child_offset);
read(child_to_parent[0], &msg, 1);
printf("PARENT: message from child: %d\n", msg);
snprintf(child_path, 0x20, "/proc/%d/mem", child_pid);
printf("PARENT: child_path: %s\n", child_path);
child_mem_fd = open(path, O_RDONLY);
if (child_mem_fd == -1) {
perror("open (child)");
exit(EXIT_FAILURE);
}
printf("PARENT: child_mem_fd: %d\n", child_mem_fd);
if (lseek(child_mem_fd, stack_start, SEEK_SET) == (off_t)-1) {
perror("lseek");
exit(EXIT_FAILURE);
}
if (read(child_mem_fd, buffer, sizeof(buffer))
!= sizeof(buffer)) {
perror("read");
exit(EXIT_FAILURE);
}
printf("PARENT: new value %d\n",
*(int *)(buffer + child_offset));
close(child_mem_fd);
printf("ENDING CHILD PROCESS.\n");
write(parent_to_child[1], &msg, 1);
if (waitpid(child_pid, &status, 0) == -1) {
perror("waitpid");
exit(EXIT_FAILURE);
}
}
}
int main(void) {
arg_t arg;
arg.a = 42;
printf("In main: address of arg.a: %p\n", &arg.a);
explore_proc_mem(&func1, &arg.a);
return EXIT_SUCCESS;
}
This program produces the output below. Notice that the value of a (boldfaced) differs between parent's and child's reading of the /proc/child_pid/mem file.
In main: address of arg.a: 0x7ffffe1964f0
Stack_start: 7ffffe196000
Address of arg_ptr->a: 0x7ffffe1964f0
PARENT (pid 20376, child pid 20377)
PARENT: child_offset: 4f0
CHILD (pid 20377, parent pid 20376).
func1: old value: 42
func1: address: 0x7ffffe1964f0
func1: new value: 53
PARENT: message from child: 1
CHILD: child_mem_fd: 4
PARENT: child_path: /proc/20377/mem
CHILD: new value 53
PARENT: child_mem_fd: 7
PARENT: new value 42
ENDING CHILD PROCESS.
There's one silly mistake in this code:
const char * path = "/proc/self/mem";
...
snprintf(child_path, 0x20, "/proc/%d/mem", child_pid);
printf("PARENT: child_path: %s\n", child_path);
child_mem_fd = open(path, O_RDONLY);
So you always end up reading parent's memory here. However after changing this, I get:
CHILD: child_mem_fd: 4
CHILD: new value 53
read (parent): No such process
And I don't know why it could happen - maybe /proc is too slow in refreshing the entries? (it's from perror("read") in the parent - had to add a comment to see which one fails) But that seems weird, since the seek worked - as well as open itself.
That question doesn't seem to be new either: http://lkml.indiana.edu/hypermail/linux/kernel/0007.1/0939.html (ESRCH is "no such process")
Actually a better link is: http://www.webservertalk.com/archive242-2004-7-295131.html - there was an issue with marking processes pthread-attach-safe. You can find there Alan Cox sending someone to Solar Designer... for me that spells "here be dragons" and that it's not solvable if you don't hack kernels in your sleep :(
Maybe it's enough for you to check what is gdb doing in that case and replicating it? (Probably it just goes via ptrace(PTRACE_PEEKDATA,...))
The solution is to use ptrace to synchronize parent with child. Even though I am already communicating between parent and child (and the man page for ptrace says that it causes the two processes to behave as if they were parent and child), and even though the child is blocking on the read call, the child has apparently not "stopped" enough for Linux to allow the parent to read the child's /proc/child_pid/mem file. But if the parent first calls ptrace (after it receives the message over the pipe) with PTRACE_ATTACH, then it can open the file--and get the correct contents! Then the parent calls ptrace again, with PTRACE_DETACH, before sending the message back to the child to terminate.

Why is RLIMIT_STACK lost after fork or exec on linux?

On linux, it is said that rlimit of a process is kept intact after either fork or exec. But I lose my RLIMIT_STACK in the child either after fork or after exec. Would someone please give some explain?
Here is some descriptive output of my program.
//The parent has an RLIMIT_STACK like this
RLIMIT_STACK, soft - 10485760, hard - -1
//Right after fork, the child loses its RLIMIT_STACK
In child after fork, RLIMIT_STACK, soft - -1, hard - -1
//In the child, before exec, RLIMIT_STACK soft is set to 10485760 again
RLIMIT_STACK set OK.
In child after set, RLIMIT_STACK, soft - 10485760, hard - -1
Child pid = 3096
//After exec, the new process loses its RLIMIT_STACK again
RLIMIT_STACK got, soft - -1, hard - -1
Thanks in advance
Feng
This seems a problem(I am not sure if it is a bug) of linuxthread implementation of libpthread.
I wrote a simple program:
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
int main(int argc, char **argv){
struct rlimit resource_limit;
if(getrlimit(RLIMIT_STACK, &resource_limit) != 0){
fprintf(stderr, "Failed to get rlimit: %s\n", strerror(errno));
return 1;
}
else{
fprintf(stderr, "In parent, RLIMIT_STACK, soft-%d, hard-%d\n",
resource_limit.rlim_cur, resource_limit.rlim_max);
}
int child_status = 0;
pid_t pid = fork();
switch(pid){
case 0://child
if(getrlimit(RLIMIT_STACK, &resource_limit) != 0){
fprintf(stderr, "Failed to get rlimit: %s\n", strerror(errno));
return 1;
}
else{
fprintf(stderr, "In child after fork, RLIMIT_STACK, soft-%d, hard-%d\n",
resource_limit.rlim_cur, resource_limit.rlim_max);
}
break;
case -1:
fprintf(stderr, "Fork error: %s\n", strerror(errno));
break;
default://parent
waitpid(pid, &child_status, 0);
break;
}
return 0;
}
If this program is compiled and linked without -lpthread option, it runs OK everywhere. But when it is linked with -lpthread option, wired things happen: if it is run on a machine where it is dynamically linked to a linuxthread version of libpthread, it gives:
In parent, RLIMIT_STACK, soft-10485760, hard--1
In child after fork, RLIMIT_STACK, soft--1, hard--1
But when run on a machine where it is dynamically linked to a NPTL version of libpthread, it gives the expected result:
In parent, RLIMIT_STACK, soft-10485760, hard--1
In child after fork, RLIMIT_STACK, soft-10485760, hard--1

Resources