Hello I have an issue to work with siglongjmp (multi threading) and sigaction configuring SIGVTALRM handler.
I've configured with sigsetjmp 2 new threads:
1. env[0] with PC on func f
2. env[1] with PC on func g
I've configured an handler to the SIGVTALRM signal, to switch between the 2 threads.
I set itimer_virtual to call to a func called "switchThreads" that switch between the threads.
I've tested 2 cases:
Setting the timer and calling to infinite loop, It's calling to the signal only once and then after siglongjmp it stops calling the handler.
Setting the timer and calling after it to f, not calling even once to the handler.
Here is my code:
#include <stdio.h>
#include <setjmp.h>
#include <signal.h>
#include <unistd.h>
#include <sys/time.h>
#define SECOND 1000000
#define STACK_SIZE 4096
char stack1[STACK_SIZE];
char stack2[STACK_SIZE];
sigjmp_buf env[2];
#ifdef __x86_64__
/* code for 64 bit Intel arch */
typedef unsigned long address_t;
#define JB_SP 6
#define JB_PC 7
/* A translation is required when using an address of a variable.
Use this as a black box in your code. */
address_t translate_address(address_t addr)
{
address_t ret;
asm volatile("xor %%fs:0x30,%0\n"
"rol $0x11,%0\n"
: "=g" (ret)
: "0" (addr));
return ret;
}
#else
/* code for 32 bit Intel arch */
typedef unsigned int address_t;
#define JB_SP 4
#define JB_PC 5
/* A translation is required when using an address of a variable.
Use this as a black box in your code. */
address_t translate_address(address_t addr)
{
address_t ret;
asm volatile("xor %%gs:0x18,%0\n"
"rol $0x9,%0\n"
: "=g" (ret)
: "0" (addr));
return ret;
}
#endif
static struct sigaction sa;
static struct itimerval timer;
static int currentThread = 0;
static void switchThreads(int sig)
{
int ret_val = sigsetjmp(env[currentThread], 1);
printf("SWITCH: ret_val=%d\n", ret_val);
if (ret_val == 1)
{
return;
}
currentThread = 1 - currentThread;
if (setitimer(ITIMER_VIRTUAL, &timer, NULL))
{
printf("setitimer error.");
}
siglongjmp(env[currentThread], 1);
}
void f(void)
{
int i = 0;
while (1)
{
++i;
printf("in f (%d)\n", i);
usleep(SECOND);
}
}
void g(void)
{
int i = 0;
while (1)
{
++i;
printf("in g (%d)\n", i);
usleep(SECOND);
}
}
void setup(void)
{
address_t sp, pc;
sp = (address_t) stack1 + STACK_SIZE - sizeof(address_t);
pc = (address_t) f;
sigsetjmp(env[0], 1);
(env[0]->__jmpbuf)[JB_SP] = translate_address(sp);
(env[0]->__jmpbuf)[JB_PC] = translate_address(pc);
sigemptyset(&env[0]->__saved_mask);
sp = (address_t) stack2 + STACK_SIZE - sizeof(address_t);
pc = (address_t) g;
sigsetjmp(env[1], 1);
(env[1]->__jmpbuf)[JB_SP] = translate_address(sp);
(env[1]->__jmpbuf)[JB_PC] = translate_address(pc);
sigemptyset(&env[1]->__saved_mask);
}
int main(void)
{
printf("Starting...");
// Install timer_handler as the signal handler for SIGVTALRM.
sa.sa_handler = &switchThreads;
sa.sa_flags = 0;
sigemptyset(&sa.sa_mask);
if (sigaction(SIGVTALRM, &sa, NULL) < 0)
{
printf("sigaction error.");
}
// Configure the timer to expire after 1 sec... */
timer.it_value.tv_sec = 3;
timer.it_value.tv_usec = 0;
// configure the timer to expire every 1 sec after that.
timer.it_interval.tv_sec = 1;
timer.it_interval.tv_usec = 0;
// Start a virtual timer. It counts down whenever this process is executing.
if (setitimer(ITIMER_VIRTUAL, &timer, NULL))
{
printf("setitimer error.");
}
setup();
// for(;;){}
f();
}
I've also the strace of the 2 calls:
The first (occur only once):
rt_sigaction(SIGVTALRM, {sa_handler=0x563ff48db82d, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7f22c0f72f20}, NULL, 8) = 0
setitimer(ITIMER_VIRTUAL, {it_interval={tv_sec=1, tv_usec=0}, it_value={tv_sec=3, tv_usec=0}}, NULL) = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
--- SIGVTALRM {si_signo=SIGVTALRM, si_code=SI_KERNEL} ---
rt_sigprocmask(SIG_BLOCK, NULL, [VTALRM], 8) = 0
write(1, "Starting...SWITCH: ret_val=0\n", 29Starting...SWITCH: ret_val=0
) = 29
setitimer(ITIMER_VIRTUAL, {it_interval={tv_sec=1, tv_usec=0}, it_value={tv_sec=3, tv_usec=0}}, NULL) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
write(1, "in g (1)\n", 9in g (1)
) = 9
nanosleep({tv_sec=1, tv_nsec=0}, NULL) = 0
write(1, "in g (2)\n", 9in g (2)
) = 9
nanosleep({tv_sec=1, tv_nsec=0}, NULL) = 0
write(1, "in g (3)\n", 9in g (3)
) = 9
The Second (not occuring even once):
rt_sigaction(SIGVTALRM, {sa_handler=0x55dc7274e82d, sa_mask=[], sa_flags=SA_RESTORER, sa_restorer=0x7fe6e3768f20}, NULL, 8) = 0
setitimer(ITIMER_VIRTUAL, {it_interval={tv_sec=1, tv_usec=0}, it_value={tv_sec=3, tv_usec=0}}, NULL) = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
write(1, "Starting...in f (1)\n", 20Starting...in f (1)
) = 20
nanosleep({tv_sec=1, tv_nsec=0}, NULL) = 0
write(1, "in f (2)\n", 9in f (2)
) = 9
nanosleep({tv_sec=1, tv_nsec=0}, NULL) = 0
write(1, "in f (3)\n", 9in f (3)
) = 9
nanosleep({tv_sec=1, tv_nsec=0}, NULL) = 0
write(1, "in f (4)\n", 9in f (4)
) = 9
nanosleep({tv_sec=1, tv_nsec=0}, NULL) = 0
write(1, "in f (5)\n", 9in f (5)
) = 9
Thanks in advance!!
I've managed to fix that, uslepp stops the execution of the thread so as the timer was configured to 1 second and the 2 funcs were using usleep in their loop, it never got to 1 sec of running time.
From man of usleep:
The usleep() function suspends execution of the calling thread for
(at least) usec microseconds. The sleep may be lengthened slightly
by any system activity or by the time spent processing the call or by
the granularity of system timers.
Related
I have similar problem as here:
Netlink sockets and libnl - nl_recvmsgs_default returning -16 (EBUSY)
But nl_recvmsgs_defaul() return this error value -22 (NLE_MSGTYPE_NOSUPPORT).
Does anyone have an idea why I get this error?
Here is example program:
Kernel:
enum {
DOC_EXMPL_A_UNSPEC,
DOC_EXMPL_A_MSG,
__DOC_EXMPL_A_MAX,
};
#define DOC_EXMPL_A_MAX (__DOC_EXMPL_A_MAX - 1)
#define VERSION_NR 1
static struct genl_family doc_exmpl_gnl_family = {
.id = GENL_ID_GENERATE,
.hdrsize = 0,
.name = "CONTROL_EXMPL",
.version = VERSION_NR,
};
enum {
DOC_EXMPL_C_UNSPEC,
DOC_EXMPL_C_ECHO,
__DOC_EXMPL_C_MAX,
};
#define DOC_EXMPL_C_MAX (__DOC_EXMPL_C_MAX - 1)
int doc_exmpl_echo(struct sk_buff *skb_2, struct genl_info *info)
{
struct sk_buff *skb;
struct nlattr *na, *pos;
int len, rem, type, rc;
char * mydata;
void *msg_head;
if (info == NULL)
goto out;
len = nlmsg_attrlen(info->nlhdr, GENL_HDRLEN);
na = nlmsg_attrdata(info->nlhdr, GENL_HDRLEN);
printk(KERN_DEBUG "len1: %d, sizeof: %d ,len: %d", nlmsg_len(info->nlhdr),
GENL_HDRLEN, len);
nla_for_each_attr(pos, na, len, rem) {
type = nla_type(pos);
mydata = (char *) nla_data(pos);
printk(KERN_DEBUG "Type: %d", type);
if (mydata != NULL)
printk(KERN_DEBUG "Data: %s", mydata);
}
skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
goto out;
msg_head = genlmsg_put(skb, 0, info->snd_seq+1, &doc_exmpl_gnl_family, 0, DOC_EXMPL_C_ECHO);
if (msg_head == NULL) {
rc = -ENOMEM;
goto out;
}
rc = nla_put_u8(skb, DOC_EXMPL_A_MSG, 8);
if (rc != 0)
goto out;
/* finalize the message */
genlmsg_end(skb, msg_head);
//rc = genlmsg_unicast(skb,info->snd_pid );
rc = genlmsg_unicast(genl_info_net(info), skb,info->snd_pid);
if (rc != 0)
goto out;
printk(KERN_DEBUG "End");
return 0;
out:
printk("an error occured in doc_exmpl_echo:\n");
return 0;
}
struct genl_ops doc_exmpl_gnl_ops_echo = {
.cmd = DOC_EXMPL_C_ECHO,
.flags = 0,
.policy = NULL,
.doit = doc_exmpl_echo,
.dumpit = NULL,
};
Userspace code
#include <stdio.h>
#include <stdlib.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <netlink/socket.h>
#include <netlink/msg.h>
#include <net/if.h>
static int expectedId;
static int nlCallback(struct nl_msg* msg, void* arg)
{
struct nlmsghdr* ret_hdr = nlmsg_hdr(msg);
struct nlattr *tb_msg[2 + 1];
char *str;
if (ret_hdr->nlmsg_type != expectedId)
{
// what is this??
return NL_STOP;
}
struct genlmsghdr *gnlh = (struct genlmsghdr*) nlmsg_data(ret_hdr);
nla_parse(tb_msg, 2, genlmsg_attrdata(gnlh, 0),
genlmsg_attrlen(gnlh, 0), NULL);
if (tb_msg[1]) {
str = nla_get_string(tb_msg[1]);
printf("str: %s", str);
}
return 0;
}
int main(int argc, char **argv) {
int cmd, rc;
struct nl_handle *sk;
struct nl_msg *msg;
//allocate socket
sk = nl_handle_alloc();
//connect to generic netlink
genl_connect(sk);
//find the nl80211 driver ID
expectedId = genl_ctrl_resolve(sk, "CONTROL_EXMPL");
//allocate a message
msg = nlmsg_alloc();
cmd = 1;
int flags = 0;
// setup the message
genlmsg_put(msg, NL_AUTO_PID, NL_AUTO_SEQ, expectedId, 0, NLM_F_REQUEST, cmd, 1);
NLA_PUT_STRING(msg, 3, "Hello test");
nl_send_auto_complete(sk, msg);
nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM,
nlCallback, NULL);
rc = nl_recvmsgs_default(sk);
printf("rc = %d \n", rc);
nlmsg_free(msg);
return 0;
nla_put_failure:
nlmsg_free(msg);
return 1;
}
OK I found the problem.
I have bad sequencer number in kernel code, it should be:
genlmsg_put(skb, 0, info->snd_seq, &doc_exmpl_gnl_family, 0, DOC_EXMPL_C_ECHO);
It's the same problem as here: Netlink sockets and libnl - nl_recvmsgs_default returning -16 (EBUSY) but I got other error.
My problem deals with a segmentation fault that I get when I run this program on a linux machine versus my own mac computer. This program runs how I believe it should on my own mac computer, yet when I try to run it on my school's linux computers, I get a segmentation fault that doesn't appear on my mac computer. I'll give a brief background on the assignment and then go over the problem in more detail.
So I have this program which basically simulates baboons crossing a ravine with a single rope. Only one baboon can cross at a time and there are certain restraints on the number of baboons that can cross at a time, as well as how many baboons can cross from one direction before baboons from the other direction are allowed to cross. The implementation of the code.
I have searched for segmentation fault questions already here on stackoverflow, yet most of them deal with multiple processes whereas I am merely using different threads. The segmentation fault ends up coming from waiting on a semaphore that doesn't exist, yet when I checked to see whether it was initialized, it was successfully initialized. Again, this program works on my mac but then doesn't work when I try to run it on my Mac. Any help at all understanding why it can't run on the linux machines but can run on the mac. If any more information is needed, I would be happy to provide it. I did error check at one point but that code was deleted off the school computers. My error checking, as far as I remember, didn't show any errors.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/time.h>
#include <time.h>
#include <pthread.h>
#include <semaphore.h>
#include <fcntl.h>
#include <sys/stat.h> //for mode flags, if needed for future use
#define ATOB_COUNT 20
#define BTOA_COUNT 20
#define RANDOM_SEED 2123
//semaphore names
#define MUTEX_SEM "/mutex"
#define TOB_SEM "/toB"
#define TOA_SEM "/toA"
//define methods here if needed
void *toAThread(void *threadId);
void *toBThread(void *threadId);
void my_sleep(int limit);
void sem_open_errorCheck(char *name, unsigned int startingValue, sem_t *result);
//defining semaphores and shared variables
sem_t *mutex, *toB, *toA;
int xingCount = 0;
int xedCount = 0;
int toBWaitCount = 0;
int toAWaitCount = 0;
enum xingDirectionTypes {
none,
aToB,
bToA
};
enum xingDirectionTypes xingDirection = none;
char orderLeaving[100];
struct threadInfo {
int threadId;
};
struct threadInfo atobIDs[ATOB_COUNT];
struct threadInfo btoaIDs[BTOA_COUNT];
int main(void) {
pthread_t atobPTHREADS[ATOB_COUNT];
pthread_t btoaPTHREADS[BTOA_COUNT];
pthread_attr_t attr;
void *status;
srandom(RANDOM_SEED);
//call helper method which creates semaphore and errorchecks
sem_open_errorCheck(MUTEX_SEM, (unsigned int)1, mutex);
sem_open_errorCheck(TOA_SEM, (unsigned int)0, toA);
sem_open_errorCheck(TOB_SEM, (unsigned int)0, toB);
//Creating a set of attributes to send to the threads
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
//spawn toB baboons
int counter;
for (counter = 0; counter < BTOA_COUNT; counter++) {
atobIDs[counter].threadId = counter;
int result;
if ((result = pthread_create(&atobPTHREADS[counter], &attr, toBThread, (void*) &atobIDs[counter])) == -1) {
perror("Thread Creation Error: atob baboon");
exit(EXIT_FAILURE);
}
}
//spawn toA baboons
for (counter = 0; counter < ATOB_COUNT; counter++) {
btoaIDs[counter].threadId = counter + 20;
int result;
if ((result = pthread_create(&btoaPTHREADS[counter], &attr, toAThread, (void*) &btoaIDs[counter])) == -1) {
perror("Thread Creation Error: btoa baboon");
exit(EXIT_FAILURE);
}
}
//Wait for all the threads to finish
for(counter = 0; counter < ATOB_COUNT; counter++)
{
int result = pthread_join(atobPTHREADS[counter], &status);
if(result == -1)
{
perror("Thread Join: AtoB");
exit(EXIT_FAILURE);
}
}
for(counter = 0; counter < BTOA_COUNT; counter++)
{
int result = pthread_join(btoaPTHREADS[counter], &status);
if(result == -1)
{
perror("Thread Join: BtoA");
exit(EXIT_FAILURE);
}
}
printf("The order leaving %s", orderLeaving);
exit(EXIT_SUCCESS);
}
void *toBThread(void *threadId) {
struct threadInfo *info;
info = (struct threadInfo *)threadId;
int id = info->threadId;
my_sleep(100); //simulate being idle for 1-100ms
//for order checking
char *baboonOrder;
baboonOrder = "B ";
strcat(orderLeaving, baboonOrder);
sem_wait(mutex);
if ((xingDirection == aToB || xingDirection == none) && xingCount < 5 && (xedCount + xingCount) < 10) { //there is an extra parenthesis here in the solutions
xingDirection = aToB;
xingCount++;
printf("AtoB baboon (thread %d) got on the rope\n", id);
sem_post(mutex);
}
else {
toBWaitCount++;
sem_post(mutex);
sem_wait(toB);
toBWaitCount--;
xingCount++;
xingDirection = aToB;
printf("AtoB baboon (thread %d) got on the rope\n", id);
sem_post(mutex);
}
//CROSSING
sem_wait(mutex);
printf("AtoB baboon (thread %d) got off the rope\n", id);
xedCount++;
xingCount--;
if (toBWaitCount != 0 && (((xedCount+xingCount)<10) || ((xedCount+xingCount) >= 10 && toAWaitCount == 0))) {
sem_post(toB);
}
else {
if (xingCount == 0 && toAWaitCount != 0 && (toBWaitCount == 0 || (xedCount + xingCount)>=10)) {
xingDirection = bToA;
xedCount = 0;
sem_post(toA);
}
else {
if (xingCount == 0 && toBWaitCount == 0 && toAWaitCount == 0) {
xingDirection = none;
xedCount = 0;
sem_post(mutex);
}
else {
sem_post(mutex);
}
}
}
}
/*
baboons going from side a to side b
*/
void *toAThread(void *threadId) {
struct threadInfo *info;
info = (struct threadInfo *)threadId;
int id = info->threadId;
my_sleep(100);
//for order checking
char *baboonOrder;
baboonOrder = "A ";
strcat(orderLeaving, baboonOrder);
sem_wait(mutex);
if ((xingDirection == bToA || xingDirection == none) && xingCount < 5 && (xedCount + xingCount) < 10) { //there is an extra parenthesis here in the solutions
xingDirection = bToA;
xingCount++;
printf("BtoA baboon (thread %d) got on the rope\n", id);
sem_post(mutex);
}
else {
toAWaitCount++;
sem_post(mutex);
sem_wait(toA);
toAWaitCount--;
xingCount++;
xingDirection = bToA;
printf("BtoA baboon (thread %d) got on the rope\n", id);
sem_post(mutex);
}
//CROSSING
sem_wait(mutex);
printf("BtoA baboon (thread %d) got off the rope\n", id);
xedCount++;
xingCount--;
if (toAWaitCount != 0 && (((xedCount+xingCount)<10) || ((xedCount+xingCount) >= 10 && toBWaitCount == 0))) {
sem_post(toA);
}
else {
if (xingCount == 0 && toBWaitCount != 0 && (toAWaitCount == 0 || (xedCount + xingCount)>=10)) {
xingDirection = aToB;
xedCount = 0;
sem_post(toB);
}
else {
if (xingCount == 0 && toAWaitCount == 0 && toBWaitCount == 0) {
xingDirection = none;
xedCount = 0;
sem_post(mutex);
}
else {
sem_post(mutex);
}
}
}
}
//taken with permission from readers/writers problem
//Puts the calling thread to sleep to simulate both random start times and random workloads
void my_sleep(int limit) {
struct timespec time_ns;
int duration = random() % limit + 1;
time_ns.tv_sec = 0;
time_ns.tv_nsec = duration * 1000000;
int result = nanosleep(&time_ns, NULL);
if (result != 0)
{
perror("Nanosleep");
exit(EXIT_FAILURE);
}
}
void sem_open_errorCheck(char *name, unsigned int startingValue, sem_t *result) {
sem_unlink(name);
result = sem_open(name, O_CREAT, 0600, startingValue);
if (result == -1) {
perror("sem_open error: semaphore failed to open correctly");
exit(EXIT_FAILURE);
}
}
How to debug stuff like this
The best way to debug this is to run it using the gdb debugger. Like this:
gdb my-monkey-program
(gdb) run
Program received signal SIGSEGV, Segmentation fault.
(gdb) info threads
(gdb) bt
Another excellent idea is to run it with valgrind:
valgrind ./my-monkey-program
which will tell you about invalid memory accesses and all sorts of things.
Your specific problem
gdb reports that the call stack is:
#0 sem_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/sem_wait.S:45
#1 0x0000000000400e8d in toAThread (threadId=0x602160) at test.c:190
#2 0x00007ffff7bc4e9a in start_thread (arg=0x7fffed7e9700) at pthread_create.c:308
#3 0x00007ffff78f1cbd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#4 0x0000000000000000 in ?? ()
Here are the line numbers from my compile:
187 baboonOrder = "A ";
188 strcat(orderLeaving, baboonOrder);
189
190 sem_wait(mutex);
This is because mutex is NULL.
Why it breaks
You're never actually assigning to the mutex variable. You're passing a pointer into sem_open_errorCheck, but what you really need to pass is a pointer-to-a-pointer. Presumably the same applies to toA and toB.
It's just luck that it worked on the Mac!
I'm programming in C, using berkeley db 4.3 (/usr/lib64/libdb-4.3.so) on RHEL5.6 with kernel 2.6.18-238_xen_AMD64.
In my tests (writing 1,000,000 key/value pairs), if one process quitted abnormally (ctrl + c, kill, or assert fails) while an operation on the db is in process, later operation on that db would be blocked upon opening. Strace shows that the process stuck at a futex(ptr_to_something, FUTEX_WAIT, 2, NULL) call after opening the __db.00x(e.g __db.001, __db.002, __db.003) files.
The only way I know to clear the lock is to remove __db.00x files, and following tests showed that the database is not damaged. It meets my requirement, but I'm just wondering whether there's a better(or more elegant) way to solve this problem.
Here I listed some strace stderr and the code to operate the database which may help.
some of the strace stderr
...
open("__db.001", O_RDWR) = 3
fcntl(3, F_SETFD, FD_CLOEXEC) = 0
fstat(3, {st_mode=S_IFREG|0640, st_size=24576, ...}) = 0
close(3) = 0
open("__db.001", O_RDWR) = 3
fcntl(3, F_SETFD, FD_CLOEXEC) = 0
mmap(NULL, 24576, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0) = 0x2afcc4149000
close(3) = 0
futex(0x2afcc4149000, FUTEX_WAIT, 2, NULL **[[stuck here]]**
code to operate the database
typedef DB* db_handle;
db_handle bdb_open(const char *filename, u_int32_t cache_size_mb)
{
int ret;
DB_ENV *env;
db_handle dbp;
u_int32_t flags = DB_CREATE | DB_THREAD | DB_INIT_LOCK | DB_INIT_MPOOL | DB_INIT_LOCK ;
u_int32_t gb = cache_size_mb / 1024, mb = cache_size_mb % 1024;
if (ret = db_env_create(&env, 0)) {
fprintf(stderr, "db_env_create:%d, %s\n", ret, db_strerror(ret));
exit(EXIT_FAILURE);
}
if (ret = env->set_timeout(env, 3 * 1000000, DB_SET_LOCK_TIMEOUT)) {
fprintf(stderr, "env->set_timeout:%d, %s\n", ret, db_strerror(ret));
exit(EXIT_FAILURE);
}
if (ret = env->set_lk_detect(env, DB_LOCK_DEFAULT)) { /* this seems to be of no use in my case */
fprintf(stderr, "env->set_lk_detect:%d, %s\n", ret, db_strerror(ret));
exit(EXIT_FAILURE);
}
if (ret = env->set_cachesize(env, gb, mb * 1024 * 1024, 0)) {
fprintf(stderr, "env->set_cachesize:%d, %s\n", ret, db_strerror(ret));
exit(EXIT_FAILURE);
}
if ((ret = env->open(env, NULL, flags, 0)) != 0) {
fprintf(stderr, "db_env_open:%d, %s\n", ret, db_strerror(ret));
exit(EXIT_FAILURE);
}
if (ret = db_create(&dbp, env, 0)) {
fprintf(stderr, "db_create:%d, %s\n", ret, db_strerror(ret));
exit(EXIT_FAILURE);
}
if (ret = dbp->open(dbp, NULL, filename, NULL, DB_BTREE, flags, 0664)) {
fprintf(stderr, "dbp->open:%d, %s\n", ret, db_strerror(ret));
exit(EXIT_FAILURE);
}
return dbp;
}
int bdb_put(db_handle db, void* key, u_int32_t keylen, void* val, u_int32_t vallen)
{
DBT dkey, dval;
bzero(&dkey, sizeof(dkey));
bzero(&dval, sizeof(dval));
dkey.data = key, dkey.size = keylen;
dval.data = val, dval.size = vallen;
return db->put(db, NULL, &dkey, &dval, 0);
}
int bdb_get(db_handle db, void* key, const u_int32_t keylen,
void* buf, u_int32_t buflen, u_int32_t* nwrite)
{
DBT dkey, dval;
bzero(&dkey, sizeof(dkey));
bzero(&dval, sizeof(dval));
dkey.data = key, dkey.size = keylen;
dval.data = buf, dval.ulen = buflen, dval.flags = DB_DBT_USERMEM;
int ret = db->get(db, NULL, &dkey, &dval, 0);
if (ret == 0 && nwrite != NULL)
*nwrite = dval.size;
return ret;
}
The __db* files contain the names of locks, but not the locks themselves.
The pthread locks are implemented on top of kernel futexes. The process
that was killed likely had an active lock when killed.
Try running "db_recover -h $DBHOME" to clear stale locks.
There is also a callback that can be added to automate "stale lock"
remover.
(aside)
Your code almost certainly needs to handle DB_RUNRECOVERY and DB_VERSION_MISMATCH
error codes from env->open for a robust implementation. Reopen with DB_RECOVER
set will handle.
On older linux, there is also the possibility of "stale futexes" (i.e. a futex
that was locked by a process that died) that can only be cleared by
rebooting (there is another way, using "robust mutexes", that sets
a flag that permits another process to unlock previously locked mutexes,
but that isn't implemented in Berkeley DB).
I use this fanotify sample to monitor open/access perms on the whole file system(/): http://git.infradead.org/users/eparis/fanotify-example.git.
Then I have a test program with multiple threads, each thread iterate the sample foder and open/close the files in it, sometimes my program hangs at open().
OS: Ubuntu 2.6.38-11 x86_64.
Is it a bug of fanotify that it does not support multiple-thread opening?
The code of my test program:
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include <ctype.h>
#include <unistd.h>
#include <dirent.h>
#include <string.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <dirent.h>
//open file function
void open_file( char* file )
{
int fd = -1;
fd = open( file, O_WRONLY, 0x666 );
if( fd >= 0 )
{
printf("open:%s\n", file );
close( fd );
}
}
//iterate directory function
void printdir(char *dir, int depth)
{
DIR *dp;
struct stat statbuf;
char pathbuf[2048] = {0};
struct dirent entry;
struct dirent *entryPtr = NULL;
//printf("opendir %s\n", dir );
usleep( 300 );
if((dp = opendir(dir)) == NULL) {
if( errno != ENOTDIR )
{
fprintf(stderr,"cannot open directory: %s\n", dir);
perror("open fial");
}
return;
}
readdir_r( dp, &entry, &entryPtr );
while( entryPtr != NULL)
{
snprintf(pathbuf,2000, "%s/%s\0", dir, entry.d_name );
printf("iteraotr:%s\n", pathbuf );
lstat( pathbuf, &statbuf );
if(S_ISDIR( statbuf.st_mode ))
{
/* Found a directory, but ignore . and .. */
if(strcmp(".",entry.d_name) == 0 ||
strcmp("..",entry.d_name) == 0)
{
}
else
{
//printf("%d,%s\n",depth, entry->d_name);
printdir( pathbuf, depth+1);
}
}
else
{
//printf("%*s%s\n",depth,"",entry->d_name);
open_file( pathbuf );
}
readdir_r( dp, &entry, &entryPtr );
}
closedir(dp);
}
//thread function
void* iterator_dir( void* data )
{
char* path = (char*)data;
printf("In iterator_dir(): %s\n", path );
printdir( path, 0 );
return NULL;
}
pthread_t threadID[10] = {0};
//main function
int main( int argc, char** argv )
{
if( argc < 3 )
{
printf("Usage: %s <thread_num> <file>\n", argv[0] );
exit(0);
}
if( isdigit( (char)*argv[1] ) == 0 )
{
printf(" Thread num is 0 - 9\n");
exit(0);
}
int thread_num = atoi( argv[1] );
char* res;
pthread_attr_t attr;
pthread_attr_init(&attr);
int i = 0;
for( i = 0; i < thread_num; ++i )
{
pthread_create( &threadID[i], &attr, &iterator_dir, argv[2]);
}
for( i = 0; i < thread_num; ++i )
{
pthread_join( threadID[i] , &res );
}
}
2011-09-28 Edit:
I comment the open file operation, only keep the iterate directory part. The application still hangs.
This is the output of strace:
enter code here
pid 10692] open("/home/byang//.config/menus", O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC <unfinished ...>
[pid 10691] write(1, "1213966080 opendir /home/byang//"..., 56) = 56
.........
[pid 10689] madvise(0x7f3c48dbc000, 8368128, MADV_DONTNEED) = 0
[pid 10689] _exit(0) = ?
Process 10689 detached
[pid 10688] <... futex resumed> ) = 0
[pid 10688] futex(0x7f3c47db99d0, FUTEX_WAIT, 10692, NULL <unfinished ...>
It hangs here, when I close the fanotify, it continues...
[pid 10692] <... open resumed> ) = 11
[pid 10692] getdents(11, /* 4 entries */, 32768) = 128
[pid 10692] lstat("/home/byang//.config/menus/applications.menu", {st_mode=S_IFREG|0644, st_size=233, ...}) = 0
10688 is the parent thread; 10689,10691,10692 are the child threads that iterating the directories.
It seems that 10692 are waiting the reply from fanotify?
It's a bug of Kernel's fanotify.
I posted a patch to Linux-Kernel:
When multiple threadsiterate the same direcotry, some thread will hang.
This patch let fanotify differentiate access events from different
threads, prevent fanotify from merging access events from different
threads.
http://marc.info/?l=linux-kernel&m=131822913806350&w=2
-------------------------------
diff -r -u linux-3.1-rc4_orig/fs/notify/fanotify/fanotify.c
linux-3.1-rc4/fs/notify/fanotify/fanotify.c
--- linux-3.1-rc4_orig/fs/notify/fanotify/fanotify.c 2011-08-29
12:16:01.000000000 +0800
+++ linux-3.1-rc4/fs/notify/fanotify/fanotify.c 2011-10-10
12:28:23.276847000 +0800
## -15,7 +15,8 ##
if (old->to_tell == new->to_tell &&
old->data_type == new->data_type &&
- old->tgid == new->tgid) {
+ old->tgid == new->tgid &&
+ old->pid == new->pid) {
switch (old->data_type) {
case (FSNOTIFY_EVENT_PATH):
if ((old->path.mnt == new->path.mnt) &&
## -144,11 +145,19 ##
return PTR_ERR(notify_event);
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
- if (event->mask & FAN_ALL_PERM_EVENTS) {
- /* if we merged we need to wait on the new event */
- if (notify_event)
- event = notify_event;
- ret = fanotify_get_response_from_access(group, event);
+ //if overflow, do not wait for response
+ if(fsnotify_isoverflow(event))
+ {
+ pr_debug("fanotify overflow!\n");
+ }
+ else
+ {
+ if (event->mask & FAN_ALL_PERM_EVENTS) {
+ /* if we merged we need to wait on the new event */
+ if (notify_event)
+ event = notify_event;
+ ret = fanotify_get_response_from_access(group, event);
+ }
}
#endif
diff -r -u linux-3.1-rc4_orig/fs/notify/notification.c
linux-3.1-rc4/fs/notify/notification.c
--- linux-3.1-rc4_orig/fs/notify/notification.c 2011-08-29
12:16:01.000000000 +0800
+++ linux-3.1-rc4/fs/notify/notification.c 2011-10-10 12:27:09.331787000 +0800
## -95,6 +95,7 ##
BUG_ON(!list_empty(&event->private_data_list));
kfree(event->file_name);
+ put_pid(event->pid);
put_pid(event->tgid);
kmem_cache_free(fsnotify_event_cachep, event);
}
## -132,6 +133,14 ##
return priv;
}
+bool fsnotify_isoverflow(struct fsnotify_event *event)
+{
+ if(event==q_overflow_event)
+ {
+ return true;
+ }
+ return false;
+}
/*
* Add an event to the group notification queue. The group can later pull this
* event off the queue to deal with. If the event is successfully added to the
## -374,6 +383,7 ##
return NULL;
}
}
+ event->pid = get_pid(old_event->pid);
event->tgid = get_pid(old_event->tgid);
if (event->data_type == FSNOTIFY_EVENT_PATH)
path_get(&event->path);
## -417,6 +427,7 ##
event->name_len = strlen(event->file_name);
}
+ event->pid = get_pid(task_pid(current));
event->tgid = get_pid(task_tgid(current));
event->sync_cookie = cookie;
event->to_tell = to_tell;
diff -r -u linux-3.1-rc4_orig/include/linux/fsnotify_backend.h
linux-3.1-rc4/include/linux/fsnotify_backend.h
--- linux-3.1-rc4_orig/include/linux/fsnotify_backend.h 2011-08-29
12:16:01.000000000 +0800
+++ linux-3.1-rc4/include/linux/fsnotify_backend.h 2011-10-10
12:27:48.587369000 +0800
## -238,6 +238,7 ##
u32 sync_cookie; /* used to corrolate events, namely inotify mv events */
const unsigned char *file_name;
size_t name_len;
+ struct pid *pid;
struct pid *tgid;
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
## -378,6 +379,8 ##
struct fsnotify_event_private_data *priv,
struct fsnotify_event *(*merge)(struct list_head *,
struct fsnotify_event *));
+/*true if the event is an overflow event*/
+extern bool fsnotify_isoverflow(struct fsnotify_event *event);
/* true if the group notification queue is empty */
extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
/* return, but do not dequeue the first event on the notification queue */
I am trying to ptrace a vsftpd server process on linux to be able to get control whenever vsftpd process makes a system call. I start the vsftpd process and pass this process id as command line to the following program which traces vsftpd.
however, when I run the following program it just hangs and does not print anything.Can anyone point out what could be wrong? Thanks a lot for your help!!
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <linux/user.h>
#include <sys/syscall.h> /* For SYS_write etc */
#include<sys/reg.h>
int main(int argc,char* argv[])
{ pid_t child;
long orig_eax, eax;
long params[3];
int status;
int insyscall = 0;
child = atoi(argv[1]);
ptrace(PTRACE_ATTACH,child,NULL,NULL);
while(1) {
wait(&status);
if(WIFEXITED(status))
break;
orig_eax = ptrace(PTRACE_PEEKUSER,
child, 4 * ORIG_EAX, NULL);
if(orig_eax == __NR_clone || orig_eax == __NR_open || orig_eax == __NR_write)
{
if(insyscall == 0) {
/* Syscall entry */
insyscall = 1;
params[0] = ptrace(PTRACE_PEEKUSER,
child, 4 * EBX,
NULL);
params[1] = ptrace(PTRACE_PEEKUSER,
child, 4 * ECX,
NULL);
params[2] = ptrace(PTRACE_PEEKUSER,
child, 4 * EDX,
NULL);
if(orig_eax == __NR_clone)
{
printf("\nClone");
}
else if(orig_eax == __NR_open)
printf("\nOpen");
else if(orig_eax == __NR_write)
printf("\nWrite");
printf(" called with "
"%ld, %ld, %ld\n",
params[0], params[1],
params[2]);
}
else { /* Syscall exit */
eax = ptrace(PTRACE_PEEKUSER,
child, 4 * EAX, NULL);
printf("Returned "
"with %ld\n", eax);
insyscall = 0;
}
}
ptrace(PTRACE_SYSCALL,
child, NULL, NULL);
}
return 0;
}
You need to have the privilege to trace VSFTPD. Run this as root. To test, put the result of ptrace(PTRACE_ATTACH,child,NULL,NULL); into a variable and print it, ie.
long result = ptrace(PTRACE_ATTACH,child,NULL,NULL);
printf("%ld",result);
On my system if result == -1, I do not have permission. If result == 0, I do.