Join network namespace from inside user namespace - linux

Root creates a network namespace testns, and clone with CLONE_NEWUSER flag to get the child inside an user namespace. Then the child tries to join testns by calling setns, which shows the permission denied. Is there any alternative to let the child inside an user namespace to join a network namespace?
Just wrote up this following test: (Before running, I created testns by calling sudo ip netns add testns)
#define _GNU_SOURCE
#include <sched.h>
#include <stdlib.h>
#include <signal.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#define STACK_SIZE (1024 * 1024)
static char child_stack[STACK_SIZE];
static void update_map(char *mapping, char *map_file) {
int fd, j;
size_t map_len;
map_len = strlen(mapping);
for (j = 0; j < map_len; j++)
if (mapping[j] == ',')
mapping[j] = '\n';
fd = open(map_file, O_RDWR);
if (fd == -1) {
fprintf(stderr, "open %s: %s\n", map_file, strerror(errno));
exit(EXIT_FAILURE);
}
if (write(fd, mapping, map_len) != map_len) {
fprintf(stderr, "write %s: %s\n", map_file, strerror(errno));
exit(EXIT_FAILURE);
}
close(fd);
}
static void proc_setgroups_write(pid_t child_pid, char *str) {
char setgroups_path[PATH_MAX];
int fd;
snprintf(setgroups_path, PATH_MAX, "/proc/%ld/setgroups",
(long) child_pid);
fd = open(setgroups_path, O_RDWR);
if (fd == -1) {
if (errno != ENOENT)
fprintf(stderr, "ERROR: open %s: %s\n", setgroups_path,
strerror(errno));
return;
}
if (write(fd, str, strlen(str)) == -1)
fprintf(stderr, "ERROR: write %s: %s\n", setgroups_path,
strerror(errno));
close(fd);
}
static void update_userns(pid_t pid, char *uidMap, char *gidMap) {
char map_path[PATH_MAX];
snprintf(map_path, PATH_MAX, "/proc/%ld/uid_map", (long) pid);
update_map(uidMap, map_path);
proc_setgroups_write(pid, "deny");
snprintf(map_path, PATH_MAX, "/proc/%ld/gid_map", (long) pid);
update_map(gidMap, map_path);
}
static int join_netns(char *netns_name) {
char netns_path[256];
snprintf(netns_path, sizeof(netns_path), "/var/run/netns/%s", netns_name);
int fd = open(netns_path, O_RDONLY);
if (fd == -1) {
fprintf(stderr, "open netns path failed: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
if (setns(fd, CLONE_NEWNET) == -1) {
fprintf(stderr, "set netns failed: %s\n", strerror(errno));
exit(EXIT_FAILURE);
}
return 0;
}
static int child(void* arg) {
// Sleep so that userns has the correct mapping.
sleep(1);
return join_netns("testns");
}
int main(int argc, char *argv[]) {
uid_t uid = getuid();
char mapping[10];
snprintf(mapping, 10, "0 %d 1", uid);
int pid1 = clone(child, child_stack + STACK_SIZE, CLONE_NEWUSER | SIGCHLD, NULL);
if (pid1 == -1) {
perror("Clone");
exit(EXIT_FAILURE);
}
update_userns(pid1, mapping, mapping);
if (waitpid(pid1, NULL, 0) == -1) {
perror("waitpid");
exit(EXIT_FAILURE);
}
}

The kernel will do a security check that the userns "owner"/creator of the network namespace matches before allowing a join to an existing network namespace.
You can definitely join a network namespace once inside a user namespace, but that initial network namespace must be created with the same user namespace. In container runtimes like the runc tool, you can validate this by starting a simple container in a user namespace with a network namespace created, and then start a second container with references to the first container's user and network namespace paths. I demo'd this using runc at a previous DockerCon; you can see me sharing the user namespace and network namespace in this segment starting at 41:24

Related

Using eBPF to measure CPU mode switch overhead incured by making system call

As title, but the measurement result is unreasonable. Let me describe the current status.
I'm using syscall getuid as measurement target, I started by measureing the complete overhead with two clock_gettime bounded around, then measure the entry (what SYSCALL instruction does before executing the actual getuid code) and leaving overhead saparately (with eBPF program hook onto the entry and leaving point).
The result for the complete overhead is ~65ns, and regarding to the entry and leaving overhead, it's ~77ns and ~70ns respectively.
It's obvious that my measurement has some additional overhead except the typical overhead. However, it's weird that since clock_gettime is a vDSO syscall, it should barely have noticeable overhead. And BPF, which is a lightweight instrumental tool (JIT-ed and etc.) these day in Linux, shouldn't have noticeable overhead too.
Is there anyone have idea what additional overhead my measurement incurs?
Following is my measurement code:
userland (measuring the return-from-kernel overhead):
#define _GNU_SOURCE
#include <bpf.h>
#include <libbpf.h>
#include <stdlib.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <string.h>
#include <asm/errno.h>
#include <linux/if_link.h>
#include <errno.h>
#include <sys/resource.h>
#include <unistd.h>
#include <asm/unistd.h>
#include <time.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sched.h>
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
#define TEST_CNT 1000000
#define BPF_FILE_NAME "mkern.o"
#define BPF_MAP_NAME "msys"
static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
int cpu, int group_fd,
unsigned long flags)
{
attr->size = sizeof(*attr);
return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}
static int attach_kprobe(int prog_fd)
{
int err, fd, id;
char buf[32];
struct perf_event_attr attr = {};
err = system("echo 'r:kp_sys_batch __x64_sys_getuid' > /sys/kernel/debug/tracing/kprobe_events");
if (err < 0) {
fprintf(stderr, "Failed to create kprobe, error '%s'\n", strerror(errno));
return -1;
}
fd = open("/sys/kernel/debug/tracing/events/kprobes/kp_sys_batch/id", O_RDONLY, 0);
if (fd < 0) {
fprintf(stderr, "Failed to open event %s\n", "sys_batch");
return -1;
}
err = read(fd, buf, sizeof(buf));
if (err < 0 || err >= sizeof(buf)) {
fprintf(stderr, "read from '%s' failed '%s'\n", "sys_batch", strerror(errno));
return -1;
}
close(fd);
buf[err] = 0;
id = atoi(buf);
attr.config = id;
attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_type = PERF_SAMPLE_RAW;
attr.sample_period = 1;
attr.wakeup_events = 1;
fd = sys_perf_event_open(&attr, 0/*this process*/, -1/*any cpu*/, -1/*group leader*/, 0);
if (fd < 0) {
perror("sys_perf_event_open");
fprintf(stderr, "Failed to open perf_event (id: %llu)\n", attr.config);
return -1;
}
err = ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
if (err < 0) {
fprintf(stderr, "ioctl PERF_EVENT_IOC_ENABLE failed err %s\n",
strerror(errno));
return -1;
}
err = ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
if (err < 0) {
fprintf(stderr, "ioctl PERF_EVENT_IOC_SET_BPF failed: %s\n",
strerror(errno));
return -1;
}
return 0;
}
static void maxi_memlock_rlimit(void)
{
struct rlimit rlim_new = {
.rlim_cur = RLIM_INFINITY,
.rlim_max = RLIM_INFINITY,
};
if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) {
fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
exit(-1);
}
}
static int find_map_fd(struct bpf_object *bpf_obj, const char *mapname)
{
struct bpf_map *map;
int map_fd = -1;
map = bpf_object__find_map_by_name(bpf_obj, mapname);
if (!map) {
fprintf(stderr, "Failed finding map by name: %s\n", mapname);
exit(-1);
}
map_fd = bpf_map__fd(map);
return map_fd;
}
int main(int argc, char **argv)
{
int bpf_map_fd;
int bpf_prog_fd = -1;
int err;
int key = 0;
struct timespec tp;
struct bpf_object *bpf_obj;
struct reals map;
struct bpf_prog_load_attr xattr = {
.prog_type = BPF_PROG_TYPE_KPROBE,
.file = BPF_FILE_NAME,
};
maxi_memlock_rlimit();
err = bpf_prog_load_xattr(&xattr, &bpf_obj, &bpf_prog_fd);
if (err) {
fprintf(stderr, "Failed loading bpf object file\n");
exit(-1);
}
if (attach_kprobe(bpf_prog_fd)) {
fprintf(stderr, "Failed attaching kprobe\n");
exit(-1);
}
bpf_map_fd = find_map_fd(bpf_obj, BPF_MAP_NAME);
if (find_map_fd < 0) {
fprintf(stderr, "Failed finding map fd\n");
exit(-1);
}
/* warm up */
for (int i = 0; i < TEST_CNT; i++) {
syscall(__NR_getuid); /* dummy call */
clock_gettime(CLOCK_MONOTONIC, &tp);
if (unlikely(bpf_map_lookup_elem(bpf_map_fd, &key, &map))) {
fprintf(stderr, "Failed to lookup map element\n");
perror("lookup");
exit(-1);
}
}
uint64_t delta = 0;
for (int i = 0; i < TEST_CNT; i++) {
syscall(__NR_getuid); /* dummy call */
clock_gettime(CLOCK_MONOTONIC, &tp);
if (unlikely(bpf_map_lookup_elem(bpf_map_fd, &key, &map))) {
fprintf(stderr, "Failed to lookup map element\n");
perror("lookup");
exit(-1);
}
delta += (1000000000 * tp.tv_sec + tp.tv_nsec) - map.ts;
}
printf("avg: %fns\n", (double) delta / TEST_CNT);
return 0;
}
user land (measuring the enter-kernel overhead, almost same as the above, except what I pointed out):
err = system("echo 'p:kp_sys_batch sys_batch' > /sys/kernel/debug/tracing/kprobe_events");
...
clock_gettime(CLOCK_MONOTONIC, &tp);
syscall(__NR_getuid); /* dummy call */
...
delta += map.ts - (1000000000 * tp.tv_sec + tp.tv_nsec);
kernel land:
SEC("getuid")
int kp_sys_batch(struct pt_regs *ctx)
{
__u32 i = 0;
struct reals *r;
r = bpf_map_lookup_elem(&reals, &i);
if (!r)
return 1;
r->ts = bpf_ktime_get_ns();
return 0;
}
Except the additional overhead I mentioned above, inside the return-from-kernel measurement code, if the echo 'r:kp_sys_batch sys_batch' is changed to echo 'p:kp_sys_batch sys_batch' (which means that the measurement would take the syscall execution overhead into account), the result would be ~48ns, this means that the result includes overhead of syscall execution and return-from-kernel. Any idea why this could be only ~48ns?
Thanks!

QProcess outputs with delay threads' stdout/stderr of multithreaded application

My platform:
Ubuntu 17.10 32-bit (Vbox VM)
Qt Creator 3.5.1 (opensource)
Qt 5.5.1 (GCC 4.9.1 20140922 (Red Hat 4.9.1-10), 32 bit
I am trying to invoke a multithreaded program (with arguments) using QProcess.start().
My program runs fine on terminal, i.e. periodically prints on stdout.
Using a TextEdit to log the stdout/stderr of the program I have connected QProcess readyReadStandardOutput/Error signals.
The stdout/stderr that comes from the main thread of the program is correctly shown on the TextEdit, the rest of the output (the one from all the other threads) is not shown.
EDIT
On the main thread an HTTP server is listening.
If a HTTP request is performed by browser at the url "127.0.0.1:32001" (port 32001 is hard coded in the QT code), when the HTTP request is received the program appends the HTTP packet and all the pending output from the other thread (moduleThread) to the TextEdit, so it could be a problem of flushing.
main.c
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <pthread.h>
#include <semaphore.h>
#include <sys/time.h>
#include <sys/signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "http_server.h"
static pthread_t moduleThr;
static pthread_attr_t moduleThread_attr;
static bool one_second_elapsed;
static sem_t oneSecondSem;
void *moduleThread(void *arg0)
{
bool one_second_elapsed_local;
while (1)
{
sem_wait(&oneSecondSem);
one_second_elapsed_local = one_second_elapsed;
one_second_elapsed = false;
sem_post(&oneSecondSem);
if (one_second_elapsed_local)
fprintf(stdout, "Hello world each second!\r\n");
usleep(50000);
}
}
static void oneSecElapsed(int signum)
{
sem_wait(&oneSecondSem);
one_second_elapsed = true;
sem_post(&oneSecondSem);
}
static void TIMER_1sec_init(void)
{
struct sigaction sa;
struct itimerval timer;
/* Install timer_handler as the signal handler for SIGALRM. */
memset (&sa, 0, sizeof (sa));
sa.sa_handler = &oneSecElapsed;
sigaction (SIGALRM, &sa, NULL);
/* Configure the timer to expire after 1 sec... */
timer.it_value.tv_sec = 1;
timer.it_value.tv_usec = 0;
/* ... and every 1 sec after that... */
timer.it_interval.tv_sec = 1;
timer.it_interval.tv_usec = 0;
/* Start a virtual timer. It counts down whenever this process is
executing. */
setitimer (ITIMER_REAL, &timer, NULL);
}
/*
* ======== main ========
*/
int main(int argc, char *argv[])
{
const char *tcpport;
if (argc > 2)
{
fprintf(stdout, "id = %s\r\n", argv[1]);
fprintf(stdout, "tcpport = %s\r\n", argv[2]);
tcpport = argv[2];
}
else
exit(-1);
sem_init(&oneSecondSem, 0, 1);
one_second_elapsed = false;
/* Create thread quadro manager */
pthread_attr_init(&moduleThread_attr);
pthread_attr_setstacksize(&moduleThread_attr, 2048);
pthread_create(&moduleThr, &moduleThread_attr, moduleThread, NULL);
TIMER_1sec_init();
HTTPSERVER_init(tcpport);
/* should never return */
return (0);
}
http_server.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* memset() */
#include <stdbool.h>
#include <signal.h>
#include <sys/socket.h>
#include <sys/signal.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <netdb.h>
#include "module.h"
typedef struct { char *name, *value; } header_t;
static header_t reqhdr[17] = { {"\0", "\0"} };
//#define PORT "32001" /* Port to listen on */
#define BACKLOG 10 /* Passed to listen() */
static char *buf;
static char *method; // "GET" or "POST"
static char *uri; // "/index.html" things before '?'
static char *qs; // "a=1&b=2" things after '?'
static char *prot; // "HTTP/1.1"
static char *payload; // for POST
static int payload_size;
// get request header
char *request_header(const char* name)
{
header_t *h = reqhdr;
while(h->name) {
if (strcmp(h->name, name) == 0) return h->value;
h++;
}
return NULL;
}
void handle(int newsock)
{
//static int count = 0;
/* recv(), send(), close() */
int rcvd;
buf = malloc(65535);
rcvd=recv(newsock, buf, 65535, 0);
if (rcvd<0) // receive error
fprintf(stderr,("recv() error\n"));
else if (rcvd==0) // receive socket closed
fprintf(stderr,"Client disconnected unexpectedly.\n");
else // message received
{
buf[rcvd] = '\0';
//fputs(buf, stdout);
//fprintf(stdout, "count = %d\n", count);
method = strtok(buf, " \t\r\n");
uri = strtok(NULL, " \t");
prot = strtok(NULL, " \t\r\n");
fprintf(stderr, "\x1b[32m + [%s] %s\x1b[0m\n", method, uri);
if ((qs = strchr(uri, '?')))
{
*qs++ = '\0'; //split URI
} else {
qs = uri - 1; //use an empty string
}
header_t *h = reqhdr;
char *t = (char *)reqhdr;
char *t2;
while(h < reqhdr+16) {
char *k,*v;
k = strtok(NULL, "\r\n: \t");
if (!k)
break;
v = strtok(NULL, "\r\n");
while(*v && *v==' ')
v++;
h->name = k;
h->value = v;
h++;
fprintf(stderr, "[H] %s: %s\n", k, v);
t = v + 1 + strlen(v);
if (t[1] == '\r' && t[2] == '\n')
break;
}
t+=2;
t++; // now the *t shall be the beginning of user payload
t2 = request_header("content-length"); // and the related header if there is
payload = t;
payload_size = t2 ? atol(t2) : (rcvd-(t-buf));
fprintf(stdout, "-- payload len = %d >", payload_size);
fputs(payload, stdout);
fprintf(stdout, "<\r\n");
if (strcmp(method, "GET") == 0)
{
fprintf(stdout, "\nit's a GET!\r\n");
}
else if (strcmp(method, "POST") == 0)
{
fprintf(stdout, "\nit's a POST!\r\n");
}
sprintf(buf, "HTTP/1.1 200 OK\r\n\r\n");
send(newsock, buf, strlen(buf), 0);
close(newsock);
free(buf);
}
}
int HTTPSERVER_init(const char *tcpport)
{
int sock;
struct addrinfo hints, *res;
int reuseaddr = 1; /* True */
sigset_t sigset, oldset;
sigfillset(&sigset);
pthread_sigmask(SIG_BLOCK, &sigset, &oldset);
/* Get the address info */
memset(&hints, 0, sizeof hints);
hints.ai_family = AF_INET;
hints.ai_socktype = SOCK_STREAM;
if (getaddrinfo(NULL, tcpport, &hints, &res) != 0) {
perror("getaddrinfo");
return 1;
}
/* Create the socket */
sock = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
if (sock == -1) {
perror("socket");
return 1;
}
/* Enable the socket to reuse the address */
if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(int)) == -1) {
perror("setsockopt");
return 1;
}
/* Bind to the address */
if (bind(sock, res->ai_addr, res->ai_addrlen) == -1) {
perror("bind");
return 1;
}
/* Listen */
if (listen(sock, BACKLOG) == -1) {
perror("listen");
return 1;
}
freeaddrinfo(res);
/* Main loop */
while (1) {
printf("waiting on accept\n");
fflush(stdout);
socklen_t size = sizeof(struct sockaddr_in);
struct sockaddr_in their_addr;
int newsock = accept(sock, (struct sockaddr*)&their_addr, &size);
if (newsock == -1) {
perror("accept");
}
else {
printf("Got a connection from %s on port %d\n",
inet_ntoa(their_addr.sin_addr), htons(their_addr.sin_port));
handle(newsock);
}
}
close(sock);
return 0;
}
As pointed out by #eyllanesc, the pending strings "Hello world each second!\r\n" come out eventually after several seconds.
The moduleThread shoud be a thread and not a different process: this is verified by commadn "ps aux" that shows only "../program/testApp test 32001".
Could any of you kindly give me a hint on what is going on here please?
Thanks for your time,
Francesco

write_proc is not invoked when written from userspace

I am trying to understand procfs for communication between userspace and kernel module. My module has basic two functions for procfs write_proc, driver_mmap.
I call multiple times write_proc by calling fputs("123456789",fd). where fd is file descriptor to procfs entry in /proc directory. But I don't see write_proc called multiple time. Code is attached by here.
<code>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <sound/core.h>
#include <sound/initval.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/mm.h> /* mmap related stuff */
#define BUF_SIZE 64 * 1024
int *MmapBuffer;
int Factor = 1;
static int write_proc(struct file *filp, int *buf, size_t count, loff_t *offp)
{
int rc,i;
printk("in Write \n");
for (i = 1; i <= 16*1024 ; i++)
MmapBuffer[i-1] = (i+1)*Factor;
Factor++;
return count;
}
static int driver_mmap(struct file *file, struct vm_area_struct *vma)
{
int ret;
vma->vm_flags |= VM_LOCKED|VM_SHARED;
ret = remap_pfn_range(vma, vma->vm_start,
virt_to_phys(MmapBuffer) >> PAGE_SHIFT,
vma->vm_end-vma->vm_start, vma->vm_page_prot);
if(ret != 0)
printk("MMAP Failed \n");
SetPageReserved(virt_to_page(MmapBuffer));
printk("MMAP Succeeded \n");
return 0;
}
// file operations
struct file_operations proc_fops =
{
.write = write_proc,
.mmap = driver_mmap,
};
// init module
int init_module_test(void)
{
printk("<1>Hello world\n");
MmapBuffer = kzalloc(BUF_SIZE,__GFP_COLD|GFP_DMA);
if(MmapBuffer == NULL)
printk("Kzalloc failed. reduce buffer size \n");
proc_create ("Test_fs",0,NULL, &proc_fops);
return 0;
}
// exit module
void cleanup_module_test(void)
{
kfree(MmapBuffer);
remove_proc_entry ("Test_fs", NULL);
printk("Goodbye world\n");
}
module_init(init_module_test);
module_exit(cleanup_module_test);
MODULE_LICENSE("GPL");
</code>
Application code
<code>
#include<stdio.h>
#include<stdlib.h>
#include<sys/mman.h>
#include<errno.h>
#include <fcntl.h>
int main(void)
{
int fd;
int i,j;
int *msg ;
printf("Allocation started \n ");
msg=(int*)malloc(64*1024);
if(msg == NULL)
printf("Allocation failed \n");
//unsigned int *addr;
printf("Starting opening \n ");
if((fd=open("/proc/Test_fs", O_RDONLY ))<0)
{
printf("File not opened ");
}
printf("Starting mapping \n ");
msg = mmap(NULL, 64*1024, PROT_READ, MAP_SHARED , fd, 0);
printf("done from module \n ");
if(msg == MAP_FAILED)
{
printf("MAP failed and error is %s", strerror(errno));
return 0;
}
close(fd);
printf("Successful mapping");
FILE *f;
f=fopen("/proc/Test_fs", "wr");
if(!f)
{
printf("File not opened ");
}
for (j = 0; j < 10 ; j++)
{
if(fputs("1234567890,",f) <= 0)
printf("write failed, ");
for (i = 0; i < 16*1024 ; i++)
printf("%d, ", msg[i]);
printf("\n \n done \n \n ");
}
fclose(f);
return 0;
}
</code>

How can i make sure that only a single instance of the process on Linux? [duplicate]

What would be your suggestion in order to create a single instance application, so that only one process is allowed to run at a time? File lock, mutex or what?
A good way is:
#include <sys/file.h>
#include <errno.h>
int pid_file = open("/var/run/whatever.pid", O_CREAT | O_RDWR, 0666);
int rc = flock(pid_file, LOCK_EX | LOCK_NB);
if(rc) {
if(EWOULDBLOCK == errno)
; // another instance is running
}
else {
// this is the first instance
}
Note that locking allows you to ignore stale pid files (i.e. you don't have to delete them). When the application terminates for any reason the OS releases the file lock for you.
Pid files are not terribly useful because they can be stale (the file exists but the process does not). Hence, the application executable itself can be locked instead of creating and locking a pid file.
A more advanced method is to create and bind a unix domain socket using a predefined socket name. Bind succeeds for the first instance of your application. Again, the OS unbinds the socket when the application terminates for any reason. When bind() fails another instance of the application can connect() and use this socket to pass its command line arguments to the first instance.
Here is a solution in C++. It uses the socket recommendation of Maxim. I like this solution better than the file based locking solution, because the file based one fails if the process crashes and does not delete the lock file. Another user will not be able to delete the file and lock it. The sockets are automatically deleted when the process exits.
Usage:
int main()
{
SingletonProcess singleton(5555); // pick a port number to use that is specific to this app
if (!singleton())
{
cerr << "process running already. See " << singleton.GetLockFileName() << endl;
return 1;
}
... rest of the app
}
Code:
#include <netinet/in.h>
class SingletonProcess
{
public:
SingletonProcess(uint16_t port0)
: socket_fd(-1)
, rc(1)
, port(port0)
{
}
~SingletonProcess()
{
if (socket_fd != -1)
{
close(socket_fd);
}
}
bool operator()()
{
if (socket_fd == -1 || rc)
{
socket_fd = -1;
rc = 1;
if ((socket_fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
{
throw std::runtime_error(std::string("Could not create socket: ") + strerror(errno));
}
else
{
struct sockaddr_in name;
name.sin_family = AF_INET;
name.sin_port = htons (port);
name.sin_addr.s_addr = htonl (INADDR_ANY);
rc = bind (socket_fd, (struct sockaddr *) &name, sizeof (name));
}
}
return (socket_fd != -1 && rc == 0);
}
std::string GetLockFileName()
{
return "port " + std::to_string(port);
}
private:
int socket_fd = -1;
int rc;
uint16_t port;
};
For windows, a named kernel object (e.g. CreateEvent, CreateMutex). For unix, a pid-file - create a file and write your process ID to it.
You can create an "anonymous namespace" AF_UNIX socket. This is completely Linux-specific, but has the advantage that no filesystem actually has to exist.
Read the man page for unix(7) for more info.
Avoid file-based locking
It is always good to avoid a file based locking mechanism to implement the singleton instance of an application. The user can always rename the lock file to a different name and run the application again as follows:
mv lockfile.pid lockfile1.pid
Where lockfile.pid is the lock file based on which is checked for existence before running the application.
So, it is always preferable to use a locking scheme on object directly visible to only the kernel. So, anything which has to do with a file system is not reliable.
So the best option would be to bind to a inet socket. Note that unix domain sockets reside in the filesystem and are not reliable.
Alternatively, you can also do it using DBUS.
It's seems to not be mentioned - it is possible to create a mutex in shared memory but it needs to be marked as shared by attributes (not tested):
pthread_mutexattr_t attr;
pthread_mutexattr_init(&attr);
pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
pthread_mutex_t *mutex = shmat(SHARED_MEMORY_ID, NULL, 0);
pthread_mutex_init(mutex, &attr);
There is also shared memory semaphores (but I failed to find out how to lock one):
int sem_id = semget(SHARED_MEMORY_KEY, 1, 0);
No one has mentioned it, but sem_open() creates a real named semaphore under modern POSIX-compliant OSes. If you give a semaphore an initial value of 1, it becomes a mutex (as long as it is strictly released only if a lock was successfully obtained).
With several sem_open()-based objects, you can create all of the common equivalent Windows named objects - named mutexes, named semaphores, and named events. Named events with "manual" set to true is a bit more difficult to emulate (it requires four semaphore objects to properly emulate CreateEvent(), SetEvent(), and ResetEvent()). Anyway, I digress.
Alternatively, there is named shared memory. You can initialize a pthread mutex with the "shared process" attribute in named shared memory and then all processes can safely access that mutex object after opening a handle to the shared memory with shm_open()/mmap(). sem_open() is easier if it is available for your platform (if it isn't, it should be for sanity's sake).
Regardless of the method you use, to test for a single instance of your application, use the trylock() variant of the wait function (e.g. sem_trywait()). If the process is the only one running, it will successfully lock the mutex. If it isn't, it will fail immediately.
Don't forget to unlock and close the mutex on application exit.
It will depend on which problem you want to avoid by forcing your application to have only one instance and the scope on which you consider instances.
For a daemon — the usual way is to have a /var/run/app.pid file.
For user application, I've had more problems with applications which prevented me to run them twice than with being able to run twice an application which shouldn't have been run so. So the answer on "why and on which scope" is very important and will probably bring answer specific on the why and the intended scope.
Here is a solution based on sem_open
/*
*compile with :
*gcc single.c -o single -pthread
*/
/*
* run multiple instance on 'single', and check the behavior
*/
#include <stdio.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>
#include <errno.h>
#define SEM_NAME "/mysem_911"
int main()
{
sem_t *sem;
int rc;
sem = sem_open(SEM_NAME, O_CREAT, S_IRWXU, 1);
if(sem==SEM_FAILED){
printf("sem_open: failed errno:%d\n", errno);
}
rc=sem_trywait(sem);
if(rc == 0){
printf("Obtained lock !!!\n");
sleep(10);
//sem_post(sem);
sem_unlink(SEM_NAME);
}else{
printf("Lock not obtained\n");
}
}
One of the comments on a different answer says "I found sem_open() rather lacking". I am not sure about the specifics of what's lacking
Based on the hints in maxim's answer here is my POSIX solution of a dual-role daemon (i.e. a single application that can act as daemon and as a client communicating with that daemon). This scheme has the advantage of providing an elegant solution of the problem when the instance started first should be the daemon and all following executions should just load off the work at that daemon. It is a complete example but lacks a lot of stuff a real daemon should do (e.g. using syslog for logging and fork to put itself into background correctly, dropping privileges etc.), but it is already quite long and is fully working as is. I have only tested this on Linux so far but IIRC it should be all POSIX-compatible.
In the example the clients can send integers passed to them as first command line argument and parsed by atoi via the socket to the daemon which prints it to stdout. With this kind of sockets it is also possible to transfer arrays, structs and even file descriptors (see man 7 unix).
#include <stdio.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <sys/socket.h>
#include <sys/un.h>
#define SOCKET_NAME "/tmp/exampled"
static int socket_fd = -1;
static bool isdaemon = false;
static bool run = true;
/* returns
* -1 on errors
* 0 on successful server bindings
* 1 on successful client connects
*/
int singleton_connect(const char *name) {
int len, tmpd;
struct sockaddr_un addr = {0};
if ((tmpd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) {
printf("Could not create socket: '%s'.\n", strerror(errno));
return -1;
}
/* fill in socket address structure */
addr.sun_family = AF_UNIX;
strcpy(addr.sun_path, name);
len = offsetof(struct sockaddr_un, sun_path) + strlen(name);
int ret;
unsigned int retries = 1;
do {
/* bind the name to the descriptor */
ret = bind(tmpd, (struct sockaddr *)&addr, len);
/* if this succeeds there was no daemon before */
if (ret == 0) {
socket_fd = tmpd;
isdaemon = true;
return 0;
} else {
if (errno == EADDRINUSE) {
ret = connect(tmpd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un));
if (ret != 0) {
if (errno == ECONNREFUSED) {
printf("Could not connect to socket - assuming daemon died.\n");
unlink(name);
continue;
}
printf("Could not connect to socket: '%s'.\n", strerror(errno));
continue;
}
printf("Daemon is already running.\n");
socket_fd = tmpd;
return 1;
}
printf("Could not bind to socket: '%s'.\n", strerror(errno));
continue;
}
} while (retries-- > 0);
printf("Could neither connect to an existing daemon nor become one.\n");
close(tmpd);
return -1;
}
static void cleanup(void) {
if (socket_fd >= 0) {
if (isdaemon) {
if (unlink(SOCKET_NAME) < 0)
printf("Could not remove FIFO.\n");
} else
close(socket_fd);
}
}
static void handler(int sig) {
run = false;
}
int main(int argc, char **argv) {
switch (singleton_connect(SOCKET_NAME)) {
case 0: { /* Daemon */
struct sigaction sa;
sa.sa_handler = &handler;
sigemptyset(&sa.sa_mask);
if (sigaction(SIGINT, &sa, NULL) != 0 || sigaction(SIGQUIT, &sa, NULL) != 0 || sigaction(SIGTERM, &sa, NULL) != 0) {
printf("Could not set up signal handlers!\n");
cleanup();
return EXIT_FAILURE;
}
struct msghdr msg = {0};
struct iovec iovec;
int client_arg;
iovec.iov_base = &client_arg;
iovec.iov_len = sizeof(client_arg);
msg.msg_iov = &iovec;
msg.msg_iovlen = 1;
while (run) {
int ret = recvmsg(socket_fd, &msg, MSG_DONTWAIT);
if (ret != sizeof(client_arg)) {
if (errno != EAGAIN && errno != EWOULDBLOCK) {
printf("Error while accessing socket: %s\n", strerror(errno));
exit(1);
}
printf("No further client_args in socket.\n");
} else {
printf("received client_arg=%d\n", client_arg);
}
/* do daemon stuff */
sleep(1);
}
printf("Dropped out of daemon loop. Shutting down.\n");
cleanup();
return EXIT_FAILURE;
}
case 1: { /* Client */
if (argc < 2) {
printf("Usage: %s <int>\n", argv[0]);
return EXIT_FAILURE;
}
struct iovec iovec;
struct msghdr msg = {0};
int client_arg = atoi(argv[1]);
iovec.iov_base = &client_arg;
iovec.iov_len = sizeof(client_arg);
msg.msg_iov = &iovec;
msg.msg_iovlen = 1;
int ret = sendmsg(socket_fd, &msg, 0);
if (ret != sizeof(client_arg)) {
if (ret < 0)
printf("Could not send device address to daemon: '%s'!\n", strerror(errno));
else
printf("Could not send device address to daemon completely!\n");
cleanup();
return EXIT_FAILURE;
}
printf("Sent client_arg (%d) to daemon.\n", client_arg);
break;
}
default:
cleanup();
return EXIT_FAILURE;
}
cleanup();
return EXIT_SUCCESS;
}
All credits go to Mark Lakata. I merely did some very minor touch up only.
main.cpp
#include "singleton.hpp"
#include <iostream>
using namespace std;
int main()
{
SingletonProcess singleton(5555); // pick a port number to use that is specific to this app
if (!singleton())
{
cerr << "process running already. See " << singleton.GetLockFileName() << endl;
return 1;
}
// ... rest of the app
}
singleton.hpp
#include <netinet/in.h>
#include <unistd.h>
#include <cerrno>
#include <string>
#include <cstring>
#include <stdexcept>
using namespace std;
class SingletonProcess
{
public:
SingletonProcess(uint16_t port0)
: socket_fd(-1)
, rc(1)
, port(port0)
{
}
~SingletonProcess()
{
if (socket_fd != -1)
{
close(socket_fd);
}
}
bool operator()()
{
if (socket_fd == -1 || rc)
{
socket_fd = -1;
rc = 1;
if ((socket_fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
{
throw std::runtime_error(std::string("Could not create socket: ") + strerror(errno));
}
else
{
struct sockaddr_in name;
name.sin_family = AF_INET;
name.sin_port = htons (port);
name.sin_addr.s_addr = htonl (INADDR_ANY);
rc = bind (socket_fd, (struct sockaddr *) &name, sizeof (name));
}
}
return (socket_fd != -1 && rc == 0);
}
std::string GetLockFileName()
{
return "port " + std::to_string(port);
}
private:
int socket_fd = -1;
int rc;
uint16_t port;
};
#include <windows.h>
int main(int argc, char *argv[])
{
// ensure only one running instance
HANDLE hMutexH`enter code here`andle = CreateMutex(NULL, TRUE, L"my.mutex.name");
if (GetLastError() == ERROR_ALREADY_EXISTS)
{
return 0;
}
// rest of the program
ReleaseMutex(hMutexHandle);
CloseHandle(hMutexHandle);
return 0;
}
FROM: HERE
On Windows you could also create a shared data segment and use an interlocked function to test for the first occurence, e.g.
#include <Windows.h>
#include <stdio.h>
#include <conio.h>
#pragma data_seg("Shared")
volatile LONG lock = 0;
#pragma data_seg()
#pragma comment(linker, "/SECTION:Shared,RWS")
void main()
{
if (InterlockedExchange(&lock, 1) == 0)
printf("first\n");
else
printf("other\n");
getch();
}
I have just written one, and tested.
#define PID_FILE "/tmp/pidfile"
static void create_pidfile(void) {
int fd = open(PID_FILE, O_RDWR | O_CREAT | O_EXCL, 0);
close(fd);
}
int main(void) {
int fd = open(PID_FILE, O_RDONLY);
if (fd > 0) {
close(fd);
return 0;
}
// make sure only one instance is running
create_pidfile();
}
Just run this code on a seperate thread:
void lock() {
while(1) {
ofstream closer("myapplock.locker", ios::trunc);
closer << "locked";
closer.close();
}
}
Run this as your main code:
int main() {
ifstream reader("myapplock.locker");
string s;
reader >> s;
if (s != "locked") {
//your code
}
return 0;
}

Shared variable between different executables in Linux

What I want to do is to create a globally shared variable to be accessed by different processes. I want the child process to be replaced by an existing executable.
UPDATE: I think this is the solution. The code is borrowed from here. But since every process needs at least one I/O operation to mmap the file, is there any faster approach?
mycode.h
static void* addr; //static
app1.cc
include
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <sys/stat.h>
int main(void)
{
size_t length = 1024 * 1024;
off_t offset = 0;
int prot = (PROT_READ| PROT_WRITE);
int flags = MAP_SHARED;
int fd = -1;
fd = open("./jim.mymemory", O_RDWR| O_CREAT, S_IRUSR| S_IWUSR );
if (fd == 0) {
int myerr = errno;
printf("ERROR: open failed (errno %d %s)\n", myerr, strerror(myerr));
return EXIT_FAILURE;
}
addr = mmap(NULL, length, prot, flags, fd, offset);
if (addr == 0) {
int myerr = errno;
printf("ERROR (child): mmap failed (errno %d %s)\n", myerr,
strerror(myerr));
}
*((int *) addr)=5;
if (munmap(addr, length) == -1) {
int myerr = errno;
printf("ERROR (child): munmap failed (errno %d %s)\n", myerr,
strerror(myerr));
}
return 0;
}
mycode.cc
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include "mycode.h"
int main(void) {
size_t length = 1024 * 1024;
off_t offset = 0;
int prot = (PROT_READ| PROT_WRITE);
int flags = MAP_SHARED;
int fd = -1;
pid_t pid;
fd = open("./jim.mymemory", O_RDWR| O_CREAT, S_IRUSR| S_IWUSR );
if (fd == 0) {
int myerr = errno;
printf("ERROR: open failed (errno %d %s)\n", myerr, strerror(myerr));
return EXIT_FAILURE;
}
if (lseek(fd, length - 1, SEEK_SET) == -1) {
int myerr = errno;
printf("ERROR: lseek failed (errno %d %s)\n", myerr, strerror(myerr));
return EXIT_FAILURE;
}
write(fd, "", 1);
if ((pid = fork()) == 0) { // child
/*Child process*/
printf("INFO (child): start \n");
execv("./app1", NULL); // **app1**
printf("INFO (child): done \n");
msync(addr,sizeof(int),MS_SYNC|MS_INVALIDATE); // can be commented out, since we wait in the parent process
} else {
/*Parent process*/
unsigned int readval = 0;
addr = mmap(NULL, length, prot, flags, fd, offset);
if (addr == 0) {
int myerr = errno;
printf("ERROR (parent): mmap failed (errno %d %s)\n", myerr,
strerror(myerr));
}
printf("INFO (parent): start read\n");
wait(NULL);
readval = *((int *) addr);
printf("val: %d \n", readval);
printf("INFO (parent): done read\n");
if (munmap(addr, length) == -1) {
int myerr = errno;
printf("ERROR (parent): munmap failed (errno %d %s)\n", myerr,
strerror(myerr));
}
}
if (close(fd) == -1) {
int myerr = errno;
printf("ERROR: close failed (errno %d %s)\n", myerr, strerror(myerr));
}
unlink ("./jim.mymemory");
return EXIT_SUCCESS;
}
Any help is appreciated.
The execve will drop all mappings in the kernel, so this technique will not work. What you can do instead is open a file (as in Vaughn's suggestion) and pass the descriptor to the child process. Open file descriptors are unchanged across an exec. Then you can map it in the child. Alternatively, investigate APIs like shm_open()/shm_unlink() which will manage a global file mapping such that other processes can use it, not just a child.
But basically: you have to mmap() in the child, you can't pass anything in your address space to the child in Unix.

Resources