Deadlock in MCS lock implementation - multithreading

Hardware:
Darwin Kernel Version 13.2.0: Thu Apr 17 23:03:13 PDT 2014; root:xnu-2422.100.13~1/RELEASE_X86_64 x86_64
atomics.hpp
1 #ifndef ATOMIC_UTILS_H
2 #define ATOMIC_UTILS_H
3
4 #include
5
6 #define BARRIER() __asm__ volatile ( "": : :"memory" )
7
8 #define CPU_RELAX() __asm__ volatile( "pause\n\t": : :"memory" )
9
10 #define STORE_FENCE() __asm__ volatile("mfence" ::: "memory");
11
12 class AtomicUtils
13 {
14 public:
15
16 /**
17 * check if the value at addr is equal to oldval, if so replace it with newva l
18 * and return the oldval
19 */
20 inline static size_t compareAndExchange( volatile size_t* addr, size_t oldval , size_t newval )
21 {
22 size_t ret;
23 __asm__ volatile( "lock cmpxchgq %2, %1\n\t"
24 :"=a"(ret), "+m"(*addr)
25 : "r"(newval), "0"(oldval)
26 : "memory" );
27 return ret;
28 }
29
30 /**
31 * Atomically stores x into addr and returns the previous
32 * stored in addr
33 */
34 inline static size_t loadAndStore( size_t x, volatile size_t* addr )
36 {
37 size_t ret;
38 __asm__ volatile( "lock xchgq %1, %0\n\t"
39 : "+m"(*addr), "=r"(ret)
40 : "1"(x) );
41 return ret;
42 }
43
44 };
45
46 #endif
mcs.hpp
1 #ifndef MCS_LOCK_H
2 #define MCS_LOCK_H
3
4 #include "atomics.hpp"
5 #include
6
7 class MCSLock
8 {
9 struct mcs_lock_t
10 {
11 mcs_lock_t():next(0), locked(false){}
12 struct mcs_lock_t* next;
13 bool locked;
14 };
15
16 public:
17 typedef struct mcs_lock_t mcs_lock;
18
19 private:
20 mcs_lock** tail;
21 static boost::thread_specific_ptr tls_node;
22
23 public:
24 MCSLock( mcs_lock** lock_tail ):tail( lock_tail )
25 {
26 if( tls_node.get() == 0 )
27 tls_node.reset( new mcs_lock() );
28 }
29
30 void lock()
31 {
32 mcs_lock* thread_node = tls_node.get();
33 thread_node->next = 0;
34 thread_node->locked = true;
35
36 volatile mcs_lock* pred = reinterpret_cast(
37 AtomicUtils::loadAndStore(
38 reinterpret_cast( thread_node ),
39 reinterpret_cast( tail )
40 )
41 );
42 if( pred != 0 )
43 {
44 pred->next = *tail;
45
46 STORE_FENCE();
47 //BARRIER(); // Required to prevent re ordering between prev->next = tail and thread_node->locked. ( WR harzard )
48
49 // Spin on a local variable. Someone unlock me plz !!
50 while( thread_node->locked )
51 CPU_RELAX();
52
53 }
54 }
55
56 void unlock()
57 {
58 mcs_lock* thread_node = tls_node.get();
59 if( thread_node->next == 0 )
60 {
61 // If false, then we a new thread has request for lock. Now release t he lock for the new thread
62 if(
63 AtomicUtils::compareAndExchange(
64 reinterpret_cast( tail ),
65 reinterpret_cast( thread_node ),
66 0
67 ) == reinterpret_cast( thread_node ) 68 )
69 {
70 return;
71 }
72
73 while( thread_node->next == 0 )
74 CPU_RELAX();
75 }
76
77 thread_node->next->locked = false;
78 }
79 };
80
81 boost::thread_specific_ptr MCSLock::tls_node;
82 #endif
mcs_test.cpp
1 #include "mcs.hpp"
2 #include <iostream>
3 #include <pthread.h>
4 #include <vector>
5 #define NUM_THREADS 16
6 #define NUM_ITERATIONS 100
7
8 std::vector<int> elements;
9 MCSLock::mcs_lock *tail = 0;
10
11 void* thread_run( void* data )
12 {
13 MCSLock lock( &tail );
14 for( int i = 0; i < NUM_ITERATIONS; ++i )
15 {
16 lock.lock();
17 elements.push_back( i );
18 lock.unlock();
19 }
20
21 return 0;
22 }
23
24 int main()
25 {
26 pthread_t threads[ NUM_THREADS ];
27 elements.reserve( NUM_THREADS * NUM_ITERATIONS );
28
29 {
30 for( int i = 0; i < NUM_THREADS; ++i )
31 pthread_create( &threads[i], NULL, thread_run, NULL );
32
33 for( int i = 0; i < NUM_THREADS; ++i )
34 pthread_join( threads[i], NULL );
35
36 std::cout <<"\nExiting main thread: " << std::endl;
37 }
38 }
The above code is compiled using clang
Problem:
I see that 1 or 2 threads get stuck in lock() at line 50 (the spin loop). Apart from the main thread and the threads stuck in lock(), no other threads are alive. This means that when the other threads invoked unlock(), they somehow exited without setting locked = false on their successor's node.
Any pointers on debugging this please ?
Stuck on this for many hours and no clues.

Doesn't clang have builtins for these inline-asm blocks (like gcc's __sync_val_compare_and_swap)? Why re-invent the wheel?
Second, I'd really think about adding the memory clobber to loadAndStore. You need to make sure that any writes the compiler is holding in registers get flushed to memory before doing the xchgq. Similarly, it will prevent the compiler from moving memory reads to before the xchgq. Either would be bad.
Third, I'd examine the asm output for your while loops (thread_node->locked and thread_node->next). Since these variables are not volatile, the compiler may optimize each loop to perform the read only once.
These may not solve your problem, but that's where I'd start.

Related

How to filter Zero division expression when I want to generate random expression

I have written a program that evaluates math expressions using the four basic operators. Now I want to write a program that generates random expressions to test my calculator, but there is a problem: a generated expression may contain a division by zero, which raises an error. I tried to kill the subprocess when it causes an error, but I failed. I don't know how to avoid this problem.
#include <stdint.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <time.h>
20 #include <assert.h>
21 #include <string.h>
22
23 static char buf[65536] = {};
24 static char code_buf[65536 + 128] = {}; // a little larger than `buf`
25 static char *code_format =
26 "#include <stdio.h>\n"
27 "int main() { "
28
29 " unsigned result = %s; "
30 " printf(\"%%u\", result); "
31 " return 0; "
32 "}";
33
34 static int loc=0;//used in gen_rand_expr()
int main(int argc, char *argv[]) {
183 int seed = time(0);
184 srand(seed);
185 int loop = 1;
186 if (argc > 1) {
187 sscanf(argv[1], "%d", &loop);
188 }
189 int i;
190 for (i = 0; i < loop; i ++) {
191 gen_rand_expr(); //generate random expression
192
193 loc=0;
194 sprintf(code_buf, code_format, buf);
195
196 FILE *fp = fopen("/tmp/.code.c", "w");
197 assert(fp != NULL);
198 fputs(code_buf, fp);
199 fclose(fp);
200
201 int ret = system("gcc /tmp/.code.c -o /tmp/.expr");
202 if (ret != 0) continue;
203 fp = popen("/tmp/.expr", "r");
204 assert(fp != NULL);
205
206 int result;
207 fscanf(fp, "%d", &result);
208 pclose(fp);
209
210 printf("%u %s\n", result, buf);
211 }
212 return 0;
213 }

How to properly use if else statements and while loops with a child process in C

I'm new to C and I've been trying to create a program that takes an integer from the user and generates a sequence in which each next term depends on whether the current number is even or odd.
n / 2 if n is even
3 * n + 1 if n is odd
A new number will be computed until the sequence reaches 1. For example if a user inputs 35:
35, 106, 53, 160, 80, 40, 20, 10, 5, 16, 8, 4, 2, 1
For some reason my code doesn't work after the scan statement of the child process. I left my code and sample output below:
Code:
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
int main()
{
pid_t pid;
int i = 0;
int j = 0;
/* fork a child process */
pid = fork();
if (pid < 0) { /* error occurred */
fprintf(stderr, "Fork Failed\n");
return 1;
}
else if (pid == 0) { /* child process */
printf("I am the child %d\n",pid);
printf("Enter a value: \n");
scanf("%d", i);
while (i < 0) {
printf("%d is not a positive integer. Please try again.\n", i);
printf("Enter a value: \n");
scanf("%d", i);
}
// can add a print i here
while (i != 1) {
if (i % 2 == 0) { // if the inputted number is even
j = i / 2;
}
else {
j = 3 * i + 1;
}
printf("%d", j);
}
}
else { /* parent process */
/* parent will wait for the child to complete */
printf("I am the parent %d\n",pid);
wait(NULL); // wait(NULL) will wait for the child process to complete and takes the status code of the child process as a parameter
printf("Child Complete\n");
}
return 0;
}
Output I'm getting on terminal in Linux (Debian):
oscreader#OSC:~/osc9e-src/ch3$ gcc newproc-posix.c
oscreader#OSC:~/osc9e-src/ch3$ ./a.out
I am the parent 16040
I am the child 0
Enter a value:
10
Child Complete
oscreader#OSC:~/osc9e-src/ch3$
Transferring comments into a semi-coherent answer.
Your calls to scanf() require a pointer argument; you give it an integer argument. Use scanf("%d", &i); — and it would be a good idea to check that scanf() returns 1 before testing the result.
My compiler told me about your bug. Why didn't your compiler do so too? Make sure you enable every warning you can! Your comment indicates that you're using gcc (or perhaps clang) — I routinely compile with:
gcc -std=c11 -O3 -g -Werror -Wall -Wextra -Wstrict-prototypes …
Indeed, for code from SO, I add -Wold-style-declarations -Wold-style-definitions to make sure functions are declared and defined properly. It's often a good idea to add -pedantic to avoid accidental use of GCC extensions.
In the loop, you don't need j — you should be changing and printing i instead.
cz17.c
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>
/*
 * Fork a child that reads a start value in 1..1000000 from stdin and prints
 * the sequence n -> n / 2 (n even) or n -> 3 * n + 1 (n odd) until it reaches 1.
 * The parent waits for the child and reports its PID and raw wait status.
 *
 * Returns 0 on success, 1 if fork() fails or (in the child) if no integer
 * could be read from stdin.
 */
int main(void)
{
    int i = 0;
    pid_t pid = fork();

    if (pid < 0)
    {
        fprintf(stderr, "Fork Failed\n");
        return 1;
    }

    if (pid == 0)
    {
        /* Child: prompt until a value inside the accepted range is entered. */
        printf("I am the child %d\n", pid);
        printf("Enter a value: \n");
        if (scanf("%d", &i) != 1)
        {
            fprintf(stderr, "failed to read an integer\n");
            return 1;
        }
        while (i <= 0 || i > 1000000)
        {
            printf("value %d out of range 1..1000000. Try again.\n", i);
            printf("Enter a value: \n");
            if (scanf("%d", &i) != 1)
            {
                fprintf(stderr, "failed to read an integer\n");
                return 1;
            }
        }

        /*
         * Carry the sequence in a wider type: even for start values inside
         * 1..1000000 the terms can climb past INT_MAX (a start of 113383
         * already does), and signed int overflow is undefined behaviour.
         */
        long long n = i;
        while (n != 1)
        {
            n = (n % 2 == 0) ? n / 2 : 3 * n + 1;
            printf(" %lld", n);
            fflush(stdout);   /* show each term as soon as it is computed */
        }
        putchar('\n');
    }
    else
    {
        /* Parent: block until the child terminates, then report on it. */
        printf("I am the parent of %d\n", pid);
        int status;
        int corpse = wait(&status);
        printf("Child Complete (%d - 0x%.4X)\n", corpse, status);
    }
    return 0;
}
Compilation:
gcc -O3 -g -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes -Wstrict-prototypes cz17.c -o cz17
Sample output:
$ cz17
I am the parent of 41838
I am the child 0
Enter a value:
2346
1173 3520 1760 880 440 220 110 55 166 83 250 125 376 188 94 47 142 71 214 107 322 161 484 242 121 364 182 91 274 137 412 206 103 310 155 466 233 700 350 175 526 263 790 395 1186 593 1780 890 445 1336 668 334 167 502 251 754 377 1132 566 283 850 425 1276 638 319 958 479 1438 719 2158 1079 3238 1619 4858 2429 7288 3644 1822 911 2734 1367 4102 2051 6154 3077 9232 4616 2308 1154 577 1732 866 433 1300 650 325 976 488 244 122 61 184 92 46 23 70 35 106 53 160 80 40 20 10 5 16 8 4 2 1
Child Complete (41838 - 0x0000)
$

LINUX msgget and queue

I have simple code:
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <errno.h>
4 #include <string.h>
5 #include <sys/types.h>
6 #include <sys/ipc.h>
7 #include <sys/msg.h>
8
9 struct my_msgbuf {
10 long mtype;
11 char mtext[200];
12 };
13
14 int main(void){
15 struct my_msgbuf buf;
16 int msqid;
17 key_t key;
18
19 if((key = ftok("main.c", 'B')) == -1) {
20 perror("ftok"); exit(1);
21 }
22
23 if( (msqid = msgget(key, 0644 | IPC_CREAT)) == -1){
24 perror("msgget");
25 exit(1);
26 }
27
28 printf("Enter lines of text, ^D to quit:\n");
29 buf.mtype = 1;
30 while(fgets(buf.mtext, sizeof(buf.mtext), stdin) != NULL){
31 int len = strlen(buf.mtext);
32 if(buf.mtext[len-1] == '\n') buf.mtext[len-1] = '\0';
33 if(msgsnd(msqid, &buf, len+1,0) == -1){
34 perror("msgsnd");
35 }
36
37 }
38
39 if(msgctl(msqid, IPC_RMID, NULL) == -1){
40 perror("msgctl");
41 exit(1);
42 }
43
44 return 0;
45 }
and next step what i do:
gcc -o main main.c
next step run:
./main
and results:
msgget: no space left on device
How can I fix this? I'm working on a university server (connecting via PuTTY); could that be related to the problem?
According to the man page, ENOSPC is the error for:
A message queue has to be created but the system limit for the maximum number of message queues (MSGMNI) would be exceeded.
If it's a server, it means there are too many message queues created by other users (not unlikely if this is some homework assignment and other students also work on the server). You can see the MSGMNI value with cat /proc/sys/kernel/msgmni. Only the admin can change it, however.

PCI driver to fetch MAC address

I was trying to write a pci driver which can display the MAC address of my Ethernet card.
Running a Ubuntu on VM and my Ethernet card is Intel one as follows
00:08.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 02)
I was able to get the data sheet for it from the Intel website. As per the data sheet, the IO addresses are mapped to BAR 2 (refer to pg 87), and the MAC can be read using the RAL/RAH registers, which are at offsets RAL (05400h + 8*n; R/W) and RAH (05404h + 8*n; R/W).
2 18h IO Register Base Address (bits 31:2) 0b mem
Based on this information, I wrote a small PCI driver, but I always get the MAC as fff, and when I debugged further, I saw that the io_base address is always zero.
Below is the code
1 /*
2 Program to find a device on the PCI sub-system
3 */
4 #define VENDOR_ID 0x8086
5 #define DEVICE_ID 0x100e
6
7 #include <linux/kernel.h>
8 #include <linux/module.h>
9 #include <linux/stddef.h>
10 #include <linux/pci.h>
11 #include <linux/init.h>
12 #include <linux/cdev.h>
13 #include <linux/device.h>
14 #include <asm/io.h>
15
16 #define LOG(string...) printk(KERN_INFO string)
17
18 #define CDEV_MAJOR 227
19 #define CDEV_MINOR 0
20
21
22 MODULE_LICENSE("GPL");
23
24 struct pci_dev *pci_dev;
25 unsigned long mmio_addr;
26 unsigned long reg_len;
27 unsigned long *base_addr;
28
29 int device_probe(struct pci_dev *dev, const struct pci_device_id *id);
30 void device_remove(struct pci_dev *dev);
31
32 struct pci_device_id pci_device_id_DevicePCI[] =
33 {
34 {VENDOR_ID, DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
35 };
36
37 struct pci_driver pci_driver_DevicePCI =
38 {
39 name: "MyPCIDevice",
40 id_table: pci_device_id_DevicePCI,
41 probe: device_probe,
42 remove: device_remove
43 };
44
45
46 int init_module(void)
47 {
48 //struct pci_dev *pdev = NULL;
49 int ret = 0;
50
51 pci_register_driver(&pci_driver_DevicePCI);
52
53 return ret;
54 }
55
56 void cleanup_module(void)
57 {
58 pci_unregister_driver(&pci_driver_DevicePCI);
59
60 }
61
62 #define REGISTER_OFFSET 0x05400
64 int device_probe(struct pci_dev *dev, const struct pci_device_id *id)
65 {
66 int ret;
67 int bar = 2; // Bar to be reserved
68 unsigned long io_base = 0;
69 unsigned long mem_len = 0;
70 unsigned int register_data = 0;
71
72 LOG("Device probed");
73
74 /* Reserve the access to PCI device */
75 ret = pci_request_region(dev, bar, "my_pci");
76 if (ret) {
77 printk(KERN_ERR "request region failed :%d\n", ret);
78 return ret;
79 }
80
81 ret = pci_enable_device(dev);
82 if (ret < 0 ) LOG("Failed while enabling ... ");
83
84 io_base = pci_resource_start(dev, bar);
85 mem_len = pci_resource_len(dev, bar);
86
87 request_region(io_base, mem_len, "my_pci");
88 register_data = inw(io_base + REGISTER_OFFSET);
89 printk(KERN_INFO "IO base = %lx", io_base);
90 printk(KERN_INFO "MAC = %x", register_data);
91
92 return ret;
93 }
94
95 void device_remove(struct pci_dev *dev)
96 {
97 pci_release_regions(dev);
98 pci_disable_device(dev);
99 }
100
lspci -x output of my card
00:08.0 Ethernet controller: Intel Corporation 82540EM Gigabit Ethernet Controller (rev 02)
00: 86 80 0e 10 07 00 30 02 02 00 00 02 00 40 00 00
10: 00 00 82 f0 00 00 00 00 41 d2 00 00 00 00 00 00
20: 00 00 00 00 00 00 00 00 00 00 00 00 86 80 1e 00
30: 00 00 00 00 dc 00 00 00 00 00 00 00 09 01 ff 00
Can anyone let me know what I am doing wrong?
I've modified your code and commented on changes. I have removed all of your existing comments to avoid confusion, and have only modified your probe function.
/* We need a place to store a logical address for unmapping later */
static void* logical_address;
int device_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
int ret;
int bar_mask; /* BAR mask (this variable) and the integer BAR */
int requested_bar = 2; /* (this variable) are not the same thing, so give them */
/* separate variables */
resource_size_t io_base = 0; /* use kernel macros instead of built-in datatypes */
resource_size_t mem_len = 0;
unsigned int register_data = 0;
LOG("Device probed");
/* add this call to get the correct BAR mask */
bar_mask = pci_select_bars(dev, 0);
/* switched order - enable device before requesting memory */
ret = pci_enable_device(dev);
if (ret < 0 ) LOG("Failed while enabling ... ");
/* for this call, we want to pass the BAR mask, NOT the integer bar we want */
ret = pci_request_region(dev, bar_mask, "my_pci");
if (ret) {
printk(KERN_ERR "request region failed :%d\n", ret);
return ret;
}
/* it is in THESE calls that we request a specific BAR */
io_base = pci_resource_start(dev, requested_bar);
mem_len = pci_resource_len(dev, requested_bar);
/* you don't need to request anything again, so get rid of this line: */
/* request_region(io_base, mem_len, "my_pci"); */
/* you're missing an important step: we need to translate the IO address
* to a kernel logical address that we can actually use. Add a call to
* ioremap()
*/
logical_address = ioremap(io_base, mem_len);
/* we need to use the logical address returned by ioremap(), not the physical
* address returned by resource_start
*/
register_data = inw(logical_address + REGISTER_OFFSET);
printk(KERN_INFO "IO base = %lx", io_base);
printk(KERN_INFO "MAC = %x", register_data);
return ret;
}
You will need to add a corresponding call to iounmap() in your device_remove() routine. Take a look at the source code of the Intel e1000 driver (the in-kernel driver for this family of cards) for some good examples.

linux kernel ip_options_build() function

Below is the ip_options_build() in linux kernel 3.4, line 51 and 52:
51 if (opt->srr)
52 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
I understand that these two lines say: if the source routing option is present, copy the destination address to the end of the option. That suggests that iph[opt->srr+1] is the length of the source routing option, but I don't understand why.
31/*
32 * Write options to IP header, record destination address to
33 * source route option, address of outgoing interface
34 * (we should already know it, so that this function is allowed be
35 * called only after routing decision) and timestamp,
36 * if we originate this datagram.
37 *
38 * daddr is real destination address, next hop is recorded in IP header.
39 * saddr is address of outgoing interface.
40 */
41
42void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
43 __be32 daddr, struct rtable *rt, int is_frag)
44{
45 unsigned char *iph = skb_network_header(skb);
46
47 memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
48 memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
49 opt = &(IPCB(skb)->opt);
50
51 if (opt->srr)
52 memcpy(iph+opt->srr+iph[opt->srr+1]-4, &daddr, 4);
53
54 if (!is_frag) {
55 if (opt->rr_needaddr)
56 ip_rt_get_source(iph+opt->rr+iph[opt->rr+2]-5, skb, rt);
57 if (opt->ts_needaddr)
58 ip_rt_get_source(iph+opt->ts+iph[opt->ts+2]-9, skb, rt);
59 if (opt->ts_needtime) {
60 struct timespec tv;
61 __be32 midtime;
62 getnstimeofday(&tv);
63 midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC
+ tv.tv_nsec / NSEC_PER_MSEC);
64 memcpy(iph+opt->ts+iph[opt->ts+2]-5, &midtime, 4);
65 }
66 return;
67 }
68 if (opt->rr) {
69 memset(iph+opt->rr, IPOPT_NOP, iph[opt->rr+1]);
70 opt->rr = 0;
71 opt->rr_needaddr = 0;
72 }
73 if (opt->ts) {
74 memset(iph+opt->ts, IPOPT_NOP, iph[opt->ts+1]);
75 opt->ts = 0;
76 opt->ts_needaddr = opt->ts_needtime = 0;
77 }
78}
If I remember correctly, iph + opt->srr is basically the address of the first byte of the srr option. The format of the option itself is as follows:
TYPE (1 byte) | LENGTH (1 byte) | OFFSET (1 byte) | ... and then some addresses 4 bytes each
The LENGTH "field" specifies the length in bytes of the entire option, so that's why iph[opt->srr+1] is the length of the option.

Resources