Communicate Kernel module and user space (Driver) - linux

I am working with a SoC FPGA. In the past I have been testing some peripherals through polling, and now I want to work with interrupts.
I have followed some tutorials and now I have a driver which can detect interrupts on the IRQ 72. The problem is that I want to reply to that IRQ and I have been trying different things but none seems to work.
My latest attempt uses a C program to do the logic part, i.e. it performs some action when the kernel tells it that an interrupt has occurred. The program writes its PID to a file; I want the kernel to read that PID and send the program a SIGUSR1 signal, and then the program will do something.
Kernel module:
#include <linux/module.h> // Needed by all modules
#include <linux/kernel.h> // Needed for KERN_INFO
#include <linux/fs.h> // Needed by filp
#include <linux/err.h> // Needed for IS_ERR
#include <asm/uaccess.h> // Needed by segment descriptors
#include <linux/init.h> /*Needed for the macros*/
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/of.h>
#define DEVNAME "test_int"
static irq_handler_t __test_isr(int irq, void *dev_id, struct pt_regs *regs){
printk (KERN_INFO DEVNAME ": ISR\n");
return (irq_handler_t) IRQ_HANDLED;
}
static int __test_int_driver_probe(struct platform_device* pdev){
// Create variables
struct file *f;
char buf[128];
mm_segment_t fs;
int i;
// Init the buffer with 0
for(i=0;i<128;i++)
buf[i] = 0;
// PID file
// It is an inteer, so i guess 4Bytes would be better
f = filp_open("/home/root/modInt/miPID", O_RDONLY, 0);
if(f == NULL)
printk(KERN_ALERT "filp_open error!!.\n");
else{
// Get current segment descriptor
fs = get_fs();
// Set segment descriptor associated to kernel space
set_fs(get_ds());
// Read the file
f->f_op->read(f, buf, 128, &f->f_pos);
// Restore segment descriptor
set_fs(fs);
// See what we read from file
printk("El PID es buf:%s\n",buf);
}
filp_close(f,NULL);
int irq_num;
irq_num = platform_get_irq(pdev, 0);
printk(KERN_INFO DEVNAME ": La IRQ %d va a ser registrada!\n", irq_num);
return request_irq(irq_num, (irq_handler_t) __test_isr, 0, DEVNAME, NULL);
}
// Remove callback of the platform driver: looks up the device's first
// interrupt again, logs it and releases it.
//   pdev - platform device being unbound
// Always returns 0.
static int __test_int_driver_remove (struct platform_device *pdev){
	const int irq_num = platform_get_irq (pdev, 0);

	printk(KERN_INFO "test_int: Abandonando la captura de la IRQ %d !\n", irq_num);
	// NULL is the same dev_id cookie that was passed to request_irq().
	free_irq(irq_num, NULL);

	return 0;
}
// Device-tree match table handed to the platform driver below.
// NOTE(review): the compatible string contains spaces around the comma;
// confirm that it matches the node in the device tree character for character.
static const struct of_device_id __test_int_driver_id[] = {
	{ .compatible = "altr , socfpga-mysoftip" },
	{ /* empty entry terminates the table */ },
};
// Platform driver description: callbacks first, then the generic driver
// data (name, owning module and device-tree match table).
static struct platform_driver __test_int_driver = {
	.probe = __test_int_driver_probe,
	.remove = __test_int_driver_remove,
	.driver = {
		.owner = THIS_MODULE,
		.name = DEVNAME,
		.of_match_table = of_match_ptr (__test_int_driver_id),
	},
};

// Generates the module init/exit functions that register and unregister
// the driver.
module_platform_driver (__test_int_driver);

MODULE_LICENSE("GPL");
Program:
#include<stdio.h>
#include<signal.h>
#include<unistd.h>
// Signal handler: reports the reception of SIGUSR1 on standard output.
//   signo - number of the signal being delivered; anything other than
//           SIGUSR1 is ignored
void sig_handler(int signo)
{
	static const char msg[] = "Senal SIGUSR1 recibida\n";

	if (signo == SIGUSR1) {
		// printf() is not async-signal-safe and may deadlock or corrupt
		// the stdio buffer when it interrupts another stdio call;
		// write(2) is safe to call from a signal handler.
		ssize_t written = write(STDOUT_FILENO, msg, sizeof msg - 1);
		(void)written;	// nothing useful can be done on failure here
	}
}
// User-space helper: installs the SIGUSR1 handler, publishes its own PID in
// the file "miPID" and then waits forever for signals.
// Returns 1 if the handler cannot be installed or the PID file cannot be
// written; otherwise it never returns.
// NOTE(review): "miPID" is relative to the current directory, while the
// kernel module opens /home/root/modInt/miPID - presumably the program is
// meant to be started from that directory; confirm.
int main(void){
	int pid=getpid();
	FILE *f;
	int write_failed;

	// Install the handler BEFORE the PID becomes visible: the default action
	// of SIGUSR1 terminates the process, so a signal arriving between
	// publishing the PID and installing the handler would kill it.
	if (signal(SIGUSR1, sig_handler) == SIG_ERR){
		printf("\nNo se ha podido capturar SIGUSR1\n");
		return 1;
	}

	f = fopen("miPID", "w");
	if (f == NULL){
		printf("Error opening file!\n");
		return 1;
	}
	// The file is closed even when the write fails; either failure means
	// the PID may not have reached the file.
	write_failed = fprintf(f, "%d", pid) < 0;
	if (fclose(f) != 0 || write_failed){
		printf("Error writing file!\n");
		return 1;
	}
	printf("My process ID : %d\n", pid);

	// A long long wait so that we can easily issue a signal to this process
	while(1)
		sleep(1);
	return 0;
}
It compiles fine for the ARM architecture. Errors when loading the module:
root#socfpga:~/modInt# insmod sigGen.ko
[ 63.121696] sigGen: loading out-of-tree module taints kernel.
[ 63.129185] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[ 63.138088] pgd = ee7b0000
[ 63.140801] [00000000] *pgd=3fcb2831
[ 63.144381] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM
[ 63.144385] Modules linked in: sigGen(O+)
[ 63.144399] CPU: 1 PID: 1350 Comm: insmod Tainted: G O 4.14.73-rt45-ltsi #2
[ 63.144401] Hardware name: Altera SOCFPGA
[ 63.144406] task: ee9c3f00 task.stack: ee618000
[ 63.144411] PC is at 0x0
[ 63.144423] LR is at __test_int_driver_probe+0x80/0x108 [sigGen]
[ 63.144427] pc : [<00000000>] lr : [<bf0000ec>] psr: a0070013
[ 63.144430] sp : ee619c20 ip : c0814080 fp : ee619ccc
[ 63.144433] r10: 00000000 r9 : 00000003 r8 : bf000000
[ 63.144437] r7 : ef279600 r6 : ee4c6000 r5 : ffffe000 r4 : 00000000
[ 63.144441] r3 : ee4c6058 r2 : 00000080 r1 : ee619c28 r0 : ee4c6000
[ 63.144446] Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
[ 63.144450] Control: 10c5387d Table: 2e7b004a DAC: 00000051
[ 63.144454] Process insmod (pid: 1350, stack limit = 0xee618218)
[ 63.144457] Stack: (0xee619c20 to 0xee61a000)
[ 63.144465] 9c20: 00000000 c04c3280 00000000 00000000 00000000 00000000 00000000 00000000
[ 63.144472] 9c40: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[ 63.144478] 9c60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[ 63.144484] 9c80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[ 63.144491] 9ca0: 00000000 00000000 ef279610 ef279610 ef279610 bf002014 fffffdfb bf002014
[ 63.144499] 9cc0: ee619cec ee619cd0 c050ffb8 bf000078 ef279610 c0cabf84 c0cabf9c 00000000
[ 63.144506] 9ce0: ee619d1c ee619cf0 c050def8 c050ff68 00000000 ef279610 bf002014 ef279644
[ 63.144512] 9d00: 00000000 bf0020c8 11b4365c bf002080 ee619d3c ee619d20 c050e084 c050dcf4
[ 63.144519] 9d20: 00000000 bf002014 c050dfc8 00000000 ee619d64 ee619d40 c050bf34 c050dfd4
[ 63.144526] 9d40: ef02b06c ef28f248 c07afb2c bf002014 ef26a380 c0c56658 ee619d74 ee619d68
[ 63.144533] 9d60: c050d86c c050bec4 ee619d9c ee619d78 c050d338 c050d84c bf0012c8 ee619d88
[ 63.144540] 9d80: bf002014 bf005000 00000000 00000001 ee619db4 ee619da0 c050edb0 c050d198
[ 63.144547] 9da0: bf002080 bf005000 ee619dc4 ee619db8 c050ff08 c050ed34 ee619dd4 ee619dc8
[ 63.144554] 9dc0: bf005020 c050fec4 ee619e44 ee619dd8 c0101870 bf00500c ee618000 ef001e40
[ 63.144561] 9de0: ee619df8 ee618038 ee619e34 ee619df8 c025d0cc c025bb1c ee619e44 ee619e08
[ 63.144569] 9e00: c025bb1c c0257780 00000001 0000001f ee4037c0 ee618008 00000001 bf002080
[ 63.144575] 9e20: 00000001 bf002080 00000001 ee72fbc0 bf0020c8 11b4365c ee619e6c ee619e48
[ 63.144582] 9e40: c01accac c010182c ee619e6c ee619e58 c024c6a8 ee619f40 00000001 ee4bc9c0
[ 63.144590] 9e60: ee619f1c ee619e70 c01aba50 c01acc44 bf00208c 00007fff bf002080 c01a8a04
[ 63.144596] 9e80: bf002264 00000000 c0947650 bf0021b0 bf002180 00000000 c0803938 ee619f40
[ 63.144603] 9ea0: ee619eec ee619eb0 c026b078 c0264e58 00000001 00000000 00000000 00000000
[ 63.144609] 9ec0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[ 63.144615] 9ee0: 00000000 00000000 00000000 00000000 7fffffff 00000000 00000003 000188e1
[ 63.144622] 9f00: 0000017b c01080c4 ee618000 00000000 ee619fa4 ee619f20 c01ac274 c01a9c24
[ 63.144629] 9f20: 7fffffff 00000000 00000003 00000000 00000000 f0b1e000 0001ef00 00000000
[ 63.144636] 9f40: f0b1e412 f0b1e000 0001ef00 f0b3c7a8 f0b3c5d4 f0b35a50 00003000 00003080
[ 63.144643] 9f60: 00000000 00000000 00000000 000016fc 0000002c 0000002d 00000018 00000000
[ 63.144649] 9f80: 00000012 00000000 00000000 00000000 00000000 beb4cd88 00000000 ee619fa8
[ 63.144656] 9fa0: c0107ee0 c01ac1e4 00000000 00000000 00000003 000188e1 00000000 00000002
[ 63.144662] 9fc0: 00000000 00000000 beb4cd88 0000017b 00000000 00000000 b6f67000 00000000
[ 63.144669] 9fe0: beb4cbc0 beb4cbb0 00013fef b6eb7990 60070010 00000003 00000000 00000000
[ 63.144695] [<bf0000ec>] (__test_int_driver_probe [sigGen]) from [<c050ffb8>] (platform_drv_probe+0x5c/0xc0)
[ 63.144708] [<c050ffb8>] (platform_drv_probe) from [<c050def8>] (driver_probe_device+0x210/0x2e0)
[ 63.144718] [<c050def8>] (driver_probe_device) from [<c050e084>] (__driver_attach+0xbc/0xc0)
[ 63.144731] [<c050e084>] (__driver_attach) from [<c050bf34>] (bus_for_each_dev+0x7c/0xb0)
[ 63.144741] [<c050bf34>] (bus_for_each_dev) from [<c050d86c>] (driver_attach+0x2c/0x30)
[ 63.144749] [<c050d86c>] (driver_attach) from [<c050d338>] (bus_add_driver+0x1ac/0x224)
[ 63.144757] [<c050d338>] (bus_add_driver) from [<c050edb0>] (driver_register+0x88/0x108)
[ 63.144766] [<c050edb0>] (driver_register) from [<c050ff08>] (__platform_driver_register+0x50/0x58)
[ 63.144778] [<c050ff08>] (__platform_driver_register) from [<bf005020>] (__test_int_driver_init+0x20/0x1000 [sigGen])
[ 63.144792] [<bf005020>] (__test_int_driver_init [sigGen]) from [<c0101870>] (do_one_initcall+0x50/0x178)
[ 63.144805] [<c0101870>] (do_one_initcall) from [<c01accac>] (do_init_module+0x74/0x20c)
[ 63.144815] [<c01accac>] (do_init_module) from [<c01aba50>] (load_module+0x1e38/0x2468)
[ 63.144824] [<c01aba50>] (load_module) from [<c01ac274>] (SyS_finit_module+0x9c/0xac)
[ 63.144834] [<c01ac274>] (SyS_finit_module) from [<c0107ee0>] (ret_fast_syscall+0x0/0x5c)
[ 63.144843] Code: bad PC value
[ 63.487534] dw_mmc ff704000.dwmmc0: Unexpected interrupt latency
[ 63.613936] ---[ end trace 0000000000000002 ]---
Segmentation fault
root#socfpga:~/modInt#
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144381] Internal error: Oops: 80000007 [#1] PREEMPT SMP ARM
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144454] Process insmod (pid: 1350, stack limit = 0xee618218)
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144457] Stack: (0xee619c20 to 0xee61a000)
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144465] 9c20: 00000000 c04c3280 00000000 00000000 00000000 00000000 00000000 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144472] 9c40: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144478] 9c60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144484] 9c80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144491] 9ca0: 00000000 00000000 ef279610 ef279610 ef279610 bf002014 fffffdfb bf002014
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144499] 9cc0: ee619cec ee619cd0 c050ffb8 bf000078 ef279610 c0cabf84 c0cabf9c 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144506] 9ce0: ee619d1c ee619cf0 c050def8 c050ff68 00000000 ef279610 bf002014 ef279644
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144512] 9d00: 00000000 bf0020c8 11b4365c bf002080 ee619d3c ee619d20 c050e084 c050dcf4
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144519] 9d20: 00000000 bf002014 c050dfc8 00000000 ee619d64 ee619d40 c050bf34 c050dfd4
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144526] 9d40: ef02b06c ef28f248 c07afb2c bf002014 ef26a380 c0c56658 ee619d74 ee619d68
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144533] 9d60: c050d86c c050bec4 ee619d9c ee619d78 c050d338 c050d84c bf0012c8 ee619d88
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144540] 9d80: bf002014 bf005000 00000000 00000001 ee619db4 ee619da0 c050edb0 c050d198
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144547] 9da0: bf002080 bf005000 ee619dc4 ee619db8 c050ff08 c050ed34 ee619dd4 ee619dc8
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144554] 9dc0: bf005020 c050fec4 ee619e44 ee619dd8 c0101870 bf00500c ee618000 ef001e40
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144561] 9de0: ee619df8 ee618038 ee619e34 ee619df8 c025d0cc c025bb1c ee619e44 ee619e08
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144569] 9e00: c025bb1c c0257780 00000001 0000001f ee4037c0 ee618008 00000001 bf002080
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144575] 9e20: 00000001 bf002080 00000001 ee72fbc0 bf0020c8 11b4365c ee619e6c ee619e48
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144582] 9e40: c01accac c010182c ee619e6c ee619e58 c024c6a8 ee619f40 00000001 ee4bc9c0
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144590] 9e60: ee619f1c ee619e70 c01aba50 c01acc44 bf00208c 00007fff bf002080 c01a8a04
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144596] 9e80: bf002264 00000000 c0947650 bf0021b0 bf002180 00000000 c0803938 ee619f40
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144603] 9ea0: ee619eec ee619eb0 c026b078 c0264e58 00000001 00000000 00000000 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144609] 9ec0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144615] 9ee0: 00000000 00000000 00000000 00000000 7fffffff 00000000 00000003 000188e1
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144622] 9f00: 0000017b c01080c4 ee618000 00000000 ee619fa4 ee619f20 c01ac274 c01a9c24
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144629] 9f20: 7fffffff 00000000 00000003 00000000 00000000 f0b1e000 0001ef00 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144636] 9f40: f0b1e412 f0b1e000 0001ef00 f0b3c7a8 f0b3c5d4 f0b35a50 00003000 00003080
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144643] 9f60: 00000000 00000000 00000000 000016fc 0000002c 0000002d 00000018 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144649] 9f80: 00000012 00000000 00000000 00000000 00000000 beb4cd88 00000000 ee619fa8
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144656] 9fa0: c0107ee0 c01ac1e4 00000000 00000000 00000003 000188e1 00000000 00000002
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144662] 9fc0: 00000000 00000000 beb4cd88 0000017b 00000000 00000000 b6f67000 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144669] 9fe0: beb4cbc0 beb4cbb0 00013fef b6eb7990 60070010 00000003 00000000 00000000
Broadcast message from systemd-journald#socfpga (Thu 2019-02-14 04:16:01 UTC):
kernel[1283]: [ 63.144843] Code: bad PC value
I am sure there are better ways and I would like to hear them. For example, I also tried to implement the logic on the IRQ handler, but... failed.
EDIT:
I changed a little bit my code as #suren99 suggested, I also added some printk to debug, and now i get this:
root#socfpga:~/leer# insmod leer.ko
[ 527.785020] Opened the file successfully
[ 527.788953] Value of ret is: -22
[ 527.792438] The PID is: ▒▒▒8A▒▒�#
[ 527.792476] test_int: La IRQ 41 va a ser registrada!
I must add that...
ret = kernel_read(fp, offset, buf, 512);
Why this is "-22"?
printk("The PID is: %s", buf);
If I printk this, then the answer is this: ▒▒▒8A▒▒�#
printk("The PID is: %s", *buf);
And if put this, then the pointer returns "null".
What am I doing wrong?

First of all, you shouldn't check the return value of filp_open with NULL. You should check if the call has succeeded by using IS_ERR()
if (IS_ERR(f)) {
pr_err("Error opening file")
}
I believe filp_open has returned an error pointer and you are trying to dereference it
f->f_op->read(f, buf, 128, &f->f_pos);
Alternatively, you can use addr2line to find which line has caused the kernel panic

Related

Wine on Linux Mint - too many "fixme" messages

I'm trying to run a windows app on Linux Mint 20.03.
Wine installed Mono to install the app since it uses .NET. When launching the app from the terminal, I've got all of these messages:
01a4:fixme:iphlpapi:NotifyAddrChange (Handle 000000000081F1E8, overlapped 0000019800034090): stub
01a4:fixme:win:GetPointerDevices (000000000081EB0C 0000000000000000): partial stub
01a4:fixme:system:QueryDisplayConfig (00000002 000000000081EB20 00000198047BFE20 000000000081EB10 00000198000BF100 0000000000000000): semi-stub
01a4:fixme:system:DisplayConfigGetDeviceInfo Unimplemented packet type: 11
01a4:fixme:win:GetPointerDevices (000000000081EB0C 0000000000000000): partial stub
01a4:fixme:system:QueryDisplayConfig (00000002 000000000081EB20 00000198047BFE20 000000000081EB10 00000198000BF100 0000000000000000): semi-stub
01a4:fixme:system:DisplayConfigGetDeviceInfo Unimplemented packet type: 11
01a4:fixme:win:GetPointerDevices (000000000081EB0C 0000000000000000): partial stub
01a4:fixme:system:QueryDisplayConfig (00000002 000000000081EB20 00000198047BFE20 000000000081EB10 00000198000BF100 0000000000000000): semi-stub
01a4:fixme:system:DisplayConfigGetDeviceInfo Unimplemented packet type: 11
021c:fixme:wtsapi:WTSRegisterSessionNotification Stub 0000000000020074 0x00000000
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000000000000000 len=0 000000000FCFEF2C),stub!
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 000001980497F0B0 len=40 000000000FCFEF2C),stub!
0228:fixme:ntdll:NtSetInformationToken TokenIntegrityLevel stub!
0228:fixme:ntdll:NtFilterToken flags 0x1 unsupported
0228:fixme:ntdll:NtFilterToken support for restricting sids not yet implemented
01a4:fixme:service:I_ScRegisterDeviceNotification Notification filters are not yet implemented.
01a4:fixme:wtsapi:WTSRegisterSessionNotification Stub 0000000000030042 0x00000000
0228:fixme:sync:NtSetInformationJobObject stub: 0x32c 4 0xfcff028 4
0230:fixme:file:ReplaceFileW Ignoring flags 2
01a4:fixme:combase:RoGetActivationFactory (L"Windows.Devices.Enumeration.DeviceAccessInformation", {574bd3d3-5f30-45cd-8a94-724fe5973084}, 000000000081CB00): semi-stub
01a4:err:combase:RoGetActivationFactory Failed to find library for L"Windows.Devices.Enumeration.DeviceAccessInformation"
01a4:fixme:combase:RoGetActivationFactory (L"Windows.Devices.Enumeration.DeviceAccessInformation", {574bd3d3-5f30-45cd-8a94-724fe5973084}, 000000000081CB00): semi-stub
01a4:err:combase:RoGetActivationFactory Failed to find library for L"Windows.Devices.Enumeration.DeviceAccessInformation"
01a4:fixme:system:EnableNonClientDpiScaling (0000000000030040): stub
01a4:fixme:win:RegisterTouchWindow (0000000000030040 00000002): stub
021c:fixme:wtsapi:WTSRegisterSessionNotification Stub 000000000002004E 0x00000000
01a4:fixme:dwmapi:DwmSetWindowAttribute (0000000000030040, 2, 000000000081C584, 4) stub
01a4:fixme:nls:RtlGetThreadPreferredUILanguages 00000038, 000000000081B94C, 0000000000000000 000000000081B948
01a4:fixme:nls:get_dummy_preferred_ui_language (0x38 000000000081B94C 0000000000000000 000000000081B948) returning a dummy value (current locale)
01a4:fixme:nls:RtlGetThreadPreferredUILanguages 00000038, 000000000081B94C, 000000000081B932 000000000081B948
01a4:fixme:nls:get_dummy_preferred_ui_language (0x38 000000000081B94C 000000000081B932 000000000081B948) returning a dummy value (current locale)
01a4:fixme:dwrite:dwritefontface5_HasVariations 0000000001775CE0: stub
01a4:fixme:win:RegisterPowerSettingNotification (000000000002004E,{2b84c20e-ad23-4ddf-93db-05ffbd7efca5},0): stub
0254:err:winediag:ntlm_check_version ntlm_auth was not found or is outdated. Make sure that ntlm_auth >= 3.0.25 is in your path. Usually, you can find it in the winbind package of your distribution.
0254:err:ntlm:ntlm_LsaApInitializePackage no NTLM support, expect problems
0254:fixme:powrprof:PowerRegisterSuspendResumeNotification (0x00000002,000000000081FCF0,000000000081FCE8) stub!
0254:fixme:heap:RtlSetHeapInformation 0000000000000000 1 0000000000000000 0 stub
0254:fixme:ntdll:EtwEventSetInformation (deadbeef, 2, 00000212002410B0, 16) stub
027c:fixme:winsock:WSALookupServiceBeginW (0000000006ACDBE0 0x00000ff0 0000000006ACDC58) Stub!
027c:fixme:iphlpapi:NotifyAddrChange (Handle 0000000006ACDDB8, overlapped 0000021200310B10): stub
027c:fixme:wlanapi:WlanEnumInterfaces (0000000000000001, 0000000000000000, 0000000006ACCD88) semi-stub
027c:fixme:wlanapi:WlanEnumInterfaces (0000000000000001, 0000000000000000, 0000000006ACCD88) semi-stub
01a4:fixme:shcore:GetCurrentProcessExplicitAppUserModelID 000000000081BC90: stub
0298:fixme:file:NtLockFile I/O completion on lock not implemented yet
0298:fixme:heap:PrefetchVirtualMemory process FFFFFFFFFFFFFFFF, count 0000000000000001, addresses 000000000A3CF400, flags 0 stub.
027c:fixme:winsock:setsockopt Ignoring SO_RANDOMIZE_PORT
01a4:fixme:win:RegisterTouchWindow (0000000000020070 00000002): stub
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000000000000000 len=0 000000000FCFEF2C),stub!
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 000001980497EC00 len=40 000000000FCFEF2C),stub!
0228:fixme:ntdll:NtSetInformationToken TokenIntegrityLevel stub!
0228:fixme:ntdll:NtFilterToken flags 0x1 unsupported
0228:fixme:ntdll:NtFilterToken support for restricting sids not yet implemented
027c:fixme:wlanapi:WlanEnumInterfaces (0000000000000001, 0000000000000000, 0000000006ACCA88) semi-stub
02a8:err:winediag:ntlm_check_version ntlm_auth was not found or is outdated. Make sure that ntlm_auth >= 3.0.25 is in your path. Usually, you can find it in the winbind package of your distribution.
02a8:err:ntlm:ntlm_LsaApInitializePackage no NTLM support, expect problems
02a8:fixme:powrprof:PowerRegisterSuspendResumeNotification (0x00000002,000000000081FCF0,000000000081FCE8) stub!
02a8:fixme:heap:RtlSetHeapInformation 0000000000000000 1 0000000000000000 0 stub
02a8:fixme:ntdll:EtwEventSetInformation (deadbeef, 2, 000002B6002410B0, 16) stub
02a8:fixme:ntdll:NtQuerySystemInformation info_class SYSTEM_PERFORMANCE_INFORMATION
02a8:fixme:thread:QueryThreadCycleTime (FFFFFFFFFFFFFFFE,000000000081EB48): stub!
02f8:fixme:kernelbase:AppPolicyGetThreadInitializationType FFFFFFFFFFFFFFFA, 000000000D48FE10
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000000000000000 len=0 000000000FCFEF2C),stub!
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 000001980497EED0 len=40 000000000FCFEF2C),stub!
0228:fixme:ntdll:NtSetInformationToken TokenIntegrityLevel stub!
0228:fixme:ntdll:NtFilterToken flags 0x1 unsupported
0228:fixme:ntdll:NtFilterToken support for restricting sids not yet implemented
027c:fixme:ntdll:NtQuerySystemInformation info_class SYSTEM_PERFORMANCE_INFORMATION
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000000000000000 len=0 000000000FCFEF2C),stub!
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 000001980497E480 len=40 000000000FCFEF2C),stub!
0228:fixme:ntdll:NtSetInformationToken TokenIntegrityLevel stub!
0228:fixme:ntdll:NtFilterToken flags 0x1 unsupported
0228:fixme:ntdll:NtFilterToken support for restricting sids not yet implemented
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000000000000000 len=0 000000000FCFEF2C),stub!
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 000001980497E270 len=40 000000000FCFEF2C),stub!
0228:fixme:ntdll:NtSetInformationToken TokenIntegrityLevel stub!
0228:fixme:ntdll:NtFilterToken flags 0x1 unsupported
0228:fixme:ntdll:NtFilterToken support for restricting sids not yet implemented
027c:fixme:winsock:setsockopt Ignoring SO_RANDOMIZE_PORT
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000000000000000 len=0 000000000FCFEF2C),stub!
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000019804CB30C0 len=40 000000000FCFEF2C),stub!
0228:fixme:ntdll:NtSetInformationToken TokenIntegrityLevel stub!
0228:fixme:ntdll:NtFilterToken flags 0x1 unsupported
0228:fixme:ntdll:NtFilterToken support for restricting sids not yet implemented
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000000000000000 len=0 000000000FCFEF2C),stub!
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000019804B93260 len=40 000000000FCFEF2C),stub!
0228:fixme:ntdll:NtSetInformationToken TokenIntegrityLevel stub!
0228:fixme:ntdll:NtFilterToken flags 0x1 unsupported
0228:fixme:ntdll:NtFilterToken support for restricting sids not yet implemented
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000000000000000 len=0 000000000FCFEF2C),stub!
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 000001980497EC60 len=40 000000000FCFEF2C),stub!
0228:fixme:ntdll:NtSetInformationToken TokenIntegrityLevel stub!
0228:fixme:ntdll:NtFilterToken flags 0x1 unsupported
0228:fixme:ntdll:NtFilterToken support for restricting sids not yet implemented
02a8:fixme:dwrite:dwritefontface5_HasVariations 00000000016A40A0: stub
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000000000000000 len=0 000000000FCFEF2C),stub!
0228:fixme:winstation:GetUserObjectSecurity (0000000000000008 000000000FCFEF28 0000019804F197B0 len=40 000000000FCFEF2C),stub!
0228:fixme:ntdll:NtSetInformationToken TokenIntegrityLevel stub!
0228:fixme:ntdll:NtFilterToken flags 0x1 unsupported
0228:fixme:ntdll:NtFilterToken support for restricting sids not yet implemented
01f0:fixme:ntdll:NtQuerySystemInformation info_class SYSTEM_PERFORMANCE_INFORMATION
01dc:fixme:kernelbase:AppPolicyGetProcessTerminationMethod FFFFFFFFFFFFFFFA, 000000000081FD40
Am I missing something?

Using mutex to synchronize two kernel threads causes a system panic

I used a mutex to synchronize two kernel threads. After running for a while the system panicked, and kdump showed that the memory of the mutex was corrupted.
Here is a simplified code example; you can run it directly to reproduce the problem.
When I changed the memory allocation from vmalloc to kmalloc, it worked. Does anyone know why?
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/kthread.h>
#include <linux/slab.h>
// One work item handed from the producer thread to the consumer thread.
struct product {
struct list_head list; // links the item into the global "products" list
struct mutex lock; // initialised by create_product()
bool finish; // false on creation; set to true by the consumer, polled by the producer
};
// Shared state: list of pending products and the spinlock that guards it.
struct list_head products;
spinlock_t spin;
// Thread handles filled in by kdemo_init(): task1 runs producer(),
// task2 runs consumer().
struct task_struct *task1;
struct task_struct *task2;
// Allocates a product with vmalloc() and initialises it: empty list node,
// initialised mutex, finish == false.
// Returns the new product, or NULL when the allocation fails. The memory
// must be released with vfree() (see remove_product()).
struct product *create_product(void)
{
	struct product *item = vmalloc(sizeof(*item));

	if (item) {
		INIT_LIST_HEAD(&item->list);
		mutex_init(&item->lock);
		item->finish = false;
	}
	return item;
}
// Releases a product obtained from create_product() and clears the caller's
// pointer so that it cannot be used again by accident.
//   pp_prod - address of the pointer to the product to free
void remove_product(struct product **pp_prod)
{
	struct product *doomed = *pp_prod;

	vfree(doomed);
	*pp_prod = NULL;
}
int producer(void *data)
{
while(!kthread_should_stop())
{
struct product *p_prod = create_product();
if(!p_prod)
continue;
spin_lock(&spin);
list_add_tail(&p_prod->list, &products);
spin_unlock(&spin);
while (true)
{
mutex_lock(&p_prod->lock);
if(p_prod->finish)
{
mutex_unlock(&p_prod->lock);
schedule();
break;
}
mutex_unlock(&p_prod->lock);
}
remove_product(&p_prod);
}
do_exit(0);
}
int consumer(void *data)
{
while(!kthread_should_stop())
{
struct product *p_prod;
spin_lock(&spin);
if(list_empty(&products))
{
spin_unlock(&spin);
schedule();
continue;
}
p_prod = list_first_entry(&products, struct product, list);
list_del(&p_prod->list);
spin_unlock(&spin);
mutex_lock(&p_prod->lock);
p_prod->finish = true;
mutex_unlock(&p_prod->lock);
}
do_exit(0);
}
// Module entry point: initialises the shared list and its lock, then starts
// the consumer and producer threads.
// Returns 0 on success or the negative errno reported by kthread_run().
static int __init kdemo_init(void) {
	printk(">>> demo driver begin!\n");
	spin_lock_init(&spin);
	INIT_LIST_HEAD(&products);

	// The consumer is started first: if the producer then fails to start,
	// the consumer can be stopped right away because it only idles on an
	// empty list. The other order could leave a producer waiting forever
	// for a consumer that never came up.
	task2 = kthread_run(consumer, NULL, "hdz-consumer");
	if (IS_ERR(task2)) {
		printk(KERN_ERR ">>> demo driver: cannot start consumer\n");
		return PTR_ERR(task2);
	}

	task1 = kthread_run(producer, NULL, "hdz-producer");
	if (IS_ERR(task1)) {
		printk(KERN_ERR ">>> demo driver: cannot start producer\n");
		kthread_stop(task2);
		return PTR_ERR(task1);
	}
	return 0;
}
// Module exit point: stops both worker threads and logs the unload.
static void __exit kdemo_exit(void) {
// The producer is stopped first: it only leaves its loop after the consumer
// has marked the outstanding product finished, so the consumer has to be
// still running at this point.
kthread_stop(task1);
kthread_stop(task2);
printk(">>> demo driver exit!\n");
}
// Register the module's load/unload entry points and its metadata.
module_init(kdemo_init);
module_exit(kdemo_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("xxxx#xxx.com");
MODULE_VERSION("1.0");
dmesg log and consumer stack
[ 176.599116] >>> demo driver begin!
[ 177.167659] BUG: unable to handle kernel NULL pointer dereference at 0000000000000fb0
[ 177.167695] IP: [<ffffffff9e0caa47>] wake_q_add+0x17/0x50
[ 177.167719] PGD 0
[ 177.167729] Oops: 0002 [#1] SMP
[ 177.167743] Modules linked in: kdemo(OE) mpt3sas mptctl mptbase nvmet_rdma nvmet nvme_rdma nvme_fabrics nvme nvme_core drbd(OE) dell_rbu kvdo(OE) uds(OE) bonding sha512_ssse3 sha512_generic qat_api(OE) usdm_drv(OE) intel_qat(OE) authenc uio ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ucm rpcrdma sunrpc rdma_ucm ib_umad ib_uverbs ib_iser rdma_cm ib_ipoib iw_cm libiscsi scsi_transport_iscsi ib_cm mlx5_ib ib_core intelcas(OE) inteldisk(OE) iTCO_wdt iTCO_vendor_support dell_smbios sparse_keymap dcdbas skx_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd sg joydev pcspkr ipmi_si i2c_i801 lpc_ich shpchp ipmi_devintf ipmi_msghandler mei_me acpi_power_meter
[ 177.168071] mei acpi_pad wmi nfit libnvdimm dm_multipath binfmt_misc ip_tables xfs libcrc32c mgag200 drm_kms_helper crc32c_intel syscopyarea sysfillrect sysimgblt mlx5_core fb_sys_fops ttm ixgbe drm igb mlxfw devlink mdio ptp i2c_algo_bit pps_core i2c_core dca sr_mod cdrom sd_mod crc_t10dif crct10dif_generic crct10dif_pclmul crct10dif_common ahci libahci libata mpt2sas raid_class scsi_transport_sas megaraid_sas dm_mirror dm_region_hash dm_log dm_mod
[ 177.168263] CPU: 24 PID: 5412 Comm: hdz-consumer Kdump: loaded Tainted: G OE ------------ 3.10.0-862.el7.x86_64 #1
[ 177.168297] Hardware name: Dell Inc. PowerEdge R740/08D89F, BIOS 2.10.2 02/24/2021
[ 177.168320] task: ffff93db22af3f40 ti: ffff93dc89354000 task.ti: ffff93dc89354000
[ 177.168344] RIP: 0010:[<ffffffff9e0caa47>] [<ffffffff9e0caa47>] wake_q_add+0x17/0x50
[ 177.168372] RSP: 0018:ffff93dc89357e48 EFLAGS: 00010246
[ 177.168389] RAX: 0000000000000000 RBX: ffffbe2ce6533018 RCX: 0000000000000fb0
[ 177.168410] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff93dc89357e58
[ 177.168432] RBP: ffff93dc89357e48 R08: ffffbe2ce6533000 R09: 0000000000000000
[ 177.168453] R10: 0000000000000001 R11: 0000000000000001 R12: ffffbe2ce6533014
[ 177.168475] R13: ffff93dc89357e58 R14: 0000000000000000 R15: 0000000000000000
[ 177.168497] FS: 0000000000000000(0000) GS:ffff93dca9400000(0000) knlGS:0000000000000000
[ 177.168540] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 177.168560] CR2: 0000000000000fb0 CR3: 00000002b7a0e000 CR4: 00000000007607e0
[ 177.168583] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 177.168606] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 177.168629] PKRU: 00000000
[ 177.168640] Call Trace:
[ 177.168656] [<ffffffff9e711b2e>] __mutex_unlock_slowpath+0x5e/0x90
[ 177.168679] [<ffffffff9e710fab>] mutex_unlock+0x1b/0x20
[ 177.168699] [<ffffffffc0637064>] consumer+0x64/0x90 [kdemo]
[ 177.168723] [<ffffffffc0637000>] ? 0xffffffffc0636fff
[ 177.168746] [<ffffffff9e0bae31>] kthread+0xd1/0xe0
[ 177.168765] [<ffffffff9e0bad60>] ? insert_kthread_work+0x40/0x40
[ 177.168788] [<ffffffff9e71f61d>] ret_from_fork_nospec_begin+0x7/0x21
[ 177.168811] [<ffffffff9e0bad60>] ? insert_kthread_work+0x40/0x40
[ 177.168831] Code: 09 00 00 31 c9 31 d2 e8 18 41 ff ff eb e4 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 8d 8e b0 0f 00 00 31 c0 ba 01 00 00 00 48 89 e5 <f0> 48 0f b1 96 b0 0f 00 00 48 85 c0 74 0b 5d c3 66 0f 1f 84 00
[ 177.168996] RIP [<ffffffff9e0caa47>] wake_q_add+0x17/0x50
[ 177.169017] RSP <ffff93dc89357e48>
[ 177.169959] CR2: 0000000000000fb0
producer stack
crash> bt 5411
PID: 5411 TASK: ffff93db22af4f10 CPU: 1 COMMAND: "hdz-producer"
bt: page excluded: kernel virtual address: ffffffffffffffff type: "cpu_online_map"
#0 [ffff93dca8e48e48] crash_nmi_callback at ffffffff9e0533b7
#1 [ffff93dca8e48e58] nmi_handle at ffffffff9e71790c
#2 [ffff93dca8e48eb0] do_nmi at ffffffff9e717b2d
#3 [ffff93dca8e48ef0] end_repeat_nmi at ffffffff9e716d79
#4 [ffff93dca8e48f28] __vmalloc_node_range at ffffffff9e1d7518
[exception RIP: mutex_unlock+20]
RIP: ffffffff9e710fa4 RSP: ffff93ddafd73e98 RFLAGS: 00000202
RAX: 0000000000000010 RBX: 0000000000000010 RCX: 0000000000000202
RDX: ffff93ddafd73e98 RSI: 0000000000000018 RDI: 0000000000000001
RBP: ffffffff9e710fa4 R8: ffffffff9e710fa4 R9: 0000000000000018
R10: ffff93ddafd73e98 R11: 0000000000000202 R12: ffffffffffffffff
R13: ffffbe2ce6535010 R14: ffffffffc0639240 R15: 0000000000000000
ORIG_RAX: ffffffffc0639240 CS: 0010 SS: 0018
--- <(unknown) exception stack> ---
#5 [ffff93ddafd73e98] mutex_unlock at ffffffff9e710fa4
#6 [ffff93ddafd73ea0] producer at ffffffffc0637145 [kdemo]
#7 [ffff93ddafd73ec8] kthread at ffffffff9e0bae31
#8 [ffff93ddafd73f50] ret_from_fork_nospec_begin at ffffffff9e71f61d

Determining the address that is causing double page fault

A hardware device is performing a memory operation that results in a page fault. How can I determine the virtual address that is causing the following double-fault error message, so that I can pin that address?
[ 52.330981] PANIC: double fault, error_code: 0x0
[ 52.331972] Kernel panic - not syncing: Machine halted.
[ 52.332916] CPU: 0 PID: 1432 Comm: stress Tainted: G OE 4.15.0-23-generic #25~16.04.1-Ubuntu
[ 52.334584] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.1-0-g0551a4be2c-prebuilt.qemu-project.org 04/01/2014
[ 52.336673] Call Trace:
[ 52.337151] <#DF>
[ 52.337549] dump_stack+0x63/0x8b
[ 52.338152] panic+0xe4/0x244
[ 52.338690] df_debug+0x2d/0x30
[ 52.339450] do_double_fault+0x9a/0x130
[ 52.340187] double_fault+0x1e/0x30
[ 52.340836] RIP: 0010:error_entry+0x1e/0x100
[ 52.341674] RSP: 0000:fffffe0000002000 EFLAGS: 00010046
[ 52.342605] RAX: 0000000090c009e7 RBX: 0000000000000001 RCX: ffffffff90c009e7
[ 52.343896] RDX: 0000000004ba1000 RSI: ffffffff90c015ff RDI: fffffe0000002078
[ 52.345153] RBP: fffffe0000002079 R08: 0000000000000000 R09: 0000000000000000
[ 52.346407] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[ 52.347739] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 52.349054] ? native_iret+0x7/0x7
[ 52.349729] ? async_page_fault+0xf/0x50
[ 52.350463] </#DF>
[ 52.350908] <ENTRY_TRAMPOLINE>
[ 52.351503] RIP: 0010:do_async_page_fault+0x0/0x80
[ 52.352443] RSP: 0000:fffffe0000002120 EFLAGS: 00010012
[ 52.353416] RAX: 0000000090c009e7 RBX: 0000000000000001 RCX: ffffffff90c009e7
[ 52.354770] RDX: 0000000004ba1000 RSI: 0000000000000000 RDI: fffffe0000002128
[ 52.356160] RBP: fffffe0000002129 R08: 0000000000000000 R09: 0000000000000000
[ 52.357500] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[ 52.358814] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 52.360157] ? native_iret+0x7/0x7
[ 52.360849] ? kvm_async_pf_task_wait+0x220/0x220
[ 52.361717] ? async_page_fault+0x25/0x50
[ 52.362521] ? native_iret+0x7/0x7
[ 52.363136] RIP: 0010:async_page_fault+0x0/0x50
[ 52.364052] RSP: 0000:fffffe00000021d0 EFLAGS: 00010006
[ 52.365021] RAX: 00007f780fef3010 RBX: 00007f780b352010 RCX: 00007f780b352010
[ 52.366391] RDX: 0000000004ba1000 RSI: 0000000008001000 RDI: 0000000000000000
[ 52.367729] RBP: 0000000008000000 R08: ffffffffffffffff R09: 0000000000000000
[ 52.369046] R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000001000
[ 52.370437] R13: 00007f7813351010 R14: 0000000000000002 R15: fffffffffffff000
[ 52.372447] ? async_page_fault+0x25/0x50
[ 52.373650] </ENTRY_TRAMPOLINE>
[ 52.374788] Kernel Offset: 0xf200000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 52.376319] ---[ end Kernel panic - not syncing: Machine halted.

Open tty Serial USB port

I am using a Sierra AirCard modem.
While configuring the dial/PPP port, I open the port (/dev/ttyUSB3) like this:
// Opens the serial data port named by portName and configures it for raw
// I/O at 115200 baud. Yields true when open() succeeds and false otherwise;
// the return values of the tcflush()/tcsetattr()/cfset*speed() calls are
// not checked, and the descriptor is left open in fdDataPort.
struct termios tio;
// Start from an all-zero termios; the port's current settings are never
// read back (there is no tcgetattr() call in this fragment).
memset(&tio, 0, sizeof(termios));
// Read/write, non-blocking open that does not make the port the
// controlling terminal of the process.
if ((fdDataPort = open(portName, O_RDWR | O_NOCTTY| O_SYNC | O_NONBLOCK )) != -1)
{
// Put the in-memory settings into raw mode.
cfmakeraw (&tio);
printf("After OpenDataPort call");
// Clear every input flag (IGNCR was previously tried here).
tio.c_iflag = 0;//IGNCR;
// CLOCAL: ignore modem control lines; CREAD: enable the receiver.
tio.c_cflag |= CLOCAL | CREAD;
tcflush(fdDataPort, TCIOFLUSH);
// First apply: happens before any baud rate has been stored in tio.
tcsetattr(fdDataPort, TCSANOW, &tio);
// Discard pending input and output (done twice in a row).
tcflush(fdDataPort, TCIOFLUSH);
tcflush(fdDataPort, TCIOFLUSH);
// Store 115200 baud for both directions in tio ...
cfsetispeed(&tio, B115200);
cfsetospeed(&tio, B115200);
// ... and apply the settings a second time so the speed takes effect.
tcsetattr(fdDataPort, TCSANOW, &tio);
printf("After tcsetattr call");
return true;
}
else
{
// open() failed; errno is not inspected or reported.
return false;
}
This configuration has worked perfectly fine so far for connection establishment, reconnecting, etc.
But I have one problem with this method: if I remove the dongle while this operation is in progress (a window of only a few milliseconds), I am not able to detect the dongle removal in my physical-device-manager (the process that does device management, mode switching, etc.) because no message is received from the kernel layer. Also, after I remove the dongle, /dev/ttyUSB3 still persists (ttyUSB0, 1 and 2 are released). Kindly let me know whether this is the right way to open the port or whether another method is available. I appreciate your help.
EDIT
Below is the ERROR log from dmesg
[ 49.463282] 5864 slab pages
[ 49.463286] 943924 pages shared
[ 49.463291] 0 pages swap cached
[ 49.465229] FAT-fs (sda1): utf8 is not a recommended IO charset for FAT filesystems, filesystem will be case sensitive!
[ 49.511839] FAT-fs (sda1): Volume was not properly unmounted. Some data may be corrupt. Please run fsck.
[ 51.120554] usb 1-1: USB disconnect, device number 4
[ 51.153175] sierra ttyUSB0: Sierra USB modem converter now disconnected from ttyUSB0
[ 51.153546] sierra 1-1:1.0: device disconnected
[ 51.185779] sierra ttyUSB1: Sierra USB modem converter now disconnected from ttyUSB1
[ 51.186091] sierra 1-1:1.1: device disconnected
[ 51.233531] sierra ttyUSB2: Sierra USB modem converter now disconnected from ttyUSB2
[ 51.233888] sierra 1-1:1.3: device disconnected
[ 51.242018] sierra ttyUSB3: sierra_submit_rx_urbs: submit urb failed: -19
[ 51.242032] sierra ttyUSB3: sierra_submit_rx_urbs: submit urb failed: -19
[ 51.242040] sierra ttyUSB3: sierra_submit_rx_urbs: submit urb failed: -19
[ 51.242047] sierra ttyUSB3: sierra_submit_rx_urbs: submit urb failed: -19
[ 51.242054] sierra ttyUSB3: sierra_submit_rx_urbs: submit urb failed: -19
[ 51.242060] sierra ttyUSB3: sierra_submit_rx_urbs: submit urb failed: -19
[ 51.242066] sierra ttyUSB3: sierra_submit_rx_urbs: submit urb failed: -19
[ 51.242073] sierra ttyUSB3: sierra_submit_rx_urbs: submit urb failed: -19
[ 51.617553] sd 1:0:0:0: [sda] Unhandled error code
[ 51.617569] sd 1:0:0:0: [sda]
[ 51.617575] Result: hostbyte=0x07 driverbyte=0x00
[ 51.617582] sd 1:0:0:0: [sda] CDB:
[ 51.617587] cdb[0]=0x28: 28 00 00 00 0d 27 00 00 01 00
[ 51.617619] end_request: I/O error, dev sda, sector 3367
[ 51.617674] sd 1:0:0:0: [sda] Unhandled error code
[ 51.617682] sd 1:0:0:0: [sda]
[ 51.617687] Result: hostbyte=0x07 driverbyte=0x00
[ 51.617693] sd 1:0:0:0: [sda] CDB:
[ 51.617698] cdb[0]=0x28: 28 00 00 00 0d 28 00 00 01 00
I am stuck; please help.

"INFO: task blocked for more than 120 seconds." Can't figure out what it's blocked on

I have a program that is just a TCP data forwarder. Data packets come in from one source and are buffered intelligently for redistribution to multiple clients. There are a few threads, and there's some mutex'ing going on. There's also a little logging to disk. But mostly it's just TCP socket data (no UDP despite what the dmesg seems to imply). Also, there's no blocking. It's all select on non-blocking sockets and some polling.
This process is hung up. Although it responds to SNMP requests (there are dedicated threads for that) and will accept client connections (there's a dedicated listener thread), no data will flow. Note that this program will run for a month with no problems before it finally hangs up, so it's really hard to probe with a debug version. I'm really hoping someone can help me interpret the dmesg reports I have, because this is all I have to go on.
The main thread that receives packets has this stack frame:
#0 0x00289424 in __kernel_vsyscall ()
#1 0x00a93656 in nanosleep () from /lib/libc.so.6
#2 0x00acc34c in usleep () from /lib/libc.so.6
#3 0x0805a140 in main ()
The sender thread has this stack frame:
#0 0x00289424 in __kernel_vsyscall ()
#1 0x00a93656 in nanosleep () from /lib/libc.so.6
#2 0x00acc34c in usleep () from /lib/libc.so.6
#3 0x08066efc in TSITcpip::TcpipThread(void*) ()
#4 0x08067026 in tcpip_callback(void*) ()
#5 0x00b95a09 in start_thread () from /lib/libpthread.so.0
#6 0x00ad343e in clone () from /lib/libc.so.6
There are 6 more threads. One is the listener thread, and the rest are for SNMP. I don't know how to get more stack frame info to tell what the duration of the sleep is, but there are limits placed on it. In one place, the longest sleep is capped at 5 seconds. Others are hard-coded to 1 second and 5 milliseconds. There are no blocking calls to access sockets. Mutexes are involved, but this one system ran for like a month with no clients attached, which means that blocking on locks would be very rare.
Below is an excerpt from dmesg. Everything before it is related to boot-up, and there's nothing after except for a segfault for an unrelated process. This is my only clue about what's going on, and I really hope someone can help me make sense of it. (I added some blank lines for readability.)
INFO: task str2many:1624 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
str2many D efa23b80 0 1624 1593 0x00000080
efd8c570 00200082 f2987140 efa23b80 00200246 c07d9c2b 00000031 0000bc43
00000000 eff27040 00010ced 00000001 00010ced c0b110c0 c0b110c0 efd8c818
c0b110c0 c0b0caa4 c0b110c0 efd8c818 efac6000 c07d3aa3 efe6d01c f7032780
Call Trace:
[<c07d9c2b>] ? udp_recvmsg+0x14b/0x2c0
[<c07d3aa3>] ? tcp_v4_rcv+0x473/0x770
[<c048792d>] ? exit_robust_list+0x7d/0x140
[<c04529ca>] ? mm_release+0xda/0xf0
[<c0458d9d>] ? exit_mm+0x6d/0x140
[<c0459136>] ? do_exit+0x126/0x740
[<c0788b91>] ? __netif_receive_skb+0x401/0x5f0
[<c046bd60>] ? dequeue_signal+0x30/0x190
[<c053da0c>] ? core_sys_select+0x16c/0x270
[<c045978c>] ? do_group_exit+0x3c/0xa0
[<c046c2fe>] ? get_signal_to_deliver+0x1ce/0x460
[<f8a4df94>] ? e1000_clean_rx_irq+0x274/0x450 [e1000e]
[<c0408fa3>] ? do_signal+0x93/0xa50
[<c0489395>] ? do_futex+0xe5/0xa40
[<c078cd79>] ? net_rx_action+0x199/0x280
[<c0480be0>] ? ktime_get_ts+0xd0/0x100
[<c0600623>] ? copy_to_user+0x33/0x110
[<c053c941>] ? poll_select_copy_remaining+0xe1/0x130
[<c04afed0>] ? __audit_syscall_exit+0x220/0x250
[<c04099d7>] ? do_notify_resume+0x77/0xa0
[<c083135c>] ? work_notifysig+0x13/0x1b
[<c0830000>] ? __mutex_lock_slowpath+0xb0/0x140
INFO: task str2many:1635 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
str2many D c2d03ac0 0 1635 1593 0x00000080
efa75570 00200082 00000200 c2d03ac0 00000002 efac5df4 efac5df0 00000000
00000000 eff27040 00010ced cc11e7f9 00010ced c0b110c0 c0b110c0 efa75818
c0b110c0 c0b0caa4 c0b110c0 efa75818 f6272000 00000000 000005ff efa75570
Call Trace:
[<c048792d>] ? exit_robust_list+0x7d/0x140
[<c04529ca>] ? mm_release+0xda/0xf0
[<c0458d9d>] ? exit_mm+0x6d/0x140
[<c0459136>] ? do_exit+0x126/0x740
[<c05f906a>] ? plist_del+0x2a/0x60
[<c046bd60>] ? dequeue_signal+0x30/0x190
[<c045978c>] ? do_group_exit+0x3c/0xa0
[<c046c2fe>] ? get_signal_to_deliver+0x1ce/0x460
[<c0408fa3>] ? do_signal+0x93/0xa50
[<c04893c5>] ? do_futex+0x115/0xa40
[<c04b78d1>] ? move_native_irq+0x11/0x50
[<c04afed0>] ? __audit_syscall_exit+0x220/0x250
[<c04099d7>] ? do_notify_resume+0x77/0xa0
[<c083135c>] ? work_notifysig+0x13/0x1b
INFO: task str2many:1636 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
str2many D efa75030 0 1636 1593 0x00000080
efa75030 00200082 eff27040 efa75030 c082ecb0 00000000 00000000 0000d164
00000000 eff27040 00010ced 00000001 00010ced c0b110c0 c0b110c0 efa752d8
c0b110c0 c0b0caa4 c0b110c0 efa752d8 efaca000 c0b110c0 efa752d8 efaba000
Call Trace:
[<c082ecb0>] ? schedule+0x3c0/0xae0
[<c07e03ab>] ? inet_sendmsg+0x4b/0xb0
[<c048792d>] ? exit_robust_list+0x7d/0x140
[<c04529ca>] ? mm_release+0xda/0xf0
[<c0458d9d>] ? exit_mm+0x6d/0x140
[<c0459136>] ? do_exit+0x126/0x740
[<c05f906a>] ? plist_del+0x2a/0x60
[<c046bd60>] ? dequeue_signal+0x30/0x190
[<c045978c>] ? do_group_exit+0x3c/0xa0
[<c046c2fe>] ? get_signal_to_deliver+0x1ce/0x460
[<c0408fa3>] ? do_signal+0x93/0xa50
[<c04893c5>] ? do_futex+0x115/0xa40
[<c04b8874>] ? __rcu_process_callbacks+0x44/0x2d0
[<c04b8b35>] ? rcu_process_callbacks+0x35/0x40
[<c045c18e>] ? __do_softirq+0xae/0x1a0
[<c04afed0>] ? __audit_syscall_exit+0x220/0x250
[<c04099d7>] ? do_notify_resume+0x77/0xa0
[<c083135c>] ? work_notifysig+0x13/0x1b
INFO: task str2many:1637 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
str2many D 00000070 0 1637 1593 0x00000080
efa9bab0 00200082 00000000 00000070 f63c559c efaa7030 c2d080fc 000170ed
00000000 eff27040 00010ced cdcefd93 00010ced c0b110c0 c0b110c0 efa9bd58
c0b110c0 c0b0caa4 c0b110c0 efa9bd58 f70fc000 c2d080c0 00200082 c044d665
Call Trace:
[<c044d665>] ? try_to_wake_up+0x205/0x3a0
[<c043b2b7>] ? __wake_up_common+0x47/0x70
[<c043f8f0>] ? complete+0x40/0x60
[<c0458d9d>] ? exit_mm+0x6d/0x140
[<c0459136>] ? do_exit+0x126/0x740
[<c05f906a>] ? plist_del+0x2a/0x60
[<c046bd60>] ? dequeue_signal+0x30/0x190
[<c045978c>] ? do_group_exit+0x3c/0xa0
[<c046c2fe>] ? get_signal_to_deliver+0x1ce/0x460
[<c0408fa3>] ? do_signal+0x93/0xa50
[<c04893c5>] ? do_futex+0x115/0xa40
[<c04b8874>] ? __rcu_process_callbacks+0x44/0x2d0
[<c04b8b35>] ? rcu_process_callbacks+0x35/0x40
[<c045c18e>] ? __do_softirq+0xae/0x1a0
[<c04afed0>] ? __audit_syscall_exit+0x220/0x250
[<c04099d7>] ? do_notify_resume+0x77/0xa0
[<c083135c>] ? work_notifysig+0x13/0x1b
INFO: task str2many:1638 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
str2many D b4dfcbe0 0 1638 1593 0x00000080
efa9b570 00200082 00000000 b4dfcbe0 ef991b64 c0833efa c0439bf0 0001a113
00000000 eff27040 00010ced cd0bd748 00010ced c0b110c0 c0b110c0 efa9b818
c0b110c0 c0b0caa4 c0b110c0 efa9b818 f70fc000 c0b100d8 000000e0 ffffffff
Call Trace:
[<c0833efa>] ? do_page_fault+0x2a/0x90
[<c0439bf0>] ? kmap_atomic_prot+0x120/0x150
[<c05ff1e1>] ? __get_user_4+0x11/0x17
[<c048792d>] ? exit_robust_list+0x7d/0x140
[<c04529ca>] ? mm_release+0xda/0xf0
[<c0458d9d>] ? exit_mm+0x6d/0x140
[<c0459136>] ? do_exit+0x126/0x740
[<c05f906a>] ? plist_del+0x2a/0x60
[<c046bd60>] ? dequeue_signal+0x30/0x190
[<c045978c>] ? do_group_exit+0x3c/0xa0
[<c046c2fe>] ? get_signal_to_deliver+0x1ce/0x460
[<c0439bf0>] ? kmap_atomic_prot+0x120/0x150
[<c0408fa3>] ? do_signal+0x93/0xa50
[<c04893c5>] ? do_futex+0x115/0xa40
[<c0433a3a>] ? __do_page_fault+0x1aa/0x420
[<c04afed0>] ? __audit_syscall_exit+0x220/0x250
[<c04099d7>] ? do_notify_resume+0x77/0xa0
[<c083135c>] ? work_notifysig+0x13/0x1b
INFO: task str2many:1716 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
str2many D eb09ae40 0 1716 1593 0x00000080
efaa7ab0 00200082 ef53e040 eb09ae40 eb09ae60 000005a8 c077c7fc 000bb500
00000000 eff27040 00010ced 00000001 00010ced c0b110c0 c0b110c0 efaa7d58
c0b110c0 c0b0caa4 c0b110c0 efaa7d58 efba2000 00000001 9b0bb0f7 00000000
Call Trace:
[<c077c7fc>] ? sk_reset_timer+0xc/0x20
[<c048792d>] ? exit_robust_list+0x7d/0x140
[<c07cf241>] ? __tcp_push_pending_frames+0x31/0xe0
[<c04529ca>] ? mm_release+0xda/0xf0
[<c0458d9d>] ? exit_mm+0x6d/0x140
[<c0459136>] ? do_exit+0x126/0x740
[<c046bd60>] ? dequeue_signal+0x30/0x190
[<c053da0c>] ? core_sys_select+0x16c/0x270
[<c045978c>] ? do_group_exit+0x3c/0xa0
[<c046c2fe>] ? get_signal_to_deliver+0x1ce/0x460
[<c0408fa3>] ? do_signal+0x93/0xa50
[<c0476450>] ? autoremove_wake_function+0x0/0x40
[<c0480be0>] ? ktime_get_ts+0xd0/0x100
[<c0600623>] ? copy_to_user+0x33/0x110
[<c053c941>] ? poll_select_copy_remaining+0xe1/0x130
[<c04afed0>] ? __audit_syscall_exit+0x220/0x250
[<c04099d7>] ? do_notify_resume+0x77/0xa0
[<c083135c>] ? work_notifysig+0x13/0x1b
INFO: task str2many:1717 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
str2many D c2d03ac0 0 1717 1593 0x00000080
efb11570 00200082 00000200 c2d03ac0 00000002 efba3df4 efba3df0 00000000
00000000 eff27040 00010ced 00000001 00010ced c0b110c0 c0b110c0 efb11818
c0b110c0 c0b0caa4 c0b110c0 efb11818 efac8000 00000000 000009ff efb11570
Call Trace:
[<c0784701>] ? sk_stream_wait_memory+0x1b1/0x210
[<c048792d>] ? exit_robust_list+0x7d/0x140
[<c04529ca>] ? mm_release+0xda/0xf0
[<c0458d9d>] ? exit_mm+0x6d/0x140
[<c0459136>] ? do_exit+0x126/0x740
[<c046bd60>] ? dequeue_signal+0x30/0x190
[<c045978c>] ? do_group_exit+0x3c/0xa0
[<c046c2fe>] ? get_signal_to_deliver+0x1ce/0x460
[<c0408fa3>] ? do_signal+0x93/0xa50
[<c0476450>] ? autoremove_wake_function+0x0/0x40
[<c05a292c>] ? security_file_permission+0xc/0x10
[<c052ab96>] ? rw_verify_area+0x66/0xe0
[<c045c18e>] ? __do_softirq+0xae/0x1a0
[<c04afed0>] ? __audit_syscall_exit+0x220/0x250
[<c04099d7>] ? do_notify_resume+0x77/0xa0
[<c083135c>] ? work_notifysig+0x13/0x1b
CE: hpet increasing min_delta_ns to 40226 nsec
INFO: task str2many:9401 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
str2many D f0238040 0 9401 1593 0x00000080
f0132ab0 00200082 ef802500 f0238040 00200246 c07d9c2b 00000031 00004b07
00000000 f619ec80 00031c6c 00000001 00031c6c c0b110c0 c0b110c0 f0132d58
c0b110c0 c0b0caa4 c0b110c0 f0132d58 f2294000 00000000 efe6d01c f7032780
Call Trace:
[<c07d9c2b>] ? udp_recvmsg+0x14b/0x2c0
[<c0830c65>] ? rwsem_down_failed_common+0x75/0x1a0
[<c0830dda>] ? rwsem_down_read_failed+0x1a/0x24
[<c0830e17>] ? call_rwsem_down_read_failed+0x7/0xc
[<c0458d9d>] ? exit_mm+0x6d/0x140
[<c0459136>] ? do_exit+0x126/0x740
[<c046bd60>] ? dequeue_signal+0x30/0x190
[<c053da0c>] ? core_sys_select+0x16c/0x270
[<c045978c>] ? do_group_exit+0x3c/0xa0
[<c046c2fe>] ? get_signal_to_deliver+0x1ce/0x460
[<c044126b>] ? check_preempt_wakeup+0x16b/0x220
[<c0408fa3>] ? do_signal+0x93/0xa50
[<c04870ee>] ? wake_futex+0x2e/0x60
[<c0489860>] ? do_futex+0x5b0/0xa40
[<c0480be0>] ? ktime_get_ts+0xd0/0x100
[<c0600623>] ? copy_to_user+0x33/0x110
[<c053c941>] ? poll_select_copy_remaining+0xe1/0x130
[<c04afed0>] ? __audit_syscall_exit+0x220/0x250
[<c04099d7>] ? do_notify_resume+0x77/0xa0
[<c083135c>] ? work_notifysig+0x13/0x1b
[<c0830000>] ? __mutex_lock_slowpath+0xb0/0x140
INFO: task str2many:9407 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
str2many D f01a5570 0 9407 1593 0x00000080
f01a5570 00200082 f619ec80 f01a5570 c082ecb0 00000400 c2c03ac0 00018f12
00000000 f619ec80 00031c6c 00000000 00031c6c c0b110c0 c0b110c0 f01a5818
c0b110c0 c0b0caa4 c0b110c0 f01a5818 f0690000 c0b110c0 f01a5818 342d2c7f
Call Trace:
[<c082ecb0>] ? schedule+0x3c0/0xae0
[<c048792d>] ? exit_robust_list+0x7d/0x140
[<c04529ca>] ? mm_release+0xda/0xf0
[<c0458d9d>] ? exit_mm+0x6d/0x140
[<c0459136>] ? do_exit+0x126/0x740
[<c05f906a>] ? plist_del+0x2a/0x60
[<c046bd60>] ? dequeue_signal+0x30/0x190
[<c045978c>] ? do_group_exit+0x3c/0xa0
[<c046c2fe>] ? get_signal_to_deliver+0x1ce/0x460
[<c0408fa3>] ? do_signal+0x93/0xa50
[<c04893c5>] ? do_futex+0x115/0xa40
[<c04b78d1>] ? move_native_irq+0x11/0x50
[<c04afed0>] ? __audit_syscall_exit+0x220/0x250
[<c04099d7>] ? do_notify_resume+0x77/0xa0
[<c083135c>] ? work_notifysig+0x13/0x1b
INFO: task str2many:9408 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
str2many D efa5b570 0 9408 1593 0x00000080
efa5b570 00200082 f619ec80 efa5b570 c082ecb0 00000000 00000000 00018f12
00000000 f619ec80 00031c6c 00000000 00031c6c c0b110c0 c0b110c0 efa5b818
c0b110c0 c0b0caa4 c0b110c0 efa5b818 eb01e000 c0b110c0 efa5b818 efb42000
Call Trace:
[<c082ecb0>] ? schedule+0x3c0/0xae0
[<c07e03ab>] ? inet_sendmsg+0x4b/0xb0
[<c048792d>] ? exit_robust_list+0x7d/0x140
[<c04529ca>] ? mm_release+0xda/0xf0
[<c0458d9d>] ? exit_mm+0x6d/0x140
[<c0459136>] ? do_exit+0x126/0x740
[<c05f906a>] ? plist_del+0x2a/0x60
[<c046bd60>] ? dequeue_signal+0x30/0x190
[<c045978c>] ? do_group_exit+0x3c/0xa0
[<c046c2fe>] ? get_signal_to_deliver+0x1ce/0x460
[<c0408fa3>] ? do_signal+0x93/0xa50
[<c04893c5>] ? do_futex+0x115/0xa40
[<c04b8b35>] ? rcu_process_callbacks+0x35/0x40
[<c045c18e>] ? __do_softirq+0xae/0x1a0
[<c04afed0>] ? __audit_syscall_exit+0x220/0x250
[<c04099d7>] ? do_notify_resume+0x77/0xa0
[<c083135c>] ? work_notifysig+0x13/0x1b

Resources