What caused the invalid virtual address in linux kernel - linux

Update: the faulting virtual address is an invalid address, not a null pointer.
=======================================================
An invalid virtual address fault occurred in my project even though I did not modify the code. Some information is shown below:
[ 1150.456387] Unable to handle kernel paging request at virtual address 0000000000010000
[ 1150.456393] Mem abort info:
[ 1150.456395] ESR = 0x96000005
[ 1150.456398] EC = 0x25: DABT (current EL), IL = 32 bits
[ 1150.456400] SET = 0, FnV = 0
[ 1150.456402] EA = 0, S1PTW = 0
[ 1150.456404] FSC = 0x05: level 1 translation fault
[ 1150.456407] Data abort info:
[ 1150.456409] ISV = 0, ISS = 0x00000005
[ 1150.456411] CM = 0, WnR = 0
[ 1150.456413] user pgtable: 4k pages, 39-bit VAs, pgdp=00000008e7572000
[ 1150.456415] [0000000000010000] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000
[ 1150.456676] Internal error: Oops: 96000005 [#1] PREEMPT SMP
[ 1150.456802] Skip md ftrace buffer dump for: 0xa00f50
[ 1150.457154] pc : percpu_ref_get_many+0x2c/0x104
[ 1150.457164] lr : percpu_ref_get_many+0x2c/0x104
[ 1150.457167] sp : ffffffc02e513a20
[ 1150.457168] x29: ffffffc02e513a20 x28: ffffffef9324a000 x27: fffffffe00000008
[ 1150.457171] x26: fffffffe00000000 x25: 0000008000000000 x24: ffffff88661d3800
[ 1150.457175] x23: 0000000000000000 x22: ffffff8970f94408 x21: 0000000000000001
[ 1150.457178] x20: 0000000000010000 x19: 0000000000000001 x18: ffffffc01ce1b078
[ 1150.457181] x17: 430e0000071d4992 x16: 0001400000000000 x15: 000000008e83ddb6
[ 1150.457184] x14: 00000000ab032363 x13: 00000000d860b6cd x12: ffffff88661d4230
[ 1150.457187] x11: ffffffef93f46de8 x10: 0000000000000018 x9 : 0000000000000080
[ 1150.457190] x8 : 00000000000000c0 x7 : 0000000000000000 x6 : ffffffef910a04c0
[ 1150.457192] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000002
[ 1150.457195] x2 : 0000000000000000 x1 : ffffffef92f6e370 x0 : 0000000000000005
[ 1150.457198] Call trace:
[ 1150.457199] percpu_ref_get_many+0x2c/0x104
[ 1150.457202] refill_obj_stock.llvm.3785610425212570905+0xa8/0x24c
[ 1150.457208] memcg_slab_free_hook+0x138/0x1a8
[ 1150.457212] kmem_cache_free+0x14c/0x378
[ 1150.457214] unlink_anon_vmas+0x60/0x1e0
[ 1150.457217] free_pgtables+0x54/0x190
[ 1150.457219] unmap_region+0xfc/0x148
[ 1150.457223] __do_munmap+0x588/0x79c
[ 1150.457225] __vm_munmap.llvm.11919687734878218350+0x88/0x174
[ 1150.457228] __arm64_sys_munmap+0x44/0x5c
[ 1150.457231] invoke_syscall+0x60/0x150
[ 1150.457235] el0_svc_common.llvm.8274709215075016746+0xc8/0x114
[ 1150.457237] do_el0_svc+0x28/0xa0
[ 1150.457239] el0_svc+0x28/0x90
[ 1150.457243] el0t_64_sync_handler+0x88/0xec
[ 1150.457246] el0t_64_sync+0x1b4/0x1b8
[ 1150.457250] Code: 9402597f b0014e20 91196000 97fb46b8 (f8bfc289)
I analyzed the dump with the crash tool. The invalid address fault happened on the line percpu_ptr = READ_ONCE(ref->percpu_count_ptr); as shown below:
/*
 * Internal helper. Don't use outside percpu-refcount proper. The
 * function doesn't return the pointer and let the caller test it for NULL
 * because doing so forces the compiler to generate two conditional
 * branches as it can't assume that #ref->percpu_count is not NULL.
 *
 * @ref: percpu_ref whose percpu_count_ptr word is inspected.
 * @percpu_countp: output; written only when the function returns true, and
 *                 then holds the value read from ref->percpu_count_ptr cast
 *                 to a per-cpu counter pointer.
 *
 * Returns false when any bit of __PERCPU_REF_ATOMIC_DEAD is set in the
 * fetched word, true otherwise.
 */
static inline bool __ref_is_percpu(struct percpu_ref *ref,
unsigned long __percpu **percpu_countp)
{
unsigned long percpu_ptr;
/*
 * The value of #ref->percpu_count_ptr is tested for
 * !__PERCPU_REF_ATOMIC, which may be set asynchronously, and then
 * used as a pointer. If the compiler generates a separate fetch
 * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in
 * between contaminating the pointer value, meaning that
 * READ_ONCE() is required when fetching it.
 *
 * The dependency ordering from the READ_ONCE() pairs
 * with smp_store_release() in __percpu_ref_switch_to_percpu().
 */
/* This load dereferences @ref itself, so @ref must point to valid memory. */
percpu_ptr = READ_ONCE(ref->percpu_count_ptr);
/*
 * Theoretically, the following could test just ATOMIC; however,
 * then we'd have to mask off DEAD separately as DEAD may be
 * visible without ATOMIC if we race with percpu_ref_kill(). DEAD
 * implies ATOMIC anyway. Test them together.
 */
if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD))
return false;
/* No flag bits set: the fetched word is handed back as the counter pointer. */
*percpu_countp = (unsigned long __percpu *)percpu_ptr;
return true;
}
Another case happened on the line if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD)).
The pointer variable ref comes from the address &objcg->refcnt, as shown below:
/*
 * Take a reference on @objcg by calling percpu_ref_get() on the address of
 * its refcnt member. The address passed down is derived from @objcg, so it
 * is only as valid as the @objcg pointer the caller supplies.
 */
static inline void obj_cgroup_get(struct obj_cgroup *objcg)
{
percpu_ref_get(&objcg->refcnt);
}
So it can't be invalid. Why did the invalid address problem happen?

Related

Freeing allocated memory linux kernel device driver module

I am writing code for a Linux kernel module that allocates space and stores some data in it. The kmalloc allocation happens in the write function of the VFS API, because I need the size of the buffer coming from the user application and I cannot access it outside the write function. Where should I place the kfree() call? I cannot place it in the cleanup function, because it gives me an error whenever I try to unload the module.
/*
 * Write handler: keeps the most recent strings in the five slots of
 * pulseptr[]. While pulsecount is below 5 a new buffer is allocated for the
 * next slot and the incoming string is formatted into it; once all five
 * slots are in use, entries 1..4 are copied down into 0..3 and the incoming
 * string is formatted into slot 4.
 *
 * NOTE(review): problems visible in this snippet as posted:
 * - sizeof(buffer) is the size of a pointer (buffer is a const char *), not
 *   the size of the incoming data; @length is never used.
 * - the kmalloc() result is not checked before sprintf() writes through it.
 * - sprintf() with "%s" copies up to the first NUL byte with no bound, so it
 *   can write past the end of the allocated slot.
 * - the surrounding text says buffer comes from the user application, so it
 *   is presumably a user-space pointer that should be copied in with
 *   copy_from_user() rather than read directly -- confirm.
 * - @offset is unused and no value is returned although the return type is
 *   ssize_t.
 */
ssize_t hcsr04_write(struct file *filp, const char *buffer, size_t length, loff_t * offset)
{
if (pulsecount < (5)){
/* Fill phase: allocate the next free slot and store the new string in it. */
pulseptr[pulsecount] = kmalloc(sizeof(buffer),GFP_ATOMIC);
sprintf (pulseptr[pulsecount],"%s",buffer);
pulsecount++;
}
else{
/* All slots used: shift entries down by one, then overwrite the last slot. */
int j = 0;
while (j<4){
sprintf (pulseptr[j], "%s", (pulseptr[j+1]) ); // [5 , 20 , 30 , 70 , 50] ===> [20 , 30 , 70 , 50 , 50]
j++;
}
sprintf (pulseptr[4],"%s",buffer);
}
}
This is my write function.
/*
 * Module exit handler: frees the pulse storage, releases both GPIOs, clears
 * hcsr04_lock, and removes the character device, its device-number region
 * and the kobject.
 *
 * NOTE(review): kfree() is handed pulseptr itself, whereas hcsr04_write()
 * stores its kmalloc() results in the individual elements pulseptr[i].
 * Presumably pulseptr is an array that was not itself obtained from
 * kmalloc() (its declaration is not shown here), in which case the
 * allocated elements are what should be freed -- confirm.
 */
static void __exit hcsr04_module_cleanup(void)
{
//if (pulseptr!= {NULL,NULL,NULL,NULL,NULL}){
kfree(pulseptr);
printk(KERN_INFO "Dynamic memory freed successfully.");
//}
//pulseptr = {NULL,NULL,NULL,NULL,NULL};
gpio_free( GPIO_OUT );
gpio_free( GPIO_IN );
hcsr04_lock = 0;
cdev_del(&hcsr04_cdev);
unregister_chrdev_region( hcsr04_dev, 1 );
kobject_put( hcsr04_kobject );
}
This is the cleanup function. If I execute the rmmod command with the cleanup function like this, I get the following error:
[ 93.294821] 8<--- cut here ---
[ 93.297928] Unable to handle kernel paging request at virtual address bcf03574
[ 93.305253] pgd = 3cdbb3d3
[ 93.307993] [bcf03574] *pgd=00000000
[ 93.311621] Internal error: Oops: 5 [#1] SMP ARM
[ 93.316301] Modules linked in: hcsr04(O-) nfc bnep bluetooth ecdh_generic ecc ipv6 hello(PO) g_serial libcomposite udc_core brcmfmac brcmutil sha256_generic libsha256 vc4 cfg80211 bcm2835_codec(C) rfkill bcm2835_isp(C) bcm2835_v4l2(C) v4l2_mem2mem cec bcm2835_mmal_vchiq(C) videobuf2_dma_contig snd_soc_core videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 snd_compress snd_pcm_dmaengine videobuf2_common snd_pcm videodev snd_timer mc snd raspberrypi_hwmon vc_sm_cma(C) uio_pdrv_genirq uio fixed
[ 93.360563] CPU: 0 PID: 478 Comm: rmmod Tainted: P C O 5.4.72-v7 #1
[ 93.368060] Hardware name: BCM2835
[ 93.371516] PC is at kfree+0x48/0x2bc
[ 93.375235] LR is at hcsr04_module_cleanup+0x18/0xcac [hcsr04]
[ 93.381148] pc : [<802fd004>] lr : [<7f17a36c>] psr: 20010013
[ 93.387501] sp : b80abf08 ip : b80abf38 fp : b80abf34
[ 93.392797] r10: 00000081 r9 : b80aa000 r8 : 801011c4
[ 93.398095] r7 : 7f17a36c r6 : 7e92dc38 r5 : 7f17c000 r4 : bcf03570
[ 93.404713] r3 : bab24000 r2 : 00000024 r1 : 00000000 r0 : 7f17c000
[ 93.411333] Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user
[ 93.418568] Control: 10c5383d Table: 38d5806a DAC: 00000055
[ 93.424394] Process rmmod (pid: 478, stack limit = 0x274b04e4)
[ 93.430307] Stack: (0xb80abf08 to 0xb80ac000)
[ 93.434728] bf00: 801c8158 80989110 7f17c0c0 00000000 7e92dc38 00000081
[ 93.443026] bf20: 801011c4 b80aa000 b80abf4c b80abf38 7f17a36c 802fcfc8 7f17c0c0 00000000
[ 93.451324] bf40: b80abfa4 b80abf50 801ca940 7f17a360 72736368 00003430 00000000 80da6b10
[ 93.459621] bf60: 80101068 7e92cbe8 00000000 801011c4 b80aa000 00000006 b80abfac b80abf88
[ 93.467918] bf80: 80228294 8022938c 00000000 80e05f88 00000000 7e92de2c 00000000 b80abfa8
[ 93.476216] bfa0: 80101000 801ca7e0 00000000 7e92de2c 01677694 00000800 6a0f1b00 7e92dbe4
[ 93.484513] bfc0: 00000000 7e92de2c 7e92dc38 00000081 7e92df1f 7e92dc34 01677190 00000001
[ 93.492811] bfe0: 004e1f70 7e92dbec 004c866f 76f37218 60010030 01677694 00000000 00000000
[ 93.501100] Backtrace:
[ 93.503593] [<802fcfbc>] (kfree) from [<7f17a36c>] (hcsr04_module_cleanup+0x18/0xcac [hcsr04])
[ 93.512331] r9:b80aa000 r8:801011c4 r7:00000081 r6:7e92dc38 r5:00000000 r4:7f17c0c0
[ 93.520195] [<7f17a354>] (hcsr04_module_cleanup [hcsr04]) from [<801ca940>] (sys_delete_module+0x16c/0x244)
[ 93.530073] r5:00000000 r4:7f17c0c0
[ 93.533704] [<801ca7d4>] (sys_delete_module) from [<80101000>] (ret_fast_syscall+0x0/0x28)
[ 93.542084] Exception stack(0xb80abfa8 to 0xb80abff0)
[ 93.547209] bfa0: 00000000 7e92de2c 01677694 00000800 6a0f1b00 7e92dbe4
[ 93.555507] bfc0: 00000000 7e92de2c 7e92dc38 00000081 7e92df1f 7e92dc34 01677190 00000001
[ 93.563801] bfe0: 004e1f70 7e92dbec 004c866f 76f37218
[ 93.568922] r5:7e92de2c r4:00000000
[ 93.572549] Code: e3a02024 e5933000 e1a04624 e0243492 (e5943004)
[ 93.578730] ---[ end trace cff8773499967501 ]---
The error goes away once I comment out kfree.
I realised what I did wrong. I should have called kfree(pulseptr[0]), where 0 can be the index of any of the memory locations I have allocated.

PCI driver 'Oops: Kernel access of bad area' error

I want to write a simple PCI Express driver for a Xilinx FPGA, but I am not able to request the memory region for the PCI device.
The question is: how do I claim that I/O memory area for a custom driver? I want to write the third byte of that area from the driver.
The details are below. What am I missing? Thanks.
1-) I am getting this error:
[ 4.345350] Unable to handle kernel paging request for data at address 0x00000005
[ 4.353978] Faulting instruction address: 0x80000000002c9370
[ 4.358337] Oops: Kernel access of bad area, sig: 11 [#1]
[ 4.362426] BE SMP NR_CPUS=24 CoreNet Generic
[ 4.365477] Modules linked in: fpgapcie(O+) ucc_uart
[ 4.369139] CPU: 0 PID: 1999 Comm: udevd Tainted: G O 4.19.26+gc0c2141 #1
[ 4.375924] NIP: 80000000002c9370 LR: 80000000002c9350 CTR: c00000000053acfc
[ 4.381753] REGS: c0000001ee2bb1c0 TRAP: 0300 Tainted: G O (4.19.26+gc0c2141)
[ 4.389146] MSR: 000000008002b000 <CE,EE,FP,ME> CR: 22228242 XER: 20000000
[ 4.394982] DEAR: 0000000000000005 ESR: 0000000000800000 IRQMASK: 0
GPR00: 80000000002c9350 c0000001ee2bb440 80000000002d1f00 000000000000001a
GPR04: 0000000000000001 000000000000022d c000000000f30548 c000000001013000
GPR08: 00000001fec37000 0000000000000003 0000000000000000 0000000000000020
GPR12: 0000000028228444 c000000001013000 0000000000020000 000000013c323ac8
GPR16: 000000013c323ae0 80000000002cc000 c000000000a194b0 c0000001f0eaa1c0
GPR20: 00000000006000c0 c000000000ed9da0 0000000000000000 0000000000000100
GPR24: 000000000000001c 000000000f700000 c0000001f3034880 0000000000000000
GPR28: c0000001f337b800 00000000000000f7 c0000001f337b8a0 0000000000000000
2-) Code piece in PCI probe function:
/*
 * PCI probe callback: registers the device in pci_cdev[], creates and adds a
 * character device for it, logs usage hints, enables the PCI device,
 * requests BAR_IO, checks that BAR_IO is flagged as an I/O resource and
 * finally issues one outb().
 *
 * NOTE(review): this function returns 1 on the success path and 0 on the
 * error path. Linux probe callbacks are expected to return 0 on success and
 * a negative errno on failure -- confirm against the PCI driver guide.
 */
static int pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
int ret, minor;
struct cdev *cdev;
dev_t devno;
unsigned long pci_io_addr = 0;
/* add this pci device in pci_cdev */
if ((minor = pci_cdev_add(pci_cdev, MAX_DEVICE, dev)) < 0)
goto error;
/* compute major/minor number */
devno = MKDEV(major, minor);
/* allocate struct cdev */
cdev = cdev_alloc();
/* NOTE(review): the cdev_alloc() result is used below without a NULL check. */
/* NOTE(review): the next comment is unterminated as pasted, so the cdev_init() call and the owner assignment that follow it are part of the comment and are not compiled. */
/* initialise struct cde
cdev_init(cdev, &pci_ops);
cdev->owner = THIS_MODULE;
/* register cdev */
ret = cdev_add(cdev, devno, 1);
if (ret < 0) {
dev_err(&(dev->dev), "Can't register character device\n");
goto error;
}
pci_cdev[minor].cdev = cdev;
dev_info(&(dev->dev), "%s The major device number is %d (%d).\n",
"Registeration is a success", MAJOR(devno), MINOR(devno));
dev_info(&(dev->dev), "If you want to talk to the device driver,\n");
dev_info(&(dev->dev), "you'll have to create a device file. \n");
dev_info(&(dev->dev), "We suggest you use:\n");
dev_info(&(dev->dev), "mknod %s c %d %d\n", DEVICE_NAME, MAJOR(devno), MINOR(devno));
dev_info(&(dev->dev), "The device file name is important, because\n");
dev_info(&(dev->dev), "the ioctl program assumes that's the\n");
dev_info(&(dev->dev), "file you'll use.\n");
/* enable the device */
/* NOTE(review): the return value of pci_enable_device() is ignored. */
pci_enable_device(dev);
/* 'alloc' IO to talk with the card */
/* NOTE(review): the message below is printed when pci_request_region() returns 0; presumably 0 means the request succeeded, which would make this test inverted -- confirm. */
if (pci_request_region(dev, BAR_IO, "IO-pci") == 0) {
printk(KERN_ALERT "The memory you requested from fpgapcie is already reserved by CORE pci driver.");
}
/* NOTE(review): the next line is a comment that lost its delimiters in the paste; as written it is not valid C. */
check that BAR_IO is *really* IO region
if ((pci_resource_flags(dev, BAR_IO) & IORESOURCE_IO) != IORESOURCE_IO) {
dev_err(&(dev->dev), "BAR2 isn't an IO region\n");
cdev_del(cdev);
goto error;
}
pci_io_addr = pci_resource_start(dev,BAR_IO);
/* NOTE(review): this prints the address of the local variable pci_io_addr, not the BAR start stored in it, and %02X does not match a pointer argument. */
printk(KERN_INFO "PCI start adress: %02X", &pci_io_addr);
/* NOTE(review): outb() presumably takes (value, port); as written the computed address is passed as the value and 5 as the port, which would match the reported fault at address 0x00000005 -- confirm. */
outb(pci_io_addr+3, 5);
printk(KERN_INFO "Message from PCI device to user: 5");
return 1;
error:
printk(KERN_INFO "An error occuder while probing pci");
return 0;
}
3-) lspci -v output:
0001:01:00.0 Memory controller: Xilinx Corporation Device 7021
Subsystem: Xilinx Corporation Device 0007
Flags: bus master, fast devsel, latency 0, IRQ 41
Memory at c10000000 (32-bit, non-prefetchable) [size=2K]
Capabilities: [40] Power Management version 3
Capabilities: [48] MSI: Enable- Count=1/1 Maskable- 64bit+
Capabilities: [60] Express Endpoint, MSI 00
Capabilities: [100] Device Serial Number 00-00-00-01-01-00-0a-35
Kernel driver in use: yusufpci
Kernel modules: fpgapcie
4-) full dmesg:
[ 4.285663] Module pci init
[ 4.294787] yusufpci 0001:01:00.0: Registeration is a success The major device number is 247 (0).
[ 4.302367] yusufpci 0001:01:00.0: If you want to talk to the device driver,
[ 4.308116] yusufpci 0001:01:00.0: you'll have to create a device file.
[ 4.313516] yusufpci 0001:01:00.0: We suggest you use:
[ 4.317354] yusufpci 0001:01:00.0: mknod virtual_pci c 247 0
[ 4.321713] yusufpci 0001:01:00.0: The device file name is important, because
[ 4.327553] yusufpci 0001:01:00.0: the ioctl program assumes that's the
[ 4.332866] yusufpci 0001:01:00.0: file you'll use.
[ 4.336459] The memory you requested from fpgapcie is already reserved by CORE pci driver. This is not an error.
[ 4.336463] PCI start adress: EE2BB4B0
[ 4.345350] Unable to handle kernel paging request for data at address 0x00000005
[ 4.353978] Faulting instruction address: 0x80000000002c9370
[ 4.358337] Oops: Kernel access of bad area, sig: 11 [#1]
[ 4.362426] BE SMP NR_CPUS=24 CoreNet Generic
[ 4.365477] Modules linked in: fpgapcie(O+) ucc_uart
[ 4.369139] CPU: 0 PID: 1999 Comm: udevd Tainted: G O 4.19.26+gc0c2141 #1
[ 4.375924] NIP: 80000000002c9370 LR: 80000000002c9350 CTR: c00000000053acfc
[ 4.381753] REGS: c0000001ee2bb1c0 TRAP: 0300 Tainted: G O (4.19.26+gc0c2141)
[ 4.389146] MSR: 000000008002b000 <CE,EE,FP,ME> CR: 22228242 XER: 20000000
[ 4.394982] DEAR: 0000000000000005 ESR: 0000000000800000 IRQMASK: 0
GPR00: 80000000002c9350 c0000001ee2bb440 80000000002d1f00 000000000000001a
GPR04: 0000000000000001 000000000000022d c000000000f30548 c000000001013000
GPR08: 00000001fec37000 0000000000000003 0000000000000000 0000000000000020
GPR12: 0000000028228444 c000000001013000 0000000000020000 000000013c323ac8
GPR16: 000000013c323ae0 80000000002cc000 c000000000a194b0 c0000001f0eaa1c0
GPR20: 00000000006000c0 c000000000ed9da0 0000000000000000 0000000000000100
GPR24: 000000000000001c 000000000f700000 c0000001f3034880 0000000000000000
GPR28: c0000001f337b800 00000000000000f7 c0000001f337b8a0 0000000000000000
[ 4.453632] NIP [80000000002c9370] .pci_probe+0x220/0x2b4 [fpgapcie]
[ 4.458680] LR [80000000002c9350] .pci_probe+0x200/0x2b4 [fpgapcie]
[ 4.463639] Call Trace:
[ 4.464775] [c0000001ee2bb440] [80000000002c9350] .pci_probe+0x200/0x2b4 [fpgapcie] (unreliable)
[ 4.472262] [c0000001ee2bb500] [c0000000004b77c8] .pci_device_probe+0x11c/0x1f4
[ 4.478270] [c0000001ee2bb5a0] [c000000000561ebc] .really_probe+0x26c/0x38c
[ 4.483927] [c0000001ee2bb640] [c0000000005621ac] .driver_probe_device+0x78/0x154
[ 4.490106] [c0000001ee2bb6d0] [c0000000005623d8] .__driver_attach+0x150/0x154
[ 4.496025] [c0000001ee2bb760] [c00000000055f424] .bus_for_each_dev+0x94/0xdc
[ 4.501856] [c0000001ee2bb800] [c0000000005615fc] .driver_attach+0x24/0x38
[ 4.507426] [c0000001ee2bb870] [c000000000560ec8] .bus_add_driver+0x264/0x2a4
[ 4.513258] [c0000001ee2bb910] [c000000000563384] .driver_register+0x88/0x178
[ 4.519089] [c0000001ee2bb990] [c0000000004b5a68] .__pci_register_driver+0x50/0x64
[ 4.525355] [c0000001ee2bba00] [80000000002c9564] .pci_init_module+0xc0/0x444 [fpgapcie]
[ 4.532144] [c0000001ee2bba80] [c0000000000020b4] .do_one_initcall+0x64/0x224
[ 4.537978] [c0000001ee2bbb50] [c0000000000f443c] .do_init_module+0x70/0x260
[ 4.543722] [c0000001ee2bbbf0] [c0000000000f6564] .load_module+0x1e6c/0x2400
[ 4.549467] [c0000001ee2bbd10] [c0000000000f6d28] .__se_sys_finit_module+0xcc/0x100
[ 4.555819] [c0000001ee2bbe30] [c0000000000006b0] system_call+0x60/0x6c
[ 4.561127] Instruction dump:
[ 4.562785] e86a8080 38810070 f9210070 4800041d e8410028 e9210070 3d420000 e94a8088
[ 4.569231] 39290003 5529063e e94a0000 7c0004ac <992a0005> 39200001 3d420000 992d0684
[ 4.575854] ---[ end trace 2d15cff7ba1b3255 ]---
Problem solved. However, the FPGA is programmed to respond by lighting its GPIO LEDs when I write the third byte of the memory-mapped area. I tried writing the first 15 bytes of the MMIO region, but it did not work: the LEDs did not light up, although the code stopped giving errors.
I also cannot read bytes from the MMIO space using the readb() function. It gives an "unrecoverable machine check" error.
Problem solved using this code.
/*
 * Claim the device's regions, look up the start, end and length of BAR_IO,
 * map that BAR and write the value 2 to each of its first 15 bytes through
 * the mapping.
 */
pci_request_regions(dev, "fpgapcie");
pci_io_startaddr = pci_resource_start(dev,BAR_IO);
pci_io_endaddr = pci_resource_end(dev,BAR_IO);
pci_io_size = pci_resource_len(dev,BAR_IO);
printk(KERN_INFO "Region start: %lX, Region end: %lX, Size: % lX",pci_io_startaddr,pci_io_endaddr,pci_io_size);
/*
 * ioremap() takes the physical start address and the LENGTH of the range to
 * map, so pass the BAR length rather than its end address.
 * NOTE(review): the results of pci_request_regions() and ioremap() are not
 * checked here; the enclosing function should bail out if either fails.
 */
pci_io_addr = ioremap(pci_io_startaddr, pci_io_size);
printk(KERN_INFO "PCI start adress: %lX", pci_io_addr);
for(i = 0;i<15;i++) /* Write first 15 byte */
{
writeb(2, pci_io_addr+i);
printk(KERN_INFO "%lX, Message from PCI device to user: 2", pci_io_addr+i);
}
and the dmesg output:
fpgapcie: loading out-of-tree module taints kernel.
fpgapcie 0001:01:00.0: Registeration is a success The major device number is 247 (0).
fpgapcie 0001:01:00.0: If you want to talk to the device driver,
fsl-fman-port ffe488000.port fm1-gb0: renamed from eth0
fpgapcie 0001:01:00.0: you'll have to create a device file.
fpgapcie 0001:01:00.0: We suggest you use:
fpgapcie 0001:01:00.0: mknod virtual_pci c 247 0
fpgapcie 0001:01:00.0: The device file name is important, because
fpgapcie 0001:01:00.0: the ioctl program assumes that's the
fpgapcie 0001:01:00.0: file you'll use.
Region start: 210000000, Region end: 2100007FF, Size: 800
PCI start adress: 8000080088900000
8000080088900000, Message from PCI device to user: 2
8000080088900001, Message from PCI device to user: 2
8000080088900002, Message from PCI device to user: 2
8000080088900003, Message from PCI device to user: 2
8000080088900004, Message from PCI device to user: 2
8000080088900005, Message from PCI device to user: 2
8000080088900006, Message from PCI device to user: 2
8000080088900007, Message from PCI device to user: 2
8000080088900008, Message from PCI device to user: 2
8000080088900009, Message from PCI device to user: 2
800008008890000A, Message from PCI device to user: 2
800008008890000B, Message from PCI device to user: 2
800008008890000C, Message from PCI device to user: 2
800008008890000D, Message from PCI device to user: 2
800008008890000E, Message from PCI device to user: 2

What causes the kernel oops in the "make -j4" process

I am using an embedded board with 4 ARMv8 cores (Xilinx ZynqMP).
I found that using make -j4 to compile software triggers a kernel oops.
The stack of the oops is almost the same each time, but the type of oops differs; I have seen three types:
undefined instruction
unable to handle kernel paging request
NULL pointer access
I don't understand why compiling would cause a kernel oops. Does the make/gcc program interact with the kernel? Please help analyze what causes this problem.
Thanks.
An oops message:
[ 240.633378] Internal error: undefined instruction: 0 [#1] SMP
[ 240.633383] Unable to handle kernel paging request at virtual address ffffff8016df6480
[ 240.633386] Unable to handle kernel paging request at virtual address ffffff8016df6480
[ 240.633388] Unable to handle kernel paging request at virtual address ffffff8016df6480
[ 240.633394] Mem abort info:
[ 240.633395] Mem abort info:
[ 240.633396] Mem abort info:
[ 240.633398] Exception class = DABT (current EL), IL = 32 bits
[ 240.633399] Exception class = DABT (current EL), IL = 32 bits
[ 240.633401] Exception class = DABT (current EL), IL = 32 bits
[ 240.633402] SET = 0, FnV = 0
[ 240.633403] SET = 0, FnV = 0
[ 240.633405] SET = 0, FnV = 0
[ 240.633406] EA = 0, S1PTW = 0
[ 240.633407] EA = 0, S1PTW = 0
[ 240.633408] EA = 0, S1PTW = 0
[ 240.633409] Data abort info:
[ 240.633409] Data abort info:
[ 240.633410] Data abort info:
[ 240.633411] ISV = 0, ISS = 0x00000046
[ 240.633412] ISV = 0, ISS = 0x00000046
[ 240.633413] ISV = 0, ISS = 0x00000046
[ 240.633414] CM = 0, WnR = 1
[ 240.633415] CM = 0, WnR = 1
[ 240.633416] CM = 0, WnR = 1
[ 240.633419] swapper pgtable: 4k pages, 39-bit VAs, pgd = ffffff80090b1000
[ 240.633421] swapper pgtable: 4k pages, 39-bit VAs, pgd = ffffff80090b1000
[ 240.633424] swapper pgtable: 4k pages, 39-bit VAs, pgd = ffffff80090b1000
[ 240.633425] [ffffff8016df6480] *pgd=000000097fffe003
[ 240.633427] [ffffff8016df6480] *pgd=000000097fffe003
[ 240.633429] [ffffff8016df6480] *pgd=000000097fffe003
[ 240.633430] , *pud=000000097fffe003
[ 240.633431] , *pud=000000097fffe003
[ 240.633432] , *pud=000000097fffe003
[ 240.633433] , *pmd=0000000000000000
[ 240.633434] , *pmd=0000000000000000
[ 240.633435] , *pmd=0000000000000000
[ 240.792167] Modules linked in:
[ 240.795207] CPU: 0 PID: 4831 Comm: cc1plus Not tainted 4.14.0 #10
[ 240.801280] Hardware name: xlnx,zynqmp (DT)
[ 240.805447] task: ffffffc973db2000 task.stack: ffffff800ef88000
[ 240.811355] PC is at calc_global_load+0x1e0/0x1e8
[ 240.816039] LR is at do_timer+0x1c/0x28
[ 240.819856] pc : [<ffffff80080c2d68>] lr : [<ffffff80080f37c4>] pstate: a00001c5
[ 240.827233] sp : ffffff8008003df0
[ 240.830531] x29: ffffff8008003df0 x28: ffffffc973db2000
[ 240.835826] x27: ffffffc97ff6dfc0 x26: ffffffc97ff6dfc0
[ 240.841121] x25: ffffffc97ff6dfd0 x24: 0000003803af96c6
[ 240.846416] x23: 0000000000000001 x22: ffffff8008eac000
[ 240.851711] x21: ffffff8008eac000 x20: 0000003803afa045
[ 240.857006] x19: ffffff8008df6000 x18: 0000000000000001
[ 240.862300] x17: 0000007fb5be73b0 x16: 00000000012aec58
[ 240.867595] x15: 0000000000400000 x14: 0000000000007fff
[ 240.872890] x13: 00000000012c9000 x12: 0000007fb0b2e758
[ 240.878185] x11: ffffff8008df6000 x10: 0000000000000040
[ 240.883480] x9 : ffffff8008e0c8e8 x8 : ffffff8008e96000
[ 240.888775] x7 : ffffffc977800268 x6 : 00000000ffffca7b
[ 240.894069] x5 : 00ffffffffffffff x4 : 000000497718f000
[ 240.899364] x3 : 0000003803af7900 x2 : ffffff8008df6000
[ 240.904659] x1 : fffffffffffffb7f x0 : ffffff8008e96ab8
[ 240.909955] Process cc1plus (pid: 4831, stack limit = 0xffffff800ef88000)
[ 240.916724] Call trace:
[ 240.919157] Exception stack(0xffffff8008003cb0 to 0xffffff8008003df0)
[ 240.925580] 3ca0: ffffff8008e96ab8 fffffffffffffb7f
[ 240.933393] 3cc0: ffffff8008df6000 0000003803af7900 000000497718f000 00ffffffffffffff
[ 240.941205] 3ce0: 00000000ffffca7b ffffffc977800268 ffffff8008e96000 ffffff8008e0c8e8
[ 240.949017] 3d00: 0000000000000040 ffffff8008df6000 0000007fb0b2e758 00000000012c9000
[ 240.956829] 3d20: 0000000000007fff 0000000000400000 00000000012aec58 0000007fb5be73b0
[ 240.964641] 3d40: 0000000000000001 ffffff8008df6000 0000003803afa045 ffffff8008eac000
[ 240.972453] 3d60: ffffff8008eac000 0000000000000001 0000003803af96c6 ffffffc97ff6dfd0
[ 240.980265] 3d80: ffffffc97ff6dfc0 ffffffc97ff6dfc0 ffffffc973db2000 ffffff8008003df0
[ 240.988077] 3da0: ffffff80080f37c4 ffffff8008003df0 ffffff80080c2d68 00000000a00001c5
[ 240.995889] 3dc0: 0000000000000040 ffffffc976f6cd20 0000008000000000 ffffff8008003ec0
[ 241.003700] 3de0: ffffff8008003df0 ffffff80080c2d68
[ 241.008563] [<ffffff80080c2d68>] calc_global_load+0x1e0/0x1e8
[ 241.014293] [<ffffff80081000b4>] tick_do_update_jiffies64.part.2+0x6c/0x118
[ 241.021236] [<ffffff80081001bc>] tick_sched_do_timer+0x5c/0x68
[ 241.027051] [<ffffff8008100248>] tick_sched_timer+0x30/0x90
[ 241.032607] [<ffffff80080f0b3c>] __hrtimer_run_queues+0xec/0x168
[ 241.038595] [<ffffff80080f0db0>] hrtimer_interrupt+0xa0/0x220
[ 241.044327] [<ffffff80087af788>] arch_timer_handler_phys+0x28/0x48
[ 241.050489] [<ffffff80080e0798>] handle_percpu_devid_irq+0x80/0x138
[ 241.056737] [<ffffff80080dafbc>] generic_handle_irq+0x24/0x38
[ 241.062465] [<ffffff80080db66c>] __handle_domain_irq+0x5c/0xb8
[ 241.068281] [<ffffff8008081500>] gic_handle_irq+0x68/0xc0
[ 241.073661] Exception stack(0xffffff800ef8bec0 to 0xffffff800ef8c000)
[ 241.080086] bec0: 00000000000036cf 0000000000000015 0000000000000001 0000000003c71808
[ 241.087898] bee0: 0000000003c717f8 0000000000008000 000000000393b8c8 0000000000008000
[ 241.095710] bf00: 000000000000006d 0000000000007fff ffffffffffffc000 0000000000000000
[ 241.103522] bf20: 0000007fb0b2e758 00000000012c9000 0000000000007fff 0000000000400000
[ 241.111334] bf40: 00000000012aec58 0000007fb5be73b0 0000000000000001 0000000000000001
[ 241.119146] bf60: 00000000000036ce 00000000039f0820 0000000000000000 0000000003b99de8
[ 241.126958] bf80: 0000000000000003 0000000003b9a268 00000000000036ce 00000000012d4000
[ 241.134770] bfa0: 0000000000000042 0000007fc21c9290 0000000000798440 0000007fc21c9290
[ 241.142582] bfc0: 00000000007983ac 0000000060000000 0000000003c95000 00000000ffffffff
[ 241.150394] bfe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 241.158207] [<ffffff8008083614>] el0_irq_naked+0x44/0x4c
[ 241.163503] Code: d27a0003 d2800005 1707ffb4 d503201f (d67c03c0)
[ 241.169578] ---[ end trace 8c598a94062fb1cc ]---
[ 241.174178] Kernel panic - not syncing: Fatal exception in interrupt
[ 241.180514] SMP: stopping secondary CPUs
[ 242.251098] SMP: failed to stop secondary CPUs 0-3
[ 242.255793] Kernel Offset: disabled
[ 242.259266] CPU features: 0x002004
[ 242.262650] Memory Limit: none
[ 242.265690] ---[ end Kernel panic - not syncing: Fatal exception in interrupt

How to analyze and debug a Linux kernel panic which occurs while running an application?

I'm getting kernel panics while running a Linux application on my custom hardware design, but another custom board that is similar to this one can run the same application without any problem. I ran memory calibration and a stress-app test on this board, and neither showed any memory-related issue. How can I debug this to determine whether it is a hardware or a software problem? I can debug the kernel and U-Boot properly using JTAG, but how can I pinpoint the cause of this kernel panic, which occurs only while running a certain application?
Here is the panic message (LOG)
[ 308.268356] Unable to handle kernel paging request at virtual address
54494156
[ 308.275647] pgd = 80004000
[ 308.278365] [54494156] *pgd=00000000
[ 308.281977] Internal error: Oops: 80000005 [#1] PREEMPT SMP ARM
[ 308.287904] Modules linked in: tw6869 mxc_v4l2_capture ipu_bg_overlay_sdc
ipu_still ipu_prp_enc videobuf2_dma_contig ipu_csi_enc videobuf2_memops
adv7610_video ipu_fg_overlay_sdc v4l2_int_device galcore(O)
[ 308.306375] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G O 4.1.15-2.0.0-
ga+yocto+gff4e28b #1
[ 308.309584] tw6869 0000:01:00.0: tw6869_querystd: vch1: unknown std
detected
[ 308.322648] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[ 308.329184] task: ce11bc00 ti: ce14c000 task.ti: ce14c000
[ 308.334598] PC is at 0x54494156
[ 308.337766] LR is at cpuidle_enter_state+0x90/0x350
[ 308.342650] pc : [<54494156>] lr : [<806d59b0>] psr: 800b00b3
[ 308.342650] sp : ce14df30 ip : 00000017 fp : ce14df8c
[ 308.354132] r10: 80e02508 r9 : 80e08ad0 r8 : 80d9b880
[ 308.359361] r7 : 00000001 r6 : 80e88160 r5 : 00000047 r4 : c04b271c
[ 308.365892] r3 : 54494157 r2 : 00000000 r1 : 80e08a80 r0 : 80d9b880
[ 308.372430] Flags: Nzcv IRQs off FIQs on Mode SVC_32 ISA Thumb Segment
kernel
[ 308.380004] Control: 10c5387d Table: 5f38404a DAC: 00000015
[ 308.385755] Process swapper/3 (pid: 0, stack limit = 0xce14c210)
[ 308.391768] Stack: (0xce14df30 to 0xce14e000)
[ 308.396135] df20: 00000000 d0f2c140 c04b271c 00000047
[ 308.404333] df40: 00000000 00000000 004832f1 00000000 c04b271c 00000047
00000000 dc8ba30f
[ 308.412537] df60: 80d97320 ce14c000 80e025e8 80e88160 d0f2bed0 80e08a74
80a02970 ce14dfa0
[ 308.420738] df80: ce14df9c ce14df90 806d5cc0 806d592c ce14dfdc ce14dfa0
80171a74 806d5ca8
[ 308.428939] dfa0: 80d9b880 80d9b880 00000000 80e02e00 80e025f0 80e878ea
00000001 80c09164
[ 308.437140] dfc0: 80d9aec8 80d97300 10c0387d 80e8d35c ce14dff4 ce14dfe0
8010fae0 801717d8
[ 308.445340] dfe0: 5e12c06a 00000015 00000000 ce14dff8 1010162c 8010f994
00000000 00000000
[ 308.453529] Backtrace:
[ 308.456036] [<806d5920>] (cpuidle_enter_state) from [<806d5cc0>]
(cpuidle_enter+0x24/0x28)
[ 308.464320] r10:ce14dfa0 r9:80a02970 r8:80e08a74 r7:d0f2bed0 r6:80e88160
r5:80e025e8
[ 308.472277] r4:ce14c000
[ 308.474868] [<806d5c9c>] (cpuidle_enter) from [<80171a74>]
(cpu_startup_entry+0x2a8/0x43c)
[ 308.483171] [<801717cc>] (cpu_startup_entry) from [<8010fae0>]
(secondary_start_kernel+0x158/0x164)
[ 308.492232] r7:80e8d35c
[ 308.494813] [<8010f988>] (secondary_start_kernel) from [<1010162c>]
(0x1010162c)
[ 308.502223] r5:00000015 r4:5e12c06a
[ 308.505862] Code: bad PC value
[ 308.508940] ---[ end trace 1d6a0d2b2019793f ]---
[ 308.513579] Kernel panic - not syncing: Attempted to kill the idle task!
[ 308.520313] CPU1: stopping
[ 308.523060] CPU: 1 PID: 37 Comm: cfinteractive Tainted: G D O 4.1.15-
2.0.0-ga+yocto+gff4e28b #1
[ 308.532731] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[ 308.539274] Backtrace:
[ 308.541792] [<8010c358>] (dump_backtrace) from [<8010c5d4>]
(show_stack+0x20/0x24)
[ 308.549379] r7:80e87b48 r6:80e45ca0 r5:00000000 r4:80e45ca0
[ 308.555168] [<8010c5b4>] (show_stack) from [<80919780>]
(dump_stack+0x7c/0xbc)
[ 308.562421] [<80919704>] (dump_stack) from [<80110038>]
(handle_IPI+0x2e4/0x2f8)
[ 308.569831] r7:80e87b48 r6:00000005 r5:80d99c34 r4:80d99c34
[ 308.575608] [<8010fd54>] (handle_IPI) from [<80101594>]
(gic_handle_irq+0x68/0x6c)
[ 308.583192] r10:00000002 r9:ce020000 r8:00000001 r7:f4a00100 r6:ce321b28
r5:80e02f7c
[ 308.591152] r4:f4a0010c r3:ce321b28
[ 308.594795] [<8010152c>] (gic_handle_irq) from [<8010d240>]
(__irq_svc+0x40/0x74)
[ 308.602295] Exception stack(0xce321b28 to 0xce321b70)
[ 308.607371] 1b20: 00000001 80bbbbd4 80e8f9c0 00000000 ffffe000 80e8f9c0
[ 308.615572] 1b40: 00000010 00000000 00000001 ce020000 00000002 ce321bd4
ce321b60 ce321b70
[ 308.623769] 1b60: 80493dc0 80137668 200f0113 ffffffff
[ 308.628833] r7:ce321b5c r6:ffffffff r5:200f0113 r4:80137668
[ 308.634617] [<801375b4>] (__do_softirq) from [<80137c20>]
(irq_exit+0xd0/0x10c)
[ 308.641940] r10:ce321c40 r9:ce020000 r8:00000001 r7:00000000 r6:00000010
r5:00000000
[ 308.649901] r4:80d99c34
[ 308.652491] [<80137b50>] (irq_exit) from [<8017beb4>]
(__handle_domain_irq+0x90/0xfc)
[ 308.660335] r5:00000000 r4:80d99c34
[ 308.663984] [<8017be24>] (__handle_domain_irq) from [<80101560>]
(gic_handle_irq+0x34/0x6c)
[ 308.672348] r10:00000004 r9:ce321d90 r8:00000001 r7:f4a00100 r6:ce321c40
r5:80e02f7c
[ 308.680311] r4:f4a0010c r3:ce321c40
[ 308.683954] [<8010152c>] (gic_handle_irq) from [<8010d240>]
(__irq_svc+0x40/0x74)
[ 308.691453] Exception stack(0xce321c40 to 0xce321c88)
[ 308.696528] 1c40: 00000003 d0f2f89c 00000003 00000003 80e02650 d0f13180
80e02e00 d0f13184
[ 308.704729] 1c60: 00000001 ce321d90 00000004 ce321cbc 00000003 ce321c88
801a739c 801a73cc
[ 308.712923] 1c80: 200f0013 ffffffff
[ 308.716425] r7:ce321c74 r6:ffffffff r5:200f0013 r4:801a73cc
[ 308.722208] [<801a715c>] (smp_call_function_many) from [<801a7468>]
(smp_call_function+0x48/0x88)
[ 308.731093] r10:ffffffff r9:ce321d88 r8:00000000 r7:00000000 r6:00000001
r5:ce321d90
[ 308.739054] r4:80110990
[ 308.741634] [<801a7420>] (smp_call_function) from [<801a74e0>]
(on_each_cpu+0x38/0x90)
[ 308.749564] r7:00000000 r6:00000001 r5:ce321d90 r4:80110990
[ 308.755342] [<801a74a8>] (on_each_cpu) from [<80110d2c>]
(twd_rate_change+0x38/0x40)
[ 308.763099] r7:00000000 r6:00000002 r5:ce321d88 r4:ffffffff
[ 308.768890] [<80110cf4>] (twd_rate_change) from [<801523c4>]
(notifier_call_chain+0x54/0x94)
[ 308.777357] [<80152370>] (notifier_call_chain) from [<80152894>]
(__srcu_notifier_call_chain+0x54/0x70)
[ 308.786763] r9:ce321d88 r8:00000002 r7:00000000 r6:00000000 r5:ce0a6184
r4:ce0a619c
[ 308.794650] [<80152840>] (__srcu_notifier_call_chain) from [<801528d8>]
(srcu_notifier_call_chain+0x28/0x30)
[ 308.804491] r10:00000000 r9:80e8a384 r8:00000002 r7:80e02508 r6:ce026000
r5:80e73b44
[ 308.812451] r4:ce0a6180
[ 308.815042] [<801528b0>] (srcu_notifier_call_chain) from [<80746114>]
(__clk_notify+0xa0/0xa8)
[ 308.823683] [<80746074>] (__clk_notify) from [<807461c4>]
(__clk_recalc_rates+0xa8/0xac)
[ 308.831786] r8:ce003f80 r7:00000001 r6:1daee080 r5:00000002 r4:ce026000
[ 308.838630] [<8074611c>] (__clk_recalc_rates) from [<80746198>]
(__clk_recalc_rates+0x7c/0xac)
[ 308.847254] r7:00000001 r6:3b5dc100 r5:00000002 r4:ce026000
[ 308.853030] [<8074611c>] (__clk_recalc_rates) from [<80746198>]
(__clk_recalc_rates+0x7c/0xac)
[ 308.861654] r7:00000001 r6:3b5dc100 r5:00000002 r4:ce026a80
[ 308.867438] [<8074611c>] (__clk_recalc_rates) from [<80748b50>]
(clk_core_set_parent+0x1bc/0x2dc)
[ 308.876324] r7:00000001 r6:00000000 r5:ce026380 r4:ce026400
[ 308.882102] [<80748994>] (clk_core_set_parent) from [<80748c9c>]
(clk_set_parent+0x2c/0x30)
[ 308.890467] r9:00000000 r8:179a7b00 r7:80e02508 r6:000f32a0 r5:00060ae0
r4:80eec6ac
[ 308.898357] [<80748c70>] (clk_set_parent) from [<806d4220>]
(imx6q_set_target+0x490/0x544)
[ 308.906653] [<806d3d90>] (imx6q_set_target) from [<806ca034>]
(__cpufreq_driver_target+0x184/0x2b0)
[ 308.915712] r10:00000000 r9:00000000 r8:80eec5e4 r7:00000000 r6:80e02508
r5:ce66b500
[ 308.923671] r4:00000000
[ 308.926253] [<806c9eb0>] (__cpufreq_driver_target) from [<806d2d44>]
(cpufreq_interactive_speedchange_task+0x264/0x354)
[ 308.937048] r10:80d9ae10 r9:80e02650 r8:80e02e00 r7:00000000 r6:000f32a0
r5:80d9ae10
[ 308.945007] r4:d0f1ee10
[ 308.947591] [<806d2ae0>] (cpufreq_interactive_speedchange_task) from
[<801514e4>] (kthread+0xfc/0x114)
[ 308.956911] r10:00000000 r9:00000000 r8:00000000 r7:806d2ae0 r6:00000000
r5:ce303480
[ 308.964870] r4:00000000
[ 308.967456] [<801513e8>] (kthread) from [<80108028>]
(ret_from_fork+0x14/0x2c)
[ 308.974692] r7:00000000 r6:00000000 r5:801513e8 r4:ce303480
[ 308.980460] CPU0: stopping
[ 308.983202] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G D O 4.1.15-2.0.0-
ga+yocto+gff4e28b #1
[ 308.992439] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[ 308.998981] Backtrace:
[ 309.001488] [<8010c358>] (dump_backtrace) from [<8010c5d4>]
(show_stack+0x20/0x24)
[ 309.009073] r7:80e87b48 r6:80e45ca0 r5:00000000 r4:80e45ca0
[ 309.014859] [<8010c5b4>] (show_stack) from [<80919780>]
(dump_stack+0x7c/0xbc)
[ 309.022112] [<80919704>] (dump_stack) from [<80110038>]
(handle_IPI+0x2e4/0x2f8)
[ 309.029522] r7:80e87b48 r6:00000005 r5:80d99c34 r4:80d99c34
[ 309.035298] [<8010fd54>] (handle_IPI) from [<80101594>]
(gic_handle_irq+0x68/0x6c)
[ 309.042881] r10:80e02508 r9:80e08b14 r8:d0f04ed0 r7:f4a00100 r6:80e01e98
r5:80e02f7c
[ 309.050843] r4:f4a0010c r3:80e01e98
[ 309.054488] [<8010152c>] (gic_handle_irq) from [<8010d240>]
(__irq_svc+0x40/0x74)
[ 309.061987] Exception stack(0x80e01e98 to 0x80e01ee0)
[ 309.067058] 1e80: 00000000 d0f084c0
[ 309.075261] 1ea0: dc8ba30f dc8ba30f cef8dbc4 00000047 80e88160 00000004
d0f04ed0 80e08b14
[ 309.083462] 1ec0: 80e02508 80e01f3c 80e01e80 80e01ee0 8091f2bc 806d59e0
600f0013 ffffffff
[ 309.091652] r7:80e01ecc r6:ffffffff r5:600f0013 r4:806d59e0
[ 309.097430] [<806d5920>] (cpuidle_enter_state) from [<806d5cc0>]
(cpuidle_enter+0x24/0x28)
[ 309.105708] r10:80e01f50 r9:80a02970 r8:80e08a74 r7:d0f04ed0 r6:80e88160
r5:80e025e8
[ 309.113667] r4:80e00000
[ 309.116248] [<806d5c9c>] (cpuidle_enter) from [<80171a74>]
(cpu_startup_entry+0x2a8/0x43c)
[ 309.124540] [<801717cc>] (cpu_startup_entry) from [<80917448>]
(rest_init+0x98/0x9c)
[ 309.132297] r7:80e02500
[ 309.134893] [<809173b0>] (rest_init) from [<80d00d9c>]
(start_kernel+0x404/0x424)
[ 309.142390] r5:80e8d000 r4:80e8d04c
[ 309.146037] [<80d00998>] (start_kernel) from [<1000807c>] (0x1000807c)
[ 309.152583] CPU2: stopping
[ 309.155327] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G D O 4.1.15-2.0.0-
ga+yocto+gff4e28b #1
[ 309.164565] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[ 309.171106] Backtrace:
[ 309.173611] [<8010c358>] (dump_backtrace) from [<8010c5d4>]
(show_stack+0x20/0x24)
[ 309.181196] r7:80e87b48 r6:80e45ca0 r5:00000000 r4:80e45ca0
[ 309.186977] [<8010c5b4>] (show_stack) from [<80919780>]
(dump_stack+0x7c/0xbc)
[ 309.194230] [<80919704>] (dump_stack) from [<80110038>]
(handle_IPI+0x2e4/0x2f8)
[ 309.201639] r7:80e87b48 r6:00000005 r5:80d99c34 r4:80d99c34
[ 309.207417] [<8010fd54>] (handle_IPI) from [<80101594>]
(gic_handle_irq+0x68/0x6c)
[ 309.215001] r10:80e02508 r9:80e08b14 r8:d0f1eed0 r7:f4a00100 r6:ce14bee8
r5:80e02f7c
[ 309.222960] r4:f4a0010c r3:ce14bee8
[ 309.226604] [<8010152c>] (gic_handle_irq) from [<8010d240>]
(__irq_svc+0x40/0x74)
[ 309.234103] Exception stack(0xce14bee8 to 0xce14bf30)
[ 309.239179] bee0: 00000000 d0f224c0 dc8ba30f dc8ba30f ce933307 00000047
[ 309.247383] bf00: 80e88160 00000004 d0f1eed0 80e08b14 80e02508 ce14bf8c
ce14bed0 ce14bf30
[ 309.255578] bf20: 8091f2bc 806d59e0 600f0013 ffffffff
[ 309.260643] r7:ce14bf1c r6:ffffffff r5:600f0013 r4:806d59e0
[ 309.266420] [<806d5920>] (cpuidle_enter_state) from [<806d5cc0>]
(cpuidle_enter+0x24/0x28)
[ 309.274698] r10:ce14bfa0 r9:80a02970 r8:80e08a74 r7:d0f1eed0 r6:80e88160
r5:80e025e8
[ 309.282656] r4:ce14a000
[ 309.285237] [<806d5c9c>] (cpuidle_enter) from [<80171a74>]
(cpu_startup_entry+0x2a8/0x43c)
[ 309.293528] [<801717cc>] (cpu_startup_entry) from [<8010fae0>]
(secondary_start_kernel+0x158/0x164)
[ 309.302587] r7:80e8d35c
[ 309.305168] [<8010f988>] (secondary_start_kernel) from [<1010162c>]
(0x1010162c)
[ 309.312579] r5:00000015 r4:5e12c06a
[ 309.316221] ---[ end Kernel panic - not syncing: Attempted to kill the
idle task!
I would be grateful if you could guide me on this issue. Are there any hardware debugging methods to pinpoint issues like this while running applications on Linux?
Regards,
Kulunu.

Linux ARM Abort oops from user space

On an ARM Cortex-A9 (Freescale iMX6SL) running Linux kernel 3.0.35, I am seeing a kernel oops with PC and LR (0x402aca32/0x402ac3cd) that are in user space. Mode is USER_32 and ISA is Thumb. There is no code on this system that executes in Thumb mode.
[ 597.195954] Unable to handle kernel paging request at virtual address 000a34d4
[ 597.205436] pgd = c35dc000
[ 597.208149] [000a34d4] *pgd=8c454831, *pte=8374c1cf, *ppte=8374ca3e
[ 597.214657] Internal error: Oops: 81f [#1] PREEMPT
[ 597.219609] Modules linked in: ...<snip>...
[ 597.243075] CPU: 0 Tainted: P W (3.0.35-aaaaaa #1)
[ 597.249162] PC is at 0x402aca32
[ 597.252304] LR is at 0x402ac3cd
[ 597.255448] pc : [<402aca32>] lr : [<402ac3cd>] psr: 60000030
[ 597.255453] sp : be8fc220 ip : 00000000 fp : 00000809
[ 597.266940] r10: 00000004 r9 : 40336ea0 r8 : 00000818
[ 597.272168] r7 : 4034c25c r6 : 00011b31 r5 : 00001250 r4 : 000a2cc8
[ 597.278698] r3 : 00000000 r2 : 000a34d0 r1 : 00011b30 r0 : 00000809
[ 597.285229] Flags: nZCv IRQs on FIQs on Mode USER_32 ISA Thumb Segment user
[ 597.292629] Control: 10c53c7d Table: 835dc059 DAC: 00000015
[ 597.298378] Process wancontrol (pid: 7551, stack limit = 0xce9f02e8)
[ 597.307890] ---[ end trace f50414d2a3d239df ]---
[ 597.312516] Kernel panic - not syncing: Fatal exception in interrupt
[ 597.325257] Backtrace:
[ 597.327567] [<c0135248>] (dump_backtrace+0x0/0x110) from [<c041e188>] (dump_stack+0x18/0x1c)
[ 597.336837] r6:c3088d20 r5:ce9f02e8 r4:c0537b48 r3:00000002
[ 597.342382] [<c041e170>] (dump_stack+0x0/0x1c) from [<c041e200>] (panic+0x74/0x194)
[ 597.350794] [<c041e18c>] (panic+0x0/0x194) from [<c01355b0>] (die+0x1a4/0x1e4)
[ 597.358402] r3:07ffff00 r2:ce9f1db8 r1:c0537f90 r0:c04ac8ba
[ 597.364044] r7:00000000
[ 597.366591] [<c013540c>] (die+0x0/0x1e4) from [<c013a7b0>] (__do_kernel_fault+0x6c/0x8c)
[ 597.375465] r8:00000000 r7:ce9f1fb0 r6:cee35900 r5:0000081f r4:000a34d4
[ 597.382060] [<c013a744>] (__do_kernel_fault+0x0/0x8c) from [<c013aa90>] (do_page_fault+0x2c0/0x2f0)
[ 597.391760] r8:cee35900 r7:000a34d4 r6:c3088d20 r5:ce9f1fb0 r4:00000001
[ 597.398438] r3:ce9f1fb0
[ 597.400909] [<c013a7d0>] (do_page_fault+0x0/0x2f0) from [<c012c1b8>] (do_DataAbort+0x38/0xa0)
[ 597.410178] [<c012c180>] (do_DataAbort+0x0/0xa0) from [<c0131a88>] (ret_from_exception+0x0/0x10)
[ 597.419298] Exception stack(0xce9f1fb0 to 0xce9f1ff8)
[ 597.424664] 1fa0: 00000809 00011b30 000a34d0 00000000
[ 597.434300] 1fc0: 000a2cc8 00001250 00011b31 4034c25c 00000818 40336ea0 00000004 00000809
[ 597.442488] 1fe0: 00000000 be8fc220 402ac3cd 402aca32 60000030 ffffffff
[ 597.455455] r8:00000818 r7:4034c25c r6:00011b31 r5:0000000f r4:0000040f
If the code was executing in user space, the process should get a SIGSEGV.
/*
 * Dispatch a fault according to the mode recorded in @regs.
 *
 * When user_mode(regs) is true, @err and @trap are stored on the current
 * thread and the signal named by info->si_signo is forced on the current
 * task. Otherwise the fault is passed to die() with @str, @regs and @err.
 */
void arm_notify_die(const char *str, struct pt_regs *regs,
		struct siginfo *info, unsigned long err, unsigned long trap)
{
	/* Not a user-mode fault: hand it to die(). */
	if (!user_mode(regs)) {
		die(str, regs, err);
		return;
	}

	/* User-mode fault: record the details on the thread, then signal it. */
	current->thread.error_code = err;
	current->thread.trap_no = trap;
	force_sig_info(info->si_signo, info, current);
}
Why does it go into die()?
This happens repeatedly with the same backtrace for the same address 0x000a34d4. I can't say the stack has been hosed because the values look the same in different instances of this kernel oops.

Resources