Debug stack from kernel module - linux

I am writing a kernel module to debug user processes' stacks. I found a way to get a pointer to the stack using the mm field of the task_struct structure, but when I try to read a value from the stack address, my module crashes.
Code (for existing process with pid 860):
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/pid.h>
#include <linux/proc_fs.h>
/* Task looked up by PID in init(). */
struct task_struct *ed_task;
/* Set in init() to the address one pointer-size below stack_p, then dereferenced there. */
long int *val;
/* Holds ed_task->mm->start_stack; the log below shows a user-space address (00007fff...). */
void *stack_p;
/* Index printed in the log messages; stays 0 in the code shown. */
int i = 0;
/*
 * Module entry point: looks up the task with PID 860, reads its
 * mm->start_stack, and tries to print the long stored one pointer-size
 * below that address. Always returns 0.
 */
static int __init init(void)
{
/* NOTE(review): the lookup result is not checked; a NULL task or a NULL ->mm would fault on the next line. */
ed_task = pid_task(find_vpid(860), PIDTYPE_PID);
stack_p = (void *) ed_task->mm->start_stack;
printk("stack %d: %p", i, stack_p);
val = stack_p - sizeof(void *);
/*
 * *val dereferences a user-space address directly from kernel code. The
 * oops in the log that follows reports a fault address (CR2) equal to
 * stack_p - 8, i.e. this read.
 */
printk("stack %d value: %ld", i, *val);
printk(">");
return 0;
}
/* Module exit hook; intentionally empty, init() acquires nothing that needs releasing. */
static void __exit modex(void)
{
}
/* Register the entry and exit hooks and declare the module license. */
module_init(init);
module_exit(modex);
MODULE_LICENSE("GPL");
Error:
[ 2714.296489] stack 0: 00007fffbd922e30
[ 2714.296495] BUG: unable to handle kernel paging request at 00007fffbd922e28
[ 2714.297844] IP: init+0x65/0x1000 [main3]
[ 2714.299017] PGD 10140d067
[ 2714.299019] P4D 10140d067
[ 2714.300188] PUD 0
[ 2714.303364] Oops: 0000 [#4] PREEMPT SMP
[ 2714.306523] Modules linked in: main3(O+) main1(O+) main(O+) main2(O+) rndis_host cdc_ether usbnet mii fuse rfcomm bnep nls_iso8859_1 nls_cp437 vfat fat intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm hp_wmi irqbypass iTCO_wdt mousedev sparse_keymap iTCO_vendor_support ppdev joydev mei_wdt btusb crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel btrtl pcbc btbcm aesni_intel btintel bluetooth evdev input_leds aes_x86_64 uvcvideo crypto_simd glue_helper cryptd videobuf2_vmalloc intel_cstate intel_rapl_perf videobuf2_memops ecdh_generic pcspkr videobuf2_v4l2 videobuf2_core videodev media psmouse mac_hid hid_generic arc4 nouveau iwldvm mac80211 mxm_wmi iwlwifi ttm cfg80211 drm_kms_helper drm rfkill syscopyarea sysfillrect snd_hda_codec_hdmi sysimgblt snd_hda_codec_idt
[ 2714.312788] snd_hda_codec_generic fb_sys_fops snd_hda_intel i2c_algo_bit snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd soundcore parport_pc thermal tpm_infineon hp_accel ac e1000e lis3lv02d parport wmi mei_me mei input_polldev battery video ptp pps_core button lpc_ich shpchp tpm_tis tpm_tis_core tpm sch_fq_codel vboxnetflt(O) vboxnetadp(O) pci_stub vboxpci(O) vboxdrv(O) sg ip_tables x_tables ext4 crc16 jbd2 fscrypto mbcache sr_mod cdrom sd_mod usbhid hid serio_raw atkbd libps2 ahci libahci libata scsi_mod firewire_ohci xhci_pci xhci_hcd sdhci_pci sdhci ehci_pci led_class ehci_hcd firewire_core mmc_core crc_itu_t usbcore usb_common i8042 serio [last unloaded: main]
[ 2714.318036] CPU: 3 PID: 6244 Comm: insmod Tainted: G D O 4.12.4-1-ARCH #1
[ 2714.319130] Hardware name: Hewlett-Packard HP EliteBook 8560w/1631, BIOS 68SVD Ver. F.60 03/12/2015
[ 2714.320246] task: ffff917ea5819c80 task.stack: ffff9f90455c8000
[ 2714.321406] RIP: 0010:init+0x65/0x1000 [main3]
[ 2714.322524] RSP: 0018:ffff9f90455cbc90 EFLAGS: 00010286
[ 2714.323684] RAX: 00007fffbd922e30 RBX: 0000000000000000 RCX: ffffffff8ba55a68
[ 2714.324813] RDX: 00007fffbd922e28 RSI: 0000000000000000 RDI: ffffffffc0f08031
[ 2714.325937] RBP: ffff9f90455cbc90 R08: 000000000000044b R09: ffffffff8bca0940
[ 2714.327040] R10: fffff2d0c2ecac40 R11: 0000000000000000 R12: ffffffffc0498000
[ 2714.328142] R13: ffff917e6ee4c480 R14: ffffffffc0f09050 R15: ffff917f06e24660
[ 2714.329279] FS: 00007f8999054b80(0000) GS:ffff917f3dcc0000(0000) knlGS:0000000000000000
[ 2714.330389] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2714.331516] CR2: 00007fffbd922e28 CR3: 000000010150f000 CR4: 00000000000406e0
[ 2714.332652] Call Trace:
[ 2714.333810] do_one_initcall+0x50/0x190
[ 2714.334945] ? do_init_module+0x27/0x1e6
[ 2714.336065] do_init_module+0x5f/0x1e6
[ 2714.337197] load_module+0x2610/0x2ab0
[ 2714.338349] ? vfs_read+0x115/0x130
[ 2714.339469] SYSC_finit_module+0xf6/0x110
[ 2714.340574] ? SYSC_finit_module+0xf6/0x110
[ 2714.341675] SyS_finit_module+0xe/0x10
[ 2714.342782] entry_SYSCALL_64_fastpath+0x1a/0xa5
[ 2714.343899] RIP: 0033:0x7f8998765bb9
[ 2714.344988] RSP: 002b:00007ffc068a0528 EFLAGS: 00000206 ORIG_RAX: 0000000000000139
[ 2714.346111] RAX: ffffffffffffffda RBX: 00007f8998a26aa0 RCX: 00007f8998765bb9
[ 2714.347203] RDX: 0000000000000000 RSI: 000000000041aada RDI: 0000000000000003
[ 2714.348279] RBP: 00007f8998a26af8 R08: 0000000000000000 R09: 00007f8998a28e40
[ 2714.349368] R10: 0000000000000003 R11: 0000000000000206 R12: 0000000000001020
[ 2714.350441] R13: 0000000000001018 R14: 00007f8998a26af8 R15: 0000000000000001
[ 2714.351524] Code: 48 89 15 07 13 a7 00 e8 8d 11 cf ca 48 8b 05 fb 12 a7 00 8b 35 ed 12 a7 00 48 c7 c7 31 80 f0 c0 48 8d 50 f8 48 89 15 eb 12 a7 00 <48> 8b 50 f8 e8 65 11 cf ca 48 c7 c7 45 80 f0 c0 e8 59 11 cf ca
[ 2714.353786] RIP: init+0x65/0x1000 [main3] RSP: ffff9f90455cbc90
[ 2714.354914] CR2: 00007fffbd922e28
[ 2714.356130] ---[ end trace a150fd8aba7bd1e3 ]---
Why doesn't it work? Do stacks have some special memory protection, or am I doing something wrong?

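You cannot directly access userspace memory from kernel space. To do so, you need to use the kernel-provided API function copy_from_user(). Note that copy_from_user() reads from the address space of the process that is currently running (here, the insmod process that loads the module); to read the memory of a different process, the kernel provides access_process_vm().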
Modifying your code to make use of this API should work:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/pid.h>
#include <linux/proc_fs.h>
#include <linux/uaccess.h> // Header file corresponding to copy_from_user()
/*
 * File-local state used by init(). Declared static so these generic names
 * (val, i, ...) are not visible outside this translation unit.
 */
static struct task_struct *ed_task; // Task looked up by PID
static long int *valp; // User-space source address to read from
static long int val; // Destination kernel memory for the copied value
static void *stack_p; // Value of mm->start_stack of the target task
static int i = 0; // Index printed in the log messages
/*
 * Module entry point: looks up the task with PID 2298, takes the address
 * stored in its mm->start_stack, and logs the long stored one pointer-size
 * below it, fetched with copy_from_user().
 *
 * Returns 0 on success, -ESRCH when the PID has no task or the task has no
 * user address space (->mm is NULL), and -EFAULT when the user address
 * cannot be read.
 */
static int __init init(void)
{
	const int target_pid = 2298;

	/* pid_task() walks RCU-protected data, so hold the RCU read lock. */
	rcu_read_lock();
	ed_task = pid_task(find_vpid(target_pid), PIDTYPE_PID);
	if (!ed_task || !ed_task->mm) {
		rcu_read_unlock();
		printk("no user task with pid %d\n", target_pid);
		return -ESRCH;
	}
	/*
	 * NOTE(review): ->mm is read without task_lock(); acceptable for a
	 * one-shot debugging module, but confirm before reusing elsewhere.
	 */
	stack_p = (void *) ed_task->mm->start_stack;
	rcu_read_unlock();

	printk("stack %d: %p\n", i, stack_p);
	valp = stack_p - sizeof(void *);

	/*
	 * copy_from_user() returns the number of bytes it could NOT copy, so
	 * any non-zero result means val does not hold valid data.
	 *
	 * NOTE(review): the address is resolved in the address space of the
	 * process executing this code (the insmod process), not necessarily
	 * that of target_pid; access_process_vm() is the interface for reading
	 * another task's memory.
	 */
	if (copy_from_user(&val, (const void __user *) valp, sizeof(val))) {
		printk("stack %d: cannot read %p\n", i, valp);
		return -EFAULT;
	}

	printk("stack %d value: %ld", i, val);
	printk(">");
	return 0;
}
/* Module exit hook; intentionally empty, init() acquires nothing that needs releasing. */
static void __exit modex(void)
{
}
/* Register the entry and exit hooks and declare the module license. */
module_init(init);
module_exit(modex);
MODULE_LICENSE("GPL");

Related

How to run "invd" instruction with disabled SMP support?

I'm trying to execute the "invd" instruction from a kernel module. I previously asked a similar question, How to execute “invd” instruction?, and from @Peter Cordes's answer I understand that I can't safely run this instruction on an SMP system after boot. So shouldn't I be able to run it after boot without SMP support? Since no other core is running, there should be no chance of memory inconsistency. I have the following kernel module, compiled with the -O0 flag:
/*
 * Module init hook. With local interrupts disabled it executes WBINVD,
 * runs a store loop over the on-stack array arr, and then executes INVD.
 * Always returns 0.
 */
static int __init deviceDriver_init(void){
unsigned long flags; /* not used in this function */
int LEN=10;
int STEP=1;
int VALUE=1;
int arr[LEN]; /* variable-length array: LEN is a non-constant int */
int i; /* not used in this function */
unsigned long dummy; /* receives the final rbx value from the asm */
printk(KERN_INFO "invd Driver loaded\n");
//wbinvd();
//asm volatile("cpuid\n":::);
local_irq_disable();
/*
 * Register roles, per the constraints below: rbx = write cursor (starts at
 * arr), rax = end pointer (arr+LEN), rcx = STEP, rdx = VALUE.
 * NOTE(review): movq stores 8 bytes and the stride is rcx*8 bytes, while
 * arr holds 4-byte ints, so with STEP=1 each store covers two elements.
 */
__asm__ __volatile__(
"wbinvd\n" /* write back dirty cache lines, then invalidate the caches */
"loop:" /* named assembler label; it appears as the symbol in the oops (RIP: loop+0xc) */
"movq %%rdx, (%%rbx);" /* store the 8 bytes of rdx at the address in rbx */
"leaq (%%rbx, %%rcx, 8), %%rbx;" /* rbx += rcx * 8 */
"cmpq %%rbx, %%rax;" /* compare the end pointer (rax) with the cursor (rbx) */
"jg loop;" /* repeat while rax > rbx (signed comparison) */
/*
 * Invalidate the caches WITHOUT writing dirty lines back.
 * NOTE(review): the Code: line of the oops below marks bytes 0f 08 (the
 * INVD opcode) as the faulting instruction.
 */
"invd\n"
: "=b"(dummy) // output
: "b" (arr),
"a" (arr+LEN),
"c" (STEP),
"d" (VALUE)
: "cc", "memory"
);
local_irq_enable();
//asm volatile("invd\n":::);
printk(KERN_INFO "invd execute\n");
return 0;
}
I'm still getting an error when inserting the module: the terminal shows Segmentation fault (core dumped), and dmesg shows,
[ 2590.518614] invd Driver loaded
[ 2590.518840] general protection fault: 0000 [#5] SMP PTI
I have booted my kernel with nosmp, but I do not understand why dmesg still shows SMP PTI.
$cat /proc/cmdline
BOOT_IMAGE=/boot/vmlinuz-4.15.0-136-generic root=UUID=dbe747ff-a6a5-45cb-8553-c6db6d445d3d ro quiet splash nosmp vt.handoff=7
Update post:
As I mentioned in the comment section, after disabling SGX in the BIOS I was able to run invd without any error. However, when I run the same code on a different machine with the same kernel version, I still get the same error message. It is strange and I can't explain why this is happening. As @prl mentions in the comment section, the error may be coming from the instruction following invd. I am beginning to think that may be true, because the second-to-last line of the dmesg output is highlighted in red: [ 153.527386] RIP: loop+0xc/0xf22 [noSmp8] RSP: ffffb8d9450a7be0. So it seems the error is coming from inside the loop. I have updated the __init function code according to the suggestion. I'm not good at assembly; can anyone please tell me whether the inline assembly is correct and, if it is not, how to fix it? My whole dmesg trace is,
[ 153.514293] invd Driver loaded
[ 153.514547] general protection fault: 0000 [#1] SMP PTI
[ 153.514656] Modules linked in: noSmp8(OE+) xt_CHECKSUM iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c ipt_REJECT nf_reject_ipv4 xt_tcpudp bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ip_tables x_tables ccm arc4 intel_rapl rt2800usb rt2x00usb x86_pkg_temp_thermal intel_powerclamp rt2800lib coretemp rt2x00lib mac80211 cfg80211 kvm_intel kvm irqbypass snd_hda_codec_realtek crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_hdmi pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd intel_cstate intel_rapl_perf dell_smm_hwmon dell_wmi dell_smbios dcdbas intel_wmi_thunderbolt snd_hda_codec_generic dell_wmi_descriptor wmi_bmof snd_seq_midi snd_seq_midi_event
[ 153.515454] serio_raw snd_hda_intel snd_hda_codec snd_hda_core sparse_keymap snd_hwdep snd_rawmidi joydev input_leds snd_seq snd_pcm snd_seq_device snd_timer snd soundcore mei_me mei shpchp intel_pch_thermal mac_hid acpi_pad parport_pc ppdev lp parport autofs4 hid_generic usbhid hid nouveau mxm_wmi ttm drm_kms_helper psmouse syscopyarea sysfillrect sysimgblt igb e1000e dca i2c_algo_bit ptp pps_core ahci libahci fb_sys_fops drm wmi video
[ 153.516038] CPU: 0 PID: 4024 Comm: insmod Tainted: G OE 4.15.0-136-generic #140~16.04.1-Ubuntu
[ 153.516331] Hardware name: Dell Inc. BIOS 1.3.2 01/25/2016
[ 153.516626] RIP: 0010:loop+0xc/0xf22 [noSmp8]
[ 153.516917] RSP: 0018:ffffb8d9450a7be0 EFLAGS: 00010046
[ 153.517213] RAX: ffffb8d9450a7c08 RBX: ffffb8d9450a7c08 RCX: 0000000000000001
[ 153.517513] RDX: 0000000000000001 RSI: ffffb8d9450a7be0 RDI: ffff8edaadc16490
[ 153.517814] RBP: ffffb8d9450a7c60 R08: 0000000000012c40 R09: ffffffffb39624c4
[ 153.518119] R10: ffffb8d9450a7c78 R11: 000000000000038c R12: ffffb8d9450a7c10
[ 153.518427] R13: 0000000000000000 R14: 0000000000000001 R15: ffff8eda4c6bd660
[ 153.518730] FS: 00007fd7f09cf700(0000) GS:ffff8edaadc00000(0000) knlGS:0000000000000000
[ 153.519036] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 153.519346] CR2: 00005634f95fde50 CR3: 000000040dd2c001 CR4: 00000000003606f0
[ 153.519656] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 153.519980] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 153.520289] Call Trace:
[ 153.520597] ? 0xffffffffc050d000
[ 153.520899] do_one_initcall+0x55/0x1ac
[ 153.521201] ? do_one_initcall+0x55/0x1ac
[ 153.521504] ? do_init_module+0x27/0x223
[ 153.521808] ? _cond_resched+0x32/0x50
[ 153.522107] ? kmem_cache_alloc_trace+0x165/0x1c0
[ 153.522408] do_init_module+0x5f/0x223
[ 153.522710] load_module+0x188c/0x1ea0
[ 153.523016] ? ima_post_read_file+0x83/0xa0
[ 153.523320] SYSC_finit_module+0xe5/0x120
[ 153.523623] ? SYSC_finit_module+0xe5/0x120
[ 153.523927] SyS_finit_module+0xe/0x10
[ 153.524231] do_syscall_64+0x73/0x130
[ 153.524534] entry_SYSCALL_64_after_hwframe+0x41/0xa6
[ 153.524838] RIP: 0033:0x7fd7f04fd599
[ 153.525144] RSP: 002b:00007ffda61c2968 EFLAGS: 00000202 ORIG_RAX: 0000000000000139
[ 153.525455] RAX: ffffffffffffffda RBX: 00005643631d7210 RCX: 00007fd7f04fd599
[ 153.525768] RDX: 0000000000000000 RSI: 0000564361c3226b RDI: 0000000000000003
[ 153.526084] RBP: 0000564361c3226b R08: 0000000000000000 R09: 00007fd7f07c2ea0
[ 153.526403] R10: 0000000000000003 R11: 0000000000000202 R12: 0000000000000000
[ 153.526722] R13: 00005643631d7ca0 R14: 0000000000000000 R15: 0000000000000000
[ 153.527040] Code: 00 48 8b 75 c8 48 8b 45 c8 8b 55 b8 48 63 d2 48 c1 e2 02 48 01 d0 8b 4d b4 8b 55 bc 48 89 f3 48 89 13 48 8d 1c cb 48 39 d8 7f f4 <0f> 08 48 89 d8 48 89 45 d0 e8 40 ef 73 00 48 c7 c7 c7 d0 c4 c0
[ 153.527386] RIP: loop+0xc/0xf22 [noSmp8] RSP: ffffb8d9450a7be0
[ 153.530228] ---[ end trace cc9ea64985c9fe34 ]---
So, is it not possible to run invd even without SMP?
There's 2 questions here:
a) How to execute INVD (unsafely)
For this, you need to be running at CPL=0, and you have to make sure the CPU isn't using any "processor reserved memory protections" which are part of Intel's Software Guard Extensions (an extension to allow programs to have a shielded/private/encrypted space that the OS can't tamper with, often used for digital rights management schemes but possibly usable for enhancing security/confidentiality of other things).
Note that SGX is supported in recent versions of Linux, but I'm not sure when support was introduced or how old your kernel is, or if it's enabled/disabled.
If either of these isn't true (e.g. you're at CPL=3 or there are "processor reserved memory protections") you will get a general protection fault exception.
b) How to execute INVD Safely
For this, you have to make sure that the caches (which includes "external caches" - e.g. possibly including things like eDRAM and caches built into non-volatile RAM) don't contain any modified data that will cause problems if lost. This includes data from:
IRQs. These can be disabled.
NMI and machine check exceptions. For a running OS it's mostly impossible to stop/disable these and if you can disable them then it's like crossing your fingers while ignoring critical hardware failures (an extremely bad idea).
the firmware's System Management Mode. This is a special CPU mode the firmware uses for various things (e.g. ECC scrubbing, some power management, emulation of legacy devices) that's beyond the control of the OS/kernel. It can't be disabled.
writes done by the CPU itself. This includes updating the accessed/dirty flags in page tables (which can not be disabled), plus any performance monitoring or debugging features that store data in memory (which can be "not enabled").
With these restrictions (and not forgetting the performance problems) there are only 2 cases where INVD might be sane - early firmware code that needs to determine RAM chip sizes and configure memory controllers (where it's very likely to be useful/sane), and the instant before the computer is turned off (where it's likely to be pointless).
Guesswork
I'm guessing (based on my inability to think of any other plausible reason) that you want to construct temporary shielded/private area of memory (to enhance security - e.g. so that the data you put in that area won't/can't leak into RAM). In this case (ironically) it's possible that the tool designed specifically for this job (SGX) is preventing you from doing it badly.

linux kernel panic unable to handle kernel NULL pointer dereference at

I'm facing a kernel panic, but I have no idea how to find out which software is exactly causing it. I'm trying to compile some software on a remote host using distcc, but the machines doing the compiling go down because of this issue.
Could you point me to where I should start looking? What could cause this issue? Which tools should I use?
Here is kernel panic output:
[591792.656853] IP: [< (null)>] (null)
[591792.658710] PGD 800000032ca05067 PUD 327bc6067 PMD 0
[591792.660439] Oops: 0010 [#1] SMP
[591792.661562] Modules linked in: fuse nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache nls_utf8 isofs sunrpc dm_mirror dm_region_hash dm_log dm_mod sb_edac iosf_mbi kvm_intel ppdev kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd cirrus ttm joydev drm_kms_helper sg virtio_balloon syscopyarea sysfillrect sysimgblt fb_sys_fops drm parport_pc parport drm_panel_orientation_quirks pcspkr i2c_piix4 ip_tables xfs libcrc32c sr_mod cdrom virtio_blk virtio_net ata_generic pata_acpi crct10dif_pclmul crct10dif_common crc32c_intel serio_raw floppy ata_piix libata virtio_pci virtio_ring virtio
[591792.682098] CPU: 2 PID: 25548 Comm: cc1plus Not tainted 3.10.0-957.el7.x86_64 #1
[591792.684495] Hardware name: Red Hat OpenStack Compute, BIOS 1.11.0-2.el7 04/01/2014
[591792.686923] task: ffff8ebb65ea1040 ti: ffff8ebb6b250000 task.ti: ffff8ebb6b250000
[591792.689315] RIP: 0010:[<0000000000000000>] [< (null)>] (null)
[591792.691729] RSP: 0018:ffff8ebb6b253da0 EFLAGS: 00010246
[591792.693438] RAX: 0000000000000000 RBX: ffff8ebb6b253e40 RCX: ffff8ebb6b253fd8
[591792.695716] RDX: ffff8ebb38098840 RSI: ffff8ebb6b253e40 RDI: ffff8ebb38098840
[591792.697992] RBP: ffff8ebb6b253e30 R08: 0000000000000100 R09: 0000000000000001
[591792.700271] R10: ffff8ebb7fd1f080 R11: ffffd7da0beb9380 R12: ffff8eb8417af000
[591792.702547] R13: ffff8eb875d1b000 R14: ffff8ebb6b253f24 R15: 0000000000000000
[591792.704821] FS: 0000000000000000(0000) GS:ffff8ebb7fd00000(0063) knlGS:00000000f7524740
[591792.707397] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033
[591792.709242] CR2: 0000000000000000 CR3: 000000032eb0a000 CR4: 00000000003607e0
[591792.711519] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[591792.713814] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[591792.716100] Call Trace:
[591792.716927] [<ffffffff9165270b>] ? path_openat+0x3eb/0x640
[591792.718727] [<ffffffff91653dfd>] do_filp_open+0x4d/0xb0
[591792.720451] [<ffffffff91661504>] ? __alloc_fd+0xc4/0x170
[591792.722267] [<ffffffff9163ff27>] do_sys_open+0x137/0x240
[591792.724017] [<ffffffff916a1fab>] compat_SyS_open+0x1b/0x20
[591792.725820] [<ffffffff91b78bb0>] sysenter_dispatch+0xd/0x2b
[591792.727648] Code: Bad RIP value.
[591792.728795] RIP [< (null)>] (null)
[591792.730486] RSP <ffff8ebb6b253da0>
[591792.731625] CR2: 0000000000000000
[591792.734935] ---[ end trace ccfdca9d4733e7a5 ]---
[591792.736450] Kernel panic - not syncing: Fatal exception
[591792.738708] Kernel Offset: 0x10400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
It is quite difficult to tell what went wrong from just the piece of log you have pasted.
It seems that an oops led to the kernel panic.
To help you find the real cause, I can point you to material for dissecting the panic/crash further.
link 1: analysing kernel panics
link 2 : oops
Hope it helps you! :)

Virtual address to physical address and reverse in android linux kernel

I'm trying to translate a virtual address to a physical address and then map that physical address back to a virtual address in an Android Linux kernel environment.
I can modify the kernel code, so I tried the following flow.
malloc() in android user space native binary not app
Transform va from malloc() to pa using the guide
Is there any API for determining the physical address from virtual address in Linux?
Pass pa to a system call function I made.
Re-map received pa to va in linux kernel space using ioremap()
Read value using readl() or ioread32()
But it's not working now.
The va to pa logic is in above link; in my native binary, below is the pseudo-code.
/*
 * User-space side (pseudo-code, per the text above): allocate a buffer, fill
 * it, translate its virtual address to a physical address, and pass the
 * physical address to the custom syscall.
 */
int main(){
char *va=malloc(100);
/* NOTE(review): memset() takes an int fill byte, not a string; the arguments are elided in this pseudo-code. */
memset(va, "ttttt", ...)
uintptr_t paddr;
/* Presumably the helper from the linked answer; stores the translation of va in paddr. */
vir_to_phys_user(&paddr, getpid(), va);
syscall(sys_readpa, (unsigned long)paddr);
}
system call function
/*
 * Custom syscall (pseudo-code): maps the physical address pa with ioremap()
 * and prints what it reads through that mapping.
 */
void sys_readpa(unsigned long pa){
/*
 * NOTE(review): ioremap() also takes a size argument, and the result is
 * used without a NULL check; the dmesg below logs "ioremap return null".
 * The mapping is never released with iounmap() in the code shown.
 */
void __iomem* mapped_add = ioremap(pa);
/* NOTE(review): %c prints one character, while readl()/ioread32() return a 32-bit value. */
printk("%c", readl(mapped_add));
printk("%c", ioread32(mapped_add));
}
My code has similar logic:
I define va in user space and calculate pa from va.
I set va to "ttttt".
Pass pa to linux kernel space using syscall.
Remap this pa to va in kernel space.
Read va in kernel space and expect the value to be "ttttt"
I don't know whether the va-to-pa logic is correct, but it returns an address rather than a failure.
But when the syscall is called, a kernel panic occurs - e.g. "dereference for 0000000 address", and other kinds of errors. I checked that the pa in the syscall is the same as the one in user space.
The purpose of this attempt is study. I just wonder whether this implementation is possible, given that I can modify the kernel code too, but I have met with an obstacle.
Please let me know what the problem is, or whether it is impossible. If needed, I'll add more detailed code or specific error messages.
I add detail errors and my debug log.
My user space log
: vitrual address : 0xf079c000
: 0xf079c000 -> 0xa4a8a000
I pass 0xa4a8a000 to syscall.
dmesg
[ 96.794448] accepted pa : 00000000a4a8a000
[ 96.794473] ------------[ cut here ]------------
[ 96.794500] WARNING: CPU: 6 PID: 11644 at arch/arm64/mm/ioremap.c:58 __ioremap_caller+0xc0/0xcc
[ 96.794519] Modules linked in:
[ 96.794552] CPU: 6 PID: 11644 Comm: mt Not tainted 4.14.113 #1
[ 96.794590] Call trace:
[ 96.794611] [<0000000000000000>] dump_backtrace+0x0/0x2b8
[ 96.794632] [<0000000000000000>] show_stack+0x18/0x24
[ 96.794655] [<0000000000000000>] dump_stack+0xa0/0xdc
[ 96.794676] [<0000000000000000>] __warn+0xbc/0x164
[ 96.794695] [<0000000000000000>] report_bug+0xac/0xdc
[ 96.794713] [<0000000000000000>] bug_handler+0x30/0x8c
[ 96.794732] [<0000000000000000>] brk_handler+0x94/0x150
[ 96.794751] [<0000000000000000>] do_debug_exception+0xd4/0x170
[ 96.794769] Exception stack(0xffffff8010fdbc10 to 0xffffff8010fdbd50)
[ 96.794787] bc00: 0000000000000000 0000000000000004
[ 96.794805] bc20: 00e8000000000f07 ffffff8008358714 000000000000000c 0000000000002d7c
[ 96.794822] bc40: ffffffc0119630e7 5b20205d38343434 0000000000000000 0000000000000001
[ 96.794839] bc60: 0000000000000001 00000000bab00000 0000000000000000 0000000080000000
[ 96.794856] bc80: ffffff800b18d000 0000000000000082 00000000000564c8 0000000000000074
[ 96.794873] bca0: 0000000000000074 00e8000000000f07 00000000a4a8a000 0000000000001000
[ 96.794890] bcc0: ffffff8008358714 0000000000000000 0000000000000011 000000000000018f
[ 96.794908] bce0: 000000000000018e ffffff8009316000 ffffffc8767edf80 ffffff8010fdbe80
[ 96.794926] bd00: ffffff80081fe124 ffffff8010fdbe50 ffffff80081fe188 0000000020400145
[ 96.794943] bd20: 0000000000000034 7cebe7b2cf849500 0000007fffffffff ffffff8009316000
[ 96.794961] bd40: ffffff8010fdbe80 ffffff80081fe188
[ 96.794978] [<0000000000000000>] el1_dbg+0x18/0x74
[ 96.794995] [<0000000000000000>] __ioremap_caller+0xc0/0xcc
[ 96.795014] [<0000000000000000>] __ioremap+0x10/0x1c
[ 96.795035] [<0000000000000000>] sys_readpa+0x78/0xfc
[ 96.795055] Exception stack(0xffffff8010fdbec0 to 0xffffff8010fdc000)
[ 96.795072] bec0: 00000000a4a8a000 0000000028bf4d08 0000000000000003 00000000f079c000
[ 96.795090] bee0: 0000000000000000 00000000a4a8a000 0000000000000000 000000000000018e
[ 96.795107] bf00: 00000000f09afd94 00000000f09d2b99 00000000ae6c9e84 00000000ae6a261e
[ 96.795124] bf20: 00000000ff921bf0 00000000ff921be0 00000000ae5f7b27 0000000000000000
[ 96.795142] bf40: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 96.795159] bf60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 96.795176] bf80: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 96.795195] bfa0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 96.795212] bfc0: 00000000f091ce20 0000000060000010 00000000a4a8a000 000000000000018e
[ 96.795229] bfe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
[ 96.795247] [<0000000000000000>] __sys_trace_return+0x0/0x4
[ 96.795265] ---[ end trace 91e76f3be7c0b9bd ]---
[ 96.795418] ioremap return null
I found a fix.
ioremap has check logic that validates the address.
This function is meant for reserved addresses, but I was trying to map an address that is already mapped to a process.
So I modified the check logic in ioremap, and it works well.

Syscall hijacking x64- unable to handle kernel paging request at ffffffff91000018

I wrote a kernel module which replaces a syscall, and I have a problem. The module can't be loaded because of some memory problem. I have tried to fix it for 3 hours, but it still does not work. The code works when I choose a start address closer to sys_call_table (e.g. the linux_banner address from /proc/kallsyms), but it doesn't always work.
The problem usually occurs when the function which searches for the syscall table points to an address ending in 18 (e.g. ffffffff91000018, ffffffff81000018).
Why does it not work?
Code:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/list.h>
#include <linux/unistd.h>
#include <linux/kobject.h>
#include <linux/init.h>
/* start of 64-bit kernel space is 0xffffffff80000000 */
#define END_MEM 0xffffffffffffffff /* end of 64-bit kernel */
/* First address probed by find(); note it differs from the start address named in the comment above. */
#define START_MEM 0xffffffff81000000
/* Address returned by find(); assigned in init() and indexed by syscall number. */
unsigned long long **syscall_tab;
/* Saved original __NR_mkdir entry; called by my_mkdir() and written back in exitt(). */
asmlinkage long (*orig_mkdir)(const char __user *pathname, umode_t mode);
/*
 * Replacement mkdir handler: forwards to the saved original handler, logs
 * the requested path, and returns the original handler's result.
 */
asmlinkage long my_mkdir(const char __user *pathname, umode_t mode)
{
long ret;
ret = orig_mkdir(pathname, mode);
/* NOTE(review): pathname is a __user pointer but is handed to printk's %s, which reads it directly. */
printk("Creating dir: %s", pathname);
return ret;
}
/*
 * Unlinks this module's list entry and deletes its kobject.
 * Not called anywhere in the code shown.
 */
static void hide(void)
{
list_del(&THIS_MODULE->list);
kobject_del(&THIS_MODULE->mkobj.kobj);
}
/*
 * Scans upward from START_MEM in pointer-size steps, treating each address
 * as a candidate table and comparing its __NR_close slot with sys_close.
 * Returns the first matching address, or NULL if the scan reaches END_MEM.
 */
static unsigned long long **find(void) {
unsigned long long **sctable;
unsigned long long i = START_MEM;
while (i < END_MEM) {
sctable = (unsigned long long **) i;
/*
 * NOTE(review): reads candidate memory with no check that the address is
 * mapped; presumably the source of the reported paging-request oops. Fault
 * addresses ending in 0x18 would match slot __NR_close == 3 with 8-byte
 * entries on x86-64 - confirm.
 */
if ( sctable[__NR_close] == (unsigned long long *) sys_close) {
/* NOTE(review): prints the global syscall_tab, which init() assigns only after find() returns, rather than sctable; %lx is given a pointer. */
printk("syscall_tab %lx", syscall_tab);
return &sctable[0];
}
i += sizeof(void *);
}
return NULL;
}
/*
 * Module entry point: clears CR0 bit 16, locates the syscall table with
 * find(), saves the current __NR_mkdir entry in orig_mkdir and replaces it
 * with my_mkdir. Always returns 0.
 */
static int __init init(void)
{
/* Clear bit 16 of CR0 (the x86 write-protect bit) before writing to the table. */
write_cr0(read_cr0() & (~0x10000));
if(!(syscall_tab = find())) {
/* NOTE(review): returns with CR0 bit 16 still cleared. */
return 0;
}
orig_mkdir = (void *) syscall_tab[__NR_mkdir];
printk("write_cr0");
syscall_tab[__NR_mkdir] = (unsigned long long*) my_mkdir;
printk("po podmiance");
/* NOTE(review): ~0x10000 has every bit set except bit 16, so this OR sets all other CR0 bits and leaves bit 16 as it was. */
write_cr0(read_cr0() | (~0x10000));
return 0;
}
/*
 * Module exit: put the original mkdir handler back into the system call
 * table. Does nothing when the hook was never installed.
 */
static void __exit exitt(void)
{
	if (!syscall_tab || !orig_mkdir)
		return;

	/* Clear CR0.WP (bit 16) so the read-only table can be written. */
	write_cr0(read_cr0() & ~0x10000UL);
	syscall_tab[__NR_mkdir] = (unsigned long long *)orig_mkdir;
	/* Set CR0.WP again; the mask must be the bit itself, not its complement. */
	write_cr0(read_cr0() | 0x10000UL);
}
/* Register init() as the load-time entry point and exitt() as the unload hook. */
module_init(init);
module_exit(exitt);
/* Declare the module's license as GPL. */
MODULE_LICENSE("GPL");
Error:
[ 299.273838] BUG: unable to handle kernel paging request at ffffffff91000018
[ 299.273856] IP: init+0x23/0x1000 [hijack1]
[ 299.273860] PGD b6a0c067
[ 299.273861] P4D b6a0c067
[ 299.273863] PUD b6a0d063
[ 299.273866] PMD 0
[ 299.273872] Oops: 0000 [#1] PREEMPT SMP
[ 299.273877] Modules linked in: hijack1(O+) fuse rfcomm bnep nls_iso8859_1 nls_cp437 vfat fat intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel pcbc aesni_intel joydev ppdev hp_wmi mousedev iTCO_wdt aes_x86_64 sparse_keymap iTCO_vendor_support mei_wdt crypto_simd psmouse glue_helper pcspkr evdev input_leds cryptd mac_hid intel_cstate intel_rapl_perf uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_core btusb btrtl btbcm btintel bluetooth cdc_ether ecdh_generic usbnet videodev uas media mii hid_generic nouveau mxm_wmi ttm arc4 drm_kms_helper iwldvm drm syscopyarea sysfillrect mac80211 sysimgblt iwlwifi fb_sys_fops parport_pc parport snd_hda_codec_hdmi i2c_algo_bit snd_hda_codec_idt cfg80211
[ 299.273953] rfkill snd_hda_codec_generic hp_accel thermal lis3lv02d wmi input_polldev tpm_infineon video ac battery button snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm shpchp snd_timer e1000e snd ptp soundcore tpm_tis mei_me mei pps_core lpc_ich tpm_tis_core tpm sch_fq_codel vboxnetflt(O) vboxnetadp(O) pci_stub vboxpci(O) vboxdrv(O) sg ip_tables x_tables ext4 crc16 jbd2 fscrypto mbcache sr_mod sd_mod cdrom usb_storage usbhid hid serio_raw atkbd libps2 ahci libahci libata scsi_mod xhci_pci xhci_hcd ehci_pci sdhci_pci ehci_hcd sdhci firewire_ohci led_class firewire_core mmc_core crc_itu_t usbcore usb_common i8042 serio
[ 299.274005] CPU: 2 PID: 3384 Comm: insmod Tainted: G O 4.12.4-1-ARCH #1
[ 299.274009] Hardware name: Hewlett-Packard HP EliteBook 8560w/1631, BIOS 68SVD Ver. F.60 03/12/2015
[ 299.274014] task: ffff90127cc0c740 task.stack: ffffb72907298000
[ 299.274019] RIP: 0010:init+0x23/0x1000 [hijack1]
[ 299.274023] RSP: 0018:ffffb7290729bc88 EFLAGS: 00010206
[ 299.274027] RAX: 0000000080040033 RBX: ffffffff91000000 RCX: 0000000000000000
[ 299.274031] RDX: 00000000004bec82 RSI: 00000000004bec82 RDI: 0000000080040033
[ 299.274036] RBP: ffffb7290729bc90 R08: ffff901339003980 R09: ffffffffa018970a
[ 299.274040] R10: ffffe481c211ebc0 R11: 0000000000000000 R12: ffffffffc0030000
[ 299.274044] R13: ffff9012377965e0 R14: ffffffffc0a81050 R15: ffff90132e0eca80
[ 299.274049] FS: 00007f9a842a4b80(0000) GS:ffff90133dc80000(0000) knlGS:0000000000000000
[ 299.274053] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080040033
[ 299.274057] CR2: ffffffff91000018 CR3: 000000007cdb9000 CR4: 00000000000406e0
[ 299.274061] Call Trace:
[ 299.274068] do_one_initcall+0x50/0x190
[ 299.274073] ? do_init_module+0x27/0x1e6
[ 299.274077] do_init_module+0x5f/0x1e6
[ 299.274082] load_module+0x2610/0x2ab0
[ 299.274087] ? vfs_read+0x115/0x130
[ 299.274091] SYSC_finit_module+0xf6/0x110
[ 299.274095] ? SYSC_finit_module+0xf6/0x110
[ 299.274100] SyS_finit_module+0xe/0x10
[ 299.274105] entry_SYSCALL_64_fastpath+0x1a/0xa5
[ 299.274109] RIP: 0033:0x7f9a839b3bb9
[ 299.274111] RSP: 002b:00007ffd2386ee28 EFLAGS: 00000206 ORIG_RAX: 0000000000000139
[ 299.274120] RAX: ffffffffffffffda RBX: 00007f9a83c74aa0 RCX: 00007f9a839b3bb9
[ 299.274124] RDX: 0000000000000000 RSI: 000000000041aada RDI: 0000000000000003
[ 299.274128] RBP: 00007f9a83c74af8 R08: 0000000000000000 R09: 00007f9a83c76e40
[ 299.274132] R10: 0000000000000003 R11: 0000000000000206 R12: 0000000000001020
[ 299.274136] R13: 0000000000001018 R14: 00007f9a83c74af8 R15: 0000000000000001
[ 299.274141] Code: <48> 81 7b 18 40 a8 21 a0 75 2d 48 8b 35 14 13 a5 00 48 c7 c7 35 00
[ 299.276347] RIP: init+0x23/0x1000 [hijack1] RSP: ffffb7290729bc88
[ 299.277333] CR2: ffffffff91000018
[ 299.283408] ---[ end trace 63ac9e1e3a0e12c3 ]---
Syscall hijacking x64- unable to handle kernel paging request at ffffffff91000018...
I write kernel module which replace syscall and have a problem. Module can't be loaded because is some problem in memory. I tried fix it for 3 hours, but it still not work...
The problem is, hijacking syscalls is not technically feasible. You can't do it with Linux. Linux does not have a layered design that supports this sort of thing (as opposed to Windows or other operating systems).
About the best you will be able to do is interpositioning, which redirects calls made through the PLT into your shared object. I believe this is the way Valgrind works when it replaces malloc and free.
Also note that some system calls are not routed through the PLT. See the discussion of Double-underscore names for public API functions on the glibc wiki.
Also see "Query regarding kernel modules intercepting system call" on the Kernel Newbies mailing list and "Multiple kernel modules intercepting same system call and crash during unload" on Stack Overflow. The first one is the question where the kernel developers tell the OP it is not possible. I'm just reiterating what the kernel devs have already stated.

Call trace when loading a module in Linux

I'm writing my first Linux kernel module, which actually is a RAM disk driver plus some additional features. When I tried to insmod the module, "Segmentation fault" happened.
And here is the corresponding kernel log, actually two pieces of kernel oops messages. After reading a lot of related tutorials, I still have some questions regarding this log:
In the call trace list, there are functions preceded with and without question marks. What is the special meaning of the question mark "?" for a function?
My understanding of the call trace is: every function, except the bottom one, should be called by the one below it. But for this:
[ 397.855035] [<c05a603b>] ? exact_lock+0x0/0x16
[ 397.855035] [<f787c252>] ? diag_init+0x252/0x4bd [b2bntb_diag]
[ 397.855035] [<c0451e35>] ? __blocking_notifier_call_chain+0x42/0x4d
[ 397.855035] [<f787c000>] ? diag_init+0x0/0x4bd [b2bntb_diag]
diag_init is the module init function written by me. It does not call any function named either exact_lock or __blocking_notifier_call_chain, so how come these two functions appear in the call trace here?
What is the error and how to resolve it?
BTW, the Linux kernel I'm running has version 2.6.35.6.
[ 397.850955] ------------[ cut here ]------------
[ 397.851544] WARNING: at lib/kobject.c:168 kobject_add_internal+0x3a/0x1e2()
[ 397.851601] Hardware name: VirtualBox
[ 397.851639] kobject: (f4580258): attempted to be registered with empty name!
[ 397.851678] Modules linked in: b2bntb_diag(+) fuse vboxvideo drm sunrpc ip6t_REJECT nf_conntrack_ipv6 ip6table_filter ip6_tables ipv6 vboxsf uinput snd_intel8x0 snd_ac97_codec vboxguest ac97_bus snd_seq snd_seq_device ppdev snd_pcm parport_pc parport microcode snd_timer joydev snd e1000 i2c_piix4 soundcore i2c_core snd_page_alloc [last unloaded: mperf]
[ 397.852707] Pid: 1958, comm: insmod Tainted: G W 2.6.35.6-45.fc14.i686 #1
[ 397.852749] Call Trace:
[ 397.852828] [<c043938d>] warn_slowpath_common+0x6a/0x7f
[ 397.852970] [<c05b054d>] ? kobject_add_internal+0x3a/0x1e2
[ 397.853130] [<c0439415>] warn_slowpath_fmt+0x2b/0x2f
[ 397.853182] [<c05b054d>] kobject_add_internal+0x3a/0x1e2
[ 397.853235] [<c05b098b>] kobject_add+0x5b/0x66
[ 397.853292] [<c064e8e3>] device_add+0xda/0x4b6
[ 397.853346] [<c05b7bc7>] ? kvasprintf+0x38/0x43
[ 397.853394] [<c05b08e0>] ? kobject_set_name_vargs+0x46/0x4c
[ 397.853467] [<c051b9bc>] register_disk+0x31/0x109
[ 397.853528] [<c05a6234>] ? blk_register_region+0x20/0x25
[ 397.853579] [<c05a6b08>] add_disk+0x9f/0xf0
[ 397.853627] [<c05a5bff>] ? exact_match+0x0/0xd
[ 397.853678] [<c05a603b>] ? exact_lock+0x0/0x16
[ 397.853731] [<f787c252>] diag_init+0x252/0x4bd [b2bntb_diag]
[ 397.853785] [<c0451e35>] ? __blocking_notifier_call_chain+0x42/0x4d
[ 397.853836] [<f787c000>] ? diag_init+0x0/0x4bd [b2bntb_diag]
[ 397.853889] [<c0401246>] do_one_initcall+0x4f/0x139
[ 397.853967] [<c0451e51>] ? blocking_notifier_call_chain+0x11/0x13
[ 397.854086] [<c04621a4>] sys_init_module+0x7f/0x19b
[ 397.854142] [<c07a7374>] syscall_call+0x7/0xb
[ 397.854177] ---[ end trace 6dc509801197bdc3 ]---
[ 397.855035] ------------[ cut here ]------------
[ 397.855035] kernel BUG at fs/sysfs/group.c:65!
[ 397.855035] invalid opcode: 0000 [#1] SMP
[ 397.855035] last sysfs file: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A03:00/PNP0C0A:00/power_supply/BAT0/energy_full
[ 397.855035] Modules linked in: b2bntb_diag(+) fuse vboxvideo drm sunrpc ip6t_REJECT nf_conntrack_ipv6 ip6table_filter ip6_tables ipv6 vboxsf uinput snd_intel8x0 snd_ac97_codec vboxguest ac97_bus snd_seq snd_seq_device ppdev snd_pcm parport_pc parport microcode snd_timer joydev snd e1000 i2c_piix4 soundcore i2c_core snd_page_alloc [last unloaded: mperf]
[ 397.855035]
[ 397.855035] Pid: 1958, comm: insmod Tainted: G W 2.6.35.6-45.fc14.i686 #1 /VirtualBox
[ 397.855035] EIP: 0060:[<c0520d15>] EFLAGS: 00010246 CPU: 0
[ 397.855035] EIP is at internal_create_group+0x23/0x103
[ 397.855035] EAX: f4580258 EBX: f4580258 ECX: c09d4344 EDX: 00000000
[ 397.855035] ESI: f60521f0 EDI: c09d4344 EBP: f45b7ef0 ESP: f45b7ed0
[ 397.855035] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
[ 397.855035] Process insmod (pid: 1958, ti=f45b6000 task=f3a68ca0 task.ti=f45b6000)
[ 397.855035] Stack:
[ 397.855035] 00000000 f45b7ee4 c05b08e0 8eecb04c f4580200 f4580200 f60521f0 f4580200
[ 397.855035] <0> f45b7ef8 c0520e1c f45b7f00 c0498de9 f45b7f18 c05a261a f4580250 f4580200
[ 397.855035] <0> 00000001 00000000 f45b7f38 c05a6b0f c05a5bff c05a603b f4580200 0fc00000
[ 397.855035] Call Trace:
[ 397.855035] [<c05b08e0>] ? kobject_set_name_vargs+0x46/0x4c
[ 397.855035] [<c0520e1c>] ? sysfs_create_group+0x11/0x15
[ 397.855035] [<c0498de9>] ? blk_trace_init_sysfs+0x10/0x12
[ 397.855035] [<c05a261a>] ? blk_register_queue+0x3b/0xac
[ 397.855035] [<c05a6b0f>] ? add_disk+0xa6/0xf0
[ 397.855035] [<c05a5bff>] ? exact_match+0x0/0xd
[ 397.855035] [<c05a603b>] ? exact_lock+0x0/0x16
[ 397.855035] [<f787c252>] ? diag_init+0x252/0x4bd [b2bntb_diag]
[ 397.855035] [<c0451e35>] ? __blocking_notifier_call_chain+0x42/0x4d
[ 397.855035] [<f787c000>] ? diag_init+0x0/0x4bd [b2bntb_diag]
[ 397.855035] [<c0401246>] ? do_one_initcall+0x4f/0x139
[ 397.855035] [<c0451e51>] ? blocking_notifier_call_chain+0x11/0x13
[ 397.855035] [<c04621a4>] ? sys_init_module+0x7f/0x19b
[ 397.855035] [<c07a7374>] ? syscall_call+0x7/0xb
[ 397.855035] Code: 8d 65 f4 5b 5e 5f 5d c3 55 89 e5 57 56 53 83 ec 14 0f 1f 44 00 00 85 c0 89 c3 89 55 e0 89 cf 74 0a 85 d2 75 08 83 78 18 00 75 11 <0f> 0b 83 78 18 00 be ea ff ff ff 0f 84 c5 00 00 00 8b 17 85 d2
[ 397.855035] EIP: [<c0520d15>] internal_create_group+0x23/0x103 SS:ESP 0068:f45b7ed0
[ 397.865682] ---[ end trace 6dc509801197bdc4 ]---
[root@localhost ntb]#
The first oops message is actually a warning from the kernel. The important part of the warning is right here: "attempted to be registered with empty name!". It means a descriptive name string field in a kobject was not supplied. Specifically, since in the call trace of the warning we see register_disk, I assume you forgot to properly initialize the name field of a struct you passed during registration. This is the warning part.
The next oops message is an actual crash - some code in the sysfs file system that tried to create the name of a group from the name you were supposed to give in your registration process hit a kernel runtime assertion, no doubt due to the missing name field.
So this is why it is crashing. About your questions - some of the functions you see in the trace are actually called from inlined functions (and/or macros) that are used in your code. So your code is calling them, although not by name.
About the question mark: the kernel stack tracing mechanism reports whether the address-to-symbol-name lookup it does is "reliable" or not. I'm not 100% sure what that means, but if the lookup is not reliable you get the question mark in front of the symbol name.

Resources