Getting args from kprobe not finding regs->rdi x86_64 - linux

I'm writing a kernel module under Scientific Linux 6.3 x86_64 and I'm looking to use kprobes. In this module, I need access to the first argument of a function on return, so jprobes are out.
I found this very helpful post: Getting function arguments using kprobes
However, when I try accessing regs->rdi inside my probe, the compiler complains with
error: ‘struct pt_regs’ has no member named ‘rdi’
During my module initialization, I run this check with no problems:
#ifndef CONFIG_X86_64
printk(KERN_ALERT "Error: this module only supports x86_64!\n");
return -EINVAL;
#endif
Is there anything else I should be looking at?
uname -r returns 2.6.32-279.14.1.el6.x86_64.debug
Here is a MWE:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/blkdev.h>
static int kprobe_test(struct kprobe *p, struct pt_regs *regs) {
printk(KERN_INFO "rdi: %p\n", regs->rdi);
return 0;
}
static struct kprobe myprobe = {
.pre_handler = NULL,
.post_handler = kprobe_test,
.fault_handler = NULL,
.addr = (kprobe_opcode_t *) generic_make_request,
};
int init_module(void) {
register_kprobe(&myprobe);
return 0;
}
void cleanup_module(void) {
unregister_kprobe(&myprobe);
}
Which results in:
...
/home/user/kmod/kprobe_64_mwe/kprobe_mwe.c:7: error: ‘struct pt_regs’ has no member named ‘rdi’
...

The definition of pt_reg changes when __KERNEL__ is defined. Try using di instead.

Related

How my custom module on linux 3.2.28 can make a call to print_cpu_info?

I'm going to implement my custom module in which information for CPU is printed using print_cpu_info(). In order to call print_cpu_info(), I have included the needed header file, but it doesn't work. Here is my module.
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/alternative.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/mtrr.h>
#include <asm/cacheflush.h>
extern struct cpuinfo_x86 boot_cpu_data;
int cpuinfox86_init(void)
{
print_cpu_info(&boot_cpu_data);
return 0;
}
void cpuinfox86_exit(void)
{
printk("good bye cpu\n");
}
module_init(cpuinfox86_init);
module_exit(cpuinfox86_exit);
MODULE_LICENSE("GPL");
After compiling this module, I get
make -C /lib/modules/3.2.28-2009720166/build SUBDIRS=/home/tracking/1031_oslab modules
make[1]: Entering directory `/usr/src/linux-3.2.28'
CC [M] /home/tracking/1031_oslab/module.o
Building modules, stage 2.
MODPOST 1 modules
WARNING: "print_cpu_info" [/home/tracking/1031_oslab/module.ko] undefined!
CC /home/tracking/1031_oslab/module.mod.o
LD [M] /home/tracking/1031_oslab/module.ko
make[1]: Leaving directory `/usr/src/linux-3.2.28'
any idea?
"print_cpu_info" is not exported symbol, so it can not be used by modules. However, you can use "kallsyms_lookup_name", which is exported, to get the address of "print_cpu_info" and execute the function call using a function pointer.
static void* find_sym( const char *sym ) { // find address kernel symbol sym
static unsigned long faddr = 0; // static !!!
// ----------- nested functions are a GCC extension ---------
int symb_fn( void* data, const char* sym, struct module* mod, unsigned long addr ) {
if( 0 == strcmp( (char*)data, sym ) ) {
faddr = addr;
return 1;
}
else return 0;
};
// --------------------------------------------------------
kallsyms_on_each_symbol( symb_fn, (void*)sym );
return (void*)faddr;
}
static void (*cpuInfo)(struct cpuinfo_x86 *);
How to use it:
unsigned int cpu = 0;
struct cpuinfo_x86 *c;
cpuInfo = find_sym("print_cpu_info");
for_each_online_cpu(cpu)
{
c = &cpu_data(cpu);
cpuInfo(c);
}

Intercepting a system call [duplicate]

I'm trying to write some simple test code as a demonstration of hooking the system call table.
"sys_call_table" is no longer exported in 2.6, so I'm just grabbing the address from the System.map file, and I can see it is correct (Looking through the memory at the address I found, I can see the pointers to the system calls).
However, when I try to modify this table, the kernel gives an "Oops" with "unable to handle kernel paging request at virtual address c061e4f4" and the machine reboots.
This is CentOS 5.4 running 2.6.18-164.10.1.el5. Is there some sort of protection or do I just have a bug? I know it comes with SELinux, and I've tried putting it in to permissive mode, but it doesn't make a difference
Here's my code:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/unistd.h>
void **sys_call_table;
asmlinkage int (*original_call) (const char*, int, int);
asmlinkage int our_sys_open(const char* file, int flags, int mode)
{
printk("A file was opened\n");
return original_call(file, flags, mode);
}
int init_module()
{
// sys_call_table address in System.map
sys_call_table = (void*)0xc061e4e0;
original_call = sys_call_table[__NR_open];
// Hook: Crashes here
sys_call_table[__NR_open] = our_sys_open;
}
void cleanup_module()
{
// Restore the original call
sys_call_table[__NR_open] = original_call;
}
I finally found the answer myself.
http://www.linuxforums.org/forum/linux-kernel/133982-cannot-modify-sys_call_table.html
The kernel was changed at some point so that the system call table is read only.
cypherpunk:
Even if it is late but the Solution
may interest others too: In the
entry.S file you will find: Code:
.section .rodata,"a"
#include "syscall_table_32.S"
sys_call_table -> ReadOnly You have to
compile the Kernel new if you want to
"hack" around with sys_call_table...
The link also has an example of changing the memory to be writable.
nasekomoe:
Hi everybody. Thanks for replies. I
solved the problem long ago by
modifying access to memory pages. I
have implemented two functions that do
it for my upper level code:
#include <asm/cacheflush.h>
#ifdef KERN_2_6_24
#include <asm/semaphore.h>
int set_page_rw(long unsigned int _addr)
{
struct page *pg;
pgprot_t prot;
pg = virt_to_page(_addr);
prot.pgprot = VM_READ | VM_WRITE;
return change_page_attr(pg, 1, prot);
}
int set_page_ro(long unsigned int _addr)
{
struct page *pg;
pgprot_t prot;
pg = virt_to_page(_addr);
prot.pgprot = VM_READ;
return change_page_attr(pg, 1, prot);
}
#else
#include <linux/semaphore.h>
int set_page_rw(long unsigned int _addr)
{
return set_memory_rw(_addr, 1);
}
int set_page_ro(long unsigned int _addr)
{
return set_memory_ro(_addr, 1);
}
#endif // KERN_2_6_24
Here's a modified version of the original code that works for me.
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/unistd.h>
#include <asm/semaphore.h>
#include <asm/cacheflush.h>
void **sys_call_table;
asmlinkage int (*original_call) (const char*, int, int);
asmlinkage int our_sys_open(const char* file, int flags, int mode)
{
printk("A file was opened\n");
return original_call(file, flags, mode);
}
int set_page_rw(long unsigned int _addr)
{
struct page *pg;
pgprot_t prot;
pg = virt_to_page(_addr);
prot.pgprot = VM_READ | VM_WRITE;
return change_page_attr(pg, 1, prot);
}
int init_module()
{
// sys_call_table address in System.map
sys_call_table = (void*)0xc061e4e0;
original_call = sys_call_table[__NR_open];
set_page_rw(sys_call_table);
sys_call_table[__NR_open] = our_sys_open;
}
void cleanup_module()
{
// Restore the original call
sys_call_table[__NR_open] = original_call;
}
Thanks Stephen, your research here was helpful to me. I had a few problems, though, as I was trying this on a 2.6.32 kernel, and getting WARNING: at arch/x86/mm/pageattr.c:877 change_page_attr_set_clr+0x343/0x530() (Not tainted) followed by a kernel OOPS about not being able to write to the memory address.
The comment above the mentioned line states:
// People should not be passing in unaligned addresses
The following modified code works:
int set_page_rw(long unsigned int _addr)
{
return set_memory_rw(PAGE_ALIGN(_addr) - PAGE_SIZE, 1);
}
int set_page_ro(long unsigned int _addr)
{
return set_memory_ro(PAGE_ALIGN(_addr) - PAGE_SIZE, 1);
}
Note that this still doesn't actually set the page as read/write in some situations. The static_protections() function, which is called inside of set_memory_rw(), removes the _PAGE_RW flag if:
It's in the BIOS area
The address is inside .rodata
CONFIG_DEBUG_RODATA is set and the kernel is set to read-only
I found this out after debugging why I still got "unable to handle kernel paging request" when trying to modify the address of kernel functions. I was eventually able to solve that problem by finding the page table entry for the address myself and manually setting it to writable. Thankfully, the lookup_address() function is exported in version 2.6.26+. Here is the code I wrote to do that:
void set_addr_rw(unsigned long addr) {
unsigned int level;
pte_t *pte = lookup_address(addr, &level);
if (pte->pte &~ _PAGE_RW) pte->pte |= _PAGE_RW;
}
void set_addr_ro(unsigned long addr) {
unsigned int level;
pte_t *pte = lookup_address(addr, &level);
pte->pte = pte->pte &~_PAGE_RW;
}
Finally, while Mark's answer is technically correct, it'll case problem when ran inside Xen. If you want to disable write-protect, use the read/write cr0 functions. I macro them like this:
#define GPF_DISABLE write_cr0(read_cr0() & (~ 0x10000))
#define GPF_ENABLE write_cr0(read_cr0() | 0x10000)
Hope this helps anyone else who stumbles upon this question.
Note that the following will also work instead of using change_page_attr and cannot be depreciated:
static void disable_page_protection(void) {
unsigned long value;
asm volatile("mov %%cr0,%0" : "=r" (value));
if (value & 0x00010000) {
value &= ~0x00010000;
asm volatile("mov %0,%%cr0": : "r" (value));
}
}
static void enable_page_protection(void) {
unsigned long value;
asm volatile("mov %%cr0,%0" : "=r" (value));
if (!(value & 0x00010000)) {
value |= 0x00010000;
asm volatile("mov %0,%%cr0": : "r" (value));
}
}
If you are dealing with kernel 3.4 and later (it can also work with earlier kernels, I didn't test it) I would recommend a smarter way to acquire the system callы table location.
For example
#include <linux/module.h>
#include <linux/kallsyms.h>
static unsigned long **p_sys_call_table;
/* Aquire system calls table address */
p_sys_call_table = (void *) kallsyms_lookup_name("sys_call_table");
That's it. No addresses, it works fine with every kernel I've tested.
The same way you can use a not exported Kernel function from your module:
static int (*ref_access_remote_vm)(struct mm_struct *mm, unsigned long addr,
void *buf, int len, int write);
ref_access_remote_vm = (void *)kallsyms_lookup_name("access_remote_vm");
Enjoy!
As others have hinted, the whole story is a bit different now on modern kernels. I'll be covering x86-64 here, for syscall hijacking on modern arm64 refer to this other answer of mine. Also NOTE: this is plain and simple syscall hijacking. Non-invasive hooking can be done in a much nicer way using kprobes.
Since Linux v4.17, x86 (both 64 and 32 bit) now uses syscall wrappers that take a struct pt_regs * as the only argument (see commit 1, commit 2). You can see arch/x86/include/asm/syscall.h for the definitions.
Additionally, as others have described already in different answers, the simplest way to modify sys_call_table is to temporarily disable CR0 WP (Write-Protect) bit, which could be done using read_cr0() and write_cr0(). However, since Linux v5.3, [native_]write_cr0 will check sensitive bits that should never change (like WP) and refuse to change them (commit). In order to work around this, we need to write CR0 manually using inline assembly.
Here is a working kernel module (tested on Linux 5.10 and 5.18) that does syscall hijacking on modern Linux x86-64 considering the above caveats and assuming that you already know the address of sys_call_table (if you also want to find that in the module, see Proper way of getting the address of non-exported kernel symbols in a Linux kernel module):
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/**
* Test syscall table hijacking on x86-64. This module will replace the `read`
* syscall with a simple wrapper which logs every invocation of `read` using
* printk().
*
* Tested on Linux x86-64 v5.10, v5.18.
*
* Usage:
*
* sudo cat /proc/kallsyms | grep sys_call_table # grab address
* sudo insmod syscall_hijack.ko sys_call_table_addr=0x<address_here>
*/
#include <linux/init.h> // module_{init,exit}()
#include <linux/module.h> // THIS_MODULE, MODULE_VERSION, ...
#include <linux/kernel.h> // printk(), pr_*()
#include <asm/special_insns.h> // {read,write}_cr0()
#include <asm/processor-flags.h> // X86_CR0_WP
#include <asm/unistd.h> // __NR_*
#ifdef pr_fmt
#undef pr_fmt
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
static sys_call_ptr_t *real_sys_call_table;
static sys_call_ptr_t original_read;
static unsigned long sys_call_table_addr;
module_param(sys_call_table_addr, ulong, 0);
MODULE_PARM_DESC(sys_call_table_addr, "Address of sys_call_table");
// Since Linux v5.3 [native_]write_cr0 won't change "sensitive" CR0 bits, need
// to re-implement this ourselves.
static void write_cr0_unsafe(unsigned long val)
{
asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
}
static long myread(const struct pt_regs *regs)
{
pr_info("read(%ld, 0x%lx, %lx)\n", regs->di, regs->si, regs->dx);
return original_read(regs);
}
static int __init modinit(void)
{
unsigned long old_cr0;
real_sys_call_table = (typeof(real_sys_call_table))sys_call_table_addr;
pr_info("init\n");
// Temporarily disable CR0 WP to be able to write to read-only pages
old_cr0 = read_cr0();
write_cr0_unsafe(old_cr0 & ~(X86_CR0_WP));
// Overwrite syscall and save original to be restored later
original_read = real_sys_call_table[__NR_read];
real_sys_call_table[__NR_read] = myread;
// Restore CR0 WP
write_cr0_unsafe(old_cr0);
pr_info("init done\n");
return 0;
}
static void __exit modexit(void)
{
unsigned long old_cr0;
pr_info("exit\n");
old_cr0 = read_cr0();
write_cr0_unsafe(old_cr0 & ~(X86_CR0_WP));
// Restore original syscall
real_sys_call_table[__NR_read] = original_read;
write_cr0_unsafe(old_cr0);
pr_info("goodbye\n");
}
module_init(modinit);
module_exit(modexit);
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Test syscall table hijacking on x86-64.");
MODULE_AUTHOR("Marco Bonelli");
MODULE_LICENSE("Dual MIT/GPL");

Unknown symbol when loading my own kernel module

The following code (pasted last), taken mostly from here, is a very simple kernel module which acts as a keylogger. I can get it to compile and produce a .ko just fine, but when I try to load it, I get the following errors in dmesg:
[ 790.833828] keylogger: Unknown symbol unregister_keyboard_notifier (err 0)
[ 790.833846] keylogger: Unknown symbol register_keyboard_notifier (err 0)
I did not build my kernel from source, but am using the stock kernel provided with archlinux. I did install the kernel-headers package to get the module to compile, however.
So my question is: Are these two symbols really not found in my installed kernel? And if they are, why aren't they linking(?) correctly?
I can find evidence that the symbols are present. Firstly, I can see the symbols in /proc/kallsyms. Also, when I do nm /usr/src/vmlinux I can also see these two symbols. Are they not the same?
Module code:
#include <linux/module.h> /* Needed by all modules */
#include <linux/keyboard.h>
EXPORT_SYMBOL_NOVERS(unregister_keyboard_notifier);
EXPORT_SYMBOL_NOVERS(register_keyboard_notifier);
int hello_notify(struct notifier_block *nblock, unsigned long code, void *_param) {
struct keyboard_notifier_param *param = _param;
struct vc_data *vc = param->vc;
int ret = NOTIFY_OK;
if (code == KBD_KEYCODE) {
printk(KERN_DEBUG "KEYLOGGER %i %s\n", param->value, (param->down ? "down" : "up"));
}
}
static struct notifier_block nb = {
.notifier_call = hello_notify
};
static int hello_init(void)
{
register_keyboard_notifier(&nb);
return 0;
}
static void hello_release(void)
{
unregister_keyboard_notifier(&nb);
}
module_init(hello_init);
module_exit(hello_release);
I needed to add the following to my module source:
MODULE_LICENSE("GPL");

why am I getting an "implicit declaration of function 'ndo_get_stats' " error?

I'm building a VERY SIMPLE kernel module for gathering some stats from the network card here's the code, I keep getting an error implicit declaration of function 'ndo_get_stats'. I'm not sure why...
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#include <linux/netdevice.h> /* Needed for netdevice*/
static int __init hello_start(void)
{
struct net_device *dev;
printk(KERN_INFO "Loading Stats module...\n");
printk(KERN_ALERT "Hello world\n");
dev = first_net_device(&init_net);
while (dev)
{
printk(KERN_INFO "found [%s] and it's [%d]\n", dev->name, dev->flags & IFF_UP);
printk(KERN_INFO "End of dev struct ... now starts the get_stats struct\n");
dev->stats = ndo_get_stats(dev);
printk(KERN_INFO "recive errors: [%li]\n transmission errors: [%li]\n number of collisions: [%li]", dev->stats.rx_errors , dev->stats.tx_errors, dev->stats.collisions);
dev = next_net_device(dev);
}
return 0;
}
static void __exit hello_end(void)
{
printk(KERN_ALERT "Goodbye.\n");
}
module_init(hello_start);
module_exit(hello_end);
Thanks
ndo_get_stats is a net_device_ops function pointer. You have to call it through the netdev_ops field of your net_device.
Something like this would work:
stats = dev->netdev_ops->ndo_get_stats(dev);

How do I use ioctl() to manipulate my kernel module?

So I'm trying to write a kernel module that uses the linux/timer.h file. I got it to work inside just the module, and now I am trying to get it to work from a user program.
Here is my kernel module:
//Necessary Includes For Device Drivers.
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include <linux/timer.h>
#include <linux/ioctl.h>
#define DEVICE_NAME "mytimer"
#define DEVICE_FILE_NAME "mytimer"
#define MAJOR_NUM 61
#define MINOR_NUM 0
MODULE_LICENSE("Dual BSD/GPL");
static struct timer_list my_timer;
struct file_operations FileOps =
{
//No File Operations for this timer.
};
//Function to perform when timer expires.
void TimerExpire(int data)
{
printk("Timer Data: %d\n", data);
}
//Function to set up timers.
void TimerSetup(void)
{
setup_timer(&my_timer, TimerExpire, 5678);
mod_timer(&my_timer, jiffies + msecs_to_jiffies(5000));
}
//Module Init and Exit Functions.
int init_module(void)
{
int initResult = register_chrdev(MAJOR_NUM, "mytimer", &FileOps);
if (initResult < 0)
{
printk("Cannot obtain major number %d\n", MAJOR_NUM);
return initResult;
}
printk("Loading MyTimer Kernel Module...\n");
return 0;
}
void cleanup_module(void)
{
unregister_chrdev(MAJOR_NUM, "mytimer");
printk("Unloading MyTimer Kernel Module...\n");
}
More specifically, I want my user program to call the TimerSetup() function. I know that I'll need to use ioctl() but I'm not sure how to specify in my MODULE FILE that TimerSetup() should be callable via ioctl().
Also, my second question: I was able to insmod my module and also mknod into /dev/mytimer with the correct major number. But when I tried to open() it so that I can get the file descriptor from it, it kept returning -1, which I'm assuming is wrong. I made sure the permissions were fine (in fact, I made it 777 just to be sure)... It still doesn't work... Is there something I'm missing?
Here is the user program just in case:
#include <stdio.h>
int main(int argc, char* argv[])
{
int fd = open("/dev/mytimer", "r");
printf("fd: %d\n", fd);
return 0;
}
The example code you need can be found in drivers/watchdog/softdog.c (from Linux 2.6.33 at the time this was written), which illustrates proper file operations as well as how to permit userland to fill a structure with ioctl().
It's actually a great, working tutorial for anyone who needs to write trivial character device drivers.
I dissected softdog's ioctl interface when answering my own question, which may be helpful to you.
Here's the gist of it (though far from exhaustive) ...
In softdog_ioctl() you see a simple initialization of struct watchdog_info that advertises functionality, version and device information:
static const struct watchdog_info ident = {
.options = WDIOF_SETTIMEOUT |
WDIOF_KEEPALIVEPING |
WDIOF_MAGICCLOSE,
.firmware_version = 0,
.identity = "Software Watchdog",
};
We then look at a simple case where the user just wants to obtain these capabilities:
switch (cmd) {
case WDIOC_GETSUPPORT:
return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0;
... which of course, will fill the corresponding userspace watchdog_info with the initialized values above. If copy_to_user() fails, -EFAULT is returned which causes the corresponding userspace ioctl() call to return -1 with a meaningful errno being set.
Note, the magic requests are actually defined in linux/watchdog.h , so that the kernel and userspace share them:
#define WDIOC_GETSUPPORT _IOR(WATCHDOG_IOCTL_BASE, 0, struct watchdog_info)
#define WDIOC_GETSTATUS _IOR(WATCHDOG_IOCTL_BASE, 1, int)
#define WDIOC_GETBOOTSTATUS _IOR(WATCHDOG_IOCTL_BASE, 2, int)
#define WDIOC_GETTEMP _IOR(WATCHDOG_IOCTL_BASE, 3, int)
#define WDIOC_SETOPTIONS _IOR(WATCHDOG_IOCTL_BASE, 4, int)
#define WDIOC_KEEPALIVE _IOR(WATCHDOG_IOCTL_BASE, 5, int)
#define WDIOC_SETTIMEOUT _IOWR(WATCHDOG_IOCTL_BASE, 6, int)
#define WDIOC_GETTIMEOUT _IOR(WATCHDOG_IOCTL_BASE, 7, int)
#define WDIOC_SETPRETIMEOUT _IOWR(WATCHDOG_IOCTL_BASE, 8, int)
#define WDIOC_GETPRETIMEOUT _IOR(WATCHDOG_IOCTL_BASE, 9, int)
#define WDIOC_GETTIMELEFT _IOR(WATCHDOG_IOCTL_BASE, 10, int)
WDIOC obviously signifying "Watchdog ioctl"
You can easily take that a step further, having your driver do something and place the result of that something in the structure and copy it to userspace. For instance, if struct watchdog_info also had a member __u32 result_code. Note, __u32 is just the kernel's version of uint32_t.
With ioctl(), the user passes the address of an object, be it a structure, integer, whatever to the kernel expecting the kernel to write its reply in an identical object and copy the results to the address that was provided.
The second thing you are going to need to do is make sure your device knows what to do when someone opens, reads from it, writes to it, or uses a hook like ioctl(), which you can easily see by studying softdog.
Of interest is:
static const struct file_operations softdog_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.write = softdog_write,
.unlocked_ioctl = softdog_ioctl,
.open = softdog_open,
.release = softdog_release,
};
Where you see the unlocked_ioctl handler going to ... you guessed it, softdog_ioctl().
I think you might be juxtaposing a layer of complexity that really doesn't exist when dealing with ioctl(), it really is that simple. For that same reason, most kernel developers frown on new ioctl interfaces being added unless they are absolutely necessary. Its just too easy to lose track of the type that ioctl() is going to fill vs the magic you use to do it, which is the primary reason that copy_to_user() fails often resulting in the kernel rotting with hordes of userspace processes stuck in disk sleep.
For a timer, I agree, ioctl() is the shortest path to sanity.
You are missing a .open function pointer in your file_operations structure to specify the function to be called when a process attempts to open the device file. You will need to specify a .ioctl function pointer for your ioctl function as well.
Try reading through The Linux Kernel Module Programming Guide, specifically chapters 4 (Character Device Files) and 7 (Talking to Device Files).
Chapter 4 introduces the file_operations structure, which holds pointers to functions defined by the module/driver that perform various operations such as open or ioctl.
Chapter 7 provides information on communicating with a module/drive via ioctls.
Linux Device Drivers, Third Edition is another good resource.
Minimal runnable example
Tested in a fully reproducible QEMU + Buildroot environment, so might help others get their ioctl working. GitHub upstream:
kernel module |
shared header |
userland.
The most annoying part was understanding that some low ids are hijacked: ioctl is not called if cmd = 2 , you have to use _IOx macros.
Kernel module:
#include <asm/uaccess.h> /* copy_from_user, copy_to_user */
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/printk.h> /* printk */
#include "ioctl.h"
MODULE_LICENSE("GPL");
static struct dentry *dir;
static long unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long argp)
{
void __user *arg_user;
union {
int i;
lkmc_ioctl_struct s;
} arg_kernel;
arg_user = (void __user *)argp;
pr_info("cmd = %x\n", cmd);
switch (cmd) {
case LKMC_IOCTL_INC:
if (copy_from_user(&arg_kernel.i, arg_user, sizeof(arg_kernel.i))) {
return -EFAULT;
}
pr_info("0 arg = %d\n", arg_kernel.i);
arg_kernel.i += 1;
if (copy_to_user(arg_user, &arg_kernel.i, sizeof(arg_kernel.i))) {
return -EFAULT;
}
break;
case LKMC_IOCTL_INC_DEC:
if (copy_from_user(&arg_kernel.s, arg_user, sizeof(arg_kernel.s))) {
return -EFAULT;
}
pr_info("1 arg = %d %d\n", arg_kernel.s.i, arg_kernel.s.j);
arg_kernel.s.i += 1;
arg_kernel.s.j -= 1;
if (copy_to_user(arg_user, &arg_kernel.s, sizeof(arg_kernel.s))) {
return -EFAULT;
}
break;
default:
return -EINVAL;
break;
}
return 0;
}
static const struct file_operations fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = unlocked_ioctl
};
static int myinit(void)
{
dir = debugfs_create_dir("lkmc_ioctl", 0);
/* ioctl permissions are not automatically restricted by rwx as for read / write,
* but we could of course implement that ourselves:
* https://stackoverflow.com/questions/29891803/user-permission-check-on-ioctl-command */
debugfs_create_file("f", 0, dir, NULL, &fops);
return 0;
}
static void myexit(void)
{
debugfs_remove_recursive(dir);
}
module_init(myinit)
module_exit(myexit)
Shared header between the kernel module and userland:
ioctl.h
#ifndef IOCTL_H
#define IOCTL_H
#include <linux/ioctl.h>
typedef struct {
int i;
int j;
} lkmc_ioctl_struct;
#define LKMC_IOCTL_MAGIC 0x33
#define LKMC_IOCTL_INC _IOWR(LKMC_IOCTL_MAGIC, 0, int)
#define LKMC_IOCTL_INC_DEC _IOWR(LKMC_IOCTL_MAGIC, 1, lkmc_ioctl_struct)
#endif
Userland:
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "../ioctl.h"
int main(int argc, char **argv)
{
int fd, arg_int, ret;
lkmc_ioctl_struct arg_struct;
if (argc < 2) {
puts("Usage: ./prog <ioctl-file>");
return EXIT_FAILURE;
}
fd = open(argv[1], O_RDONLY);
if (fd == -1) {
perror("open");
return EXIT_FAILURE;
}
/* 0 */
{
arg_int = 1;
ret = ioctl(fd, LKMC_IOCTL_INC, &arg_int);
if (ret == -1) {
perror("ioctl");
return EXIT_FAILURE;
}
printf("arg = %d\n", arg_int);
printf("ret = %d\n", ret);
printf("errno = %d\n", errno);
}
puts("");
/* 1 */
{
arg_struct.i = 1;
arg_struct.j = 1;
ret = ioctl(fd, LKMC_IOCTL_INC_DEC, &arg_struct);
if (ret == -1) {
perror("ioctl");
return EXIT_FAILURE;
}
printf("arg = %d %d\n", arg_struct.i, arg_struct.j);
printf("ret = %d\n", ret);
printf("errno = %d\n", errno);
}
close(fd);
return EXIT_SUCCESS;
}

Resources