Why doesn't this call to `poll` block correctly on a sysfs device attribute file? - linux

I have a simple sysfs device attribute which shows up under my sysfs directory, and on a call to read returns the value of a kernelspace variable. I want to call poll on this attribute to allow my userspace thread to block until the value shown by the attribute changes.
My problem is that poll doesn't seem to block on my attribute -- it keeps returning POLLPRI even though the value shown by the attribute does not change. In fact, I have no calls at all to sysfs_notify in the kernel module, yet the userspace call poll still does not block.
Perhaps I should be checking for a return value of something other than POLLPRI -- but according to the documentation in the Linux kernel, sysfs_poll should return POLLERR|POLLPRI:
/* ... When the content changes (assuming the
* manager for the kobject supports notification), poll will
* return POLLERR|POLLPRI ...
*/
Is there something I'm forgetting to do with poll?
The device attribute is located at: /sys/class/vilhelm/foo/blah.
I load a kernel module called foo which registers a device, and creates a class and this device attribute.
The userspace application called bar spawns a thread that calls poll on the device attribute, checking for POLLPRI.
If poll returns a positive number, POLLPRI has been returned.
Use fopen and fscan to read the value from the device attribute file.
If the value is 42, print FROM THREAD!!!.
The problem is that the message is printed nonstop when I'm expecting the call to poll to block indefinitely. The problem must lie with poll (the other calls successfully acquire the correct value of 42 from the device attribute).
userspace app - bar.c:
#include <stdio.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
static void handle_val(unsigned val, FILE *fp);
void * start_val_service(void *arg);
int main(void){
pthread_t val_serv;
pthread_create(&val_serv, NULL, &start_val_service, NULL);
pthread_exit(NULL);
return 0;
}
static void handle_val(unsigned val, FILE *fp){
switch(val){
case 42:
{
printf("FROM THREAD!!!\n");
break;
}
default:
break;
}
}
void * start_val_service(void *arg){
struct pollfd fds;
fds.fd = open("/sys/class/vilhelm/foo/blah", O_RDONLY);
fds.events = POLLPRI;
do{
int ret = poll(&fds, 1, -1);
if(ret > 0){
FILE *fp = fopen("/sys/class/vilhelm/foo/blah", "r");
unsigned val;
fscanf(fp, "%u", &val);
handle_val(val, fp);
fclose(fp);
}
}while(1);
close(fds.fd);
pthread_exit(NULL);
}
kernel module - foo.c:
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/kernel.h>
static dev_t foo_dev;
static struct class *vilhelm;
static unsigned myvar = 42;
static ssize_t unsigned_dev_attr_show(struct device *dev, struct device_attribute *attr, char *buf);
struct unsigned_device_attribute{
struct device_attribute dev_attr;
unsigned *ptr;
};
static struct unsigned_device_attribute unsigned_dev_attr_blah = {
.dev_attr = __ATTR(blah, S_IRUGO, unsigned_dev_attr_show, NULL)
};
static int __init foo_init(void){
int retval = 0;
printk(KERN_INFO "HELLO FROM MODULE 1");
if(alloc_chrdev_region(&foo_dev, 0, 1, "vilhelm") < 0){
printk(KERN_ERR "foo: unable to register device");
retval = -1;
goto out_alloc_chrdev_region;
}
vilhelm = class_create(THIS_MODULE, "vilhelm");
if(IS_ERR(vilhelm)){
printk(KERN_ERR "foo: unable to create device class");
retval = PTR_ERR(vilhelm);
goto out_class_create;
}
struct device *foo_device = device_create(vilhelm, NULL, foo_dev, NULL, "foo");
if(IS_ERR(foo_device)){
printk(KERN_ERR "foo: unable to create device file");
retval = PTR_ERR(foo_device);
goto out_device_create;
}
unsigned_dev_attr_blah.ptr = &myvar;
retval = device_create_file(foo_device, &unsigned_dev_attr_blah.dev_attr);
if(retval){
printk(KERN_ERR "foo: unable to create device attribute files");
goto out_create_foo_dev_attr_files;
}
return 0;
out_create_foo_dev_attr_files:
device_destroy(vilhelm, foo_dev);
out_device_create:
class_destroy(vilhelm);
out_class_create:
unregister_chrdev_region(foo_dev, 1);
out_alloc_chrdev_region:
return retval;
}
static void __exit foo_exit(void){
printk(KERN_INFO "BYE FROM MODULE 1");
device_destroy(vilhelm, foo_dev);
class_destroy(vilhelm);
unregister_chrdev_region(foo_dev, 1);
}
static ssize_t unsigned_dev_attr_show(struct device *dev, struct device_attribute *attr, char *buf){
struct unsigned_device_attribute *tmp = container_of(attr, struct unsigned_device_attribute, dev_attr);
unsigned value = *(tmp->ptr);
return scnprintf(buf, PAGE_SIZE, "%u\n", value);
}
module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");
See also
Using the Linux sysfs_notify call

To quote some more from the comment you quoted:
Once poll/select indicates that the value has changed, you
need to close and re-open the file, or seek to 0 and read again.
But you do nothing with fds.fd.
Also, do a dummy read() before calling poll();
any newly opened file is considered changed.

Related

Miscellaneous Device Driver: Unable to open the device with open() system call

I am trying to implement a system call interception for sys_open() call via kernel module and for that I have defined a miscellaneous device driver MyDevice which can be inserted as kernel module. Below is the code for my kernel module:
#include <linux/version.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <asm/unistd.h>
MODULE_LICENSE("GPL");
// IOCTL commands
#define IOCTL_PATCH_TABLE 0x00000001
#define IOCTL_FIX_TABLE 0x00000004
//Global variables
int in_use = 0; //set to 1 in open handler and reset to zero in release handler
int is_set = 0; // flag to detect system call interception
unsigned long *sys_call_table = (unsigned long*)0xffffffff81801400; //hard coded address of sys_call_table from /boot/System.map
//function pointer to original sys_open
asmlinkage int (*real_open)(const char* __user, int, int);
//Replacement of original call with modified system call
asmlinkage int custom_open(const char* __user file_name, int flags, int mode)
{
printk("interceptor: open(\"%s\", %X, %X)\n", file_name,flags,mode);
return real_open(file_name,flags,mode);
}
/*
Make the memory page writable
This is little risky as directly arch level protection bit is changed
*/
int make_rw(unsigned long address)
{
unsigned int level;
pte_t *pte = lookup_address(address, &level);
if(pte->pte &~ _PAGE_RW)
pte->pte |= _PAGE_RW;
return 0;
}
/* Make the page write protected */
int make_ro(unsigned long address)
{
unsigned int level;
pte_t *pte = lookup_address(address, &level);
pte->pte = pte->pte &~ _PAGE_RW;
return 0;
}
/* This function will be invoked each time a user process attempts
to open my device. You should keep in mind that the prototype
of this function may change along different kernel versions. */
static int my_open(struct inode *inode, struct file *file)
{
/*Do not allow multiple processes to open this device*/
if(in_use)
return -EBUSY;
in_use++;
printk("MyDevice opened\n");
return 0;
}
/* This function, in turn, will be called when a process closes our device */
static int my_release(struct inode *inode, struct file *file)
{
in_use--;
printk("MyDevice closed\n");
return 0;
}
/*This static function handles ioctl calls performed on MyDevice*/
static int my_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
int retval = 0;
switch(cmd)
{
case IOCTL_PATCH_TABLE:
make_rw((unsigned long)sys_call_table);
real_open = (void*)*(sys_call_table + __NR_open);
*(sys_call_table + __NR_open) = (unsigned long)custom_open;
make_ro((unsigned long)sys_call_table);
is_set=1;
break;
case IOCTL_FIX_TABLE:
make_rw((unsigned long)sys_call_table);
*(sys_call_table + __NR_open) = (unsigned long)real_open;
make_ro((unsigned long)sys_call_table);
is_set=0;
break;
default:
printk("sys_open not executed\n");
break;
}
return retval;
}
//populate data struct for file operations
static const struct file_operations my_fops = {
.owner = THIS_MODULE,
.open = &my_open,
.release = &my_release,
.unlocked_ioctl = (void*)&my_ioctl,
.compat_ioctl = (void*)&my_ioctl
};
//populate miscdevice data structure
static struct miscdevice my_device = {
MISC_DYNAMIC_MINOR,
"MyDevice",
&my_fops
};
static int __init init_my_module(void)
{
int retval;
printk(KERN_INFO "Inside kernel space\n");
retval = misc_register(&my_device);
return retval;
}
static void __exit cleanup_my_module(void)
{
if (is_set)
{
make_rw((unsigned long)sys_call_table);
*(sys_call_table + __NR_open) = (unsigned long)real_open;
make_ro((unsigned long)sys_call_table);
}
misc_deregister(&my_device);
printk(KERN_INFO "Exiting kernel space\n");
return;
}
module_init(init_my_module);
module_exit(cleanup_my_module);
The code for my test file is as follows:
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/* Define ioctl commands */
#define IOCTL_PATCH_TABLE 0x00000001
#define IOCTL_FIX_TABLE 0x00000004
int main(void)
{
int device = open("/dev/MyDevice", O_RDWR);
printf("%d\n",device);
ioctl(device, IOCTL_PATCH_TABLE);
sleep(2);
ioctl(device, IOCTL_FIX_TABLE);
close(device);
return 0;
}
The problem is that in my test file open("/dev/MyDevice", O_RDWR); is always returning -1, why is it so ? Where am I going wrong ? I checked with ls -l /dev/MyDevice, MyDevice has been successfully registered with following details: crw------- 1 root root 10, 56 Dec 9 19:33 /dev/MyDevice
Sorry, seriously stupid mistake, that's what happens when rookies do things. I just needed to grant the read and write permissions for my miscellaneous char device driver.
sudo chmod a+r+w /dev/MyDevice

How to use a seq_file in Linux kernel modules?

Hello all I'm new to Linux and wondering how to use a Linux sequence file in a module to traverse kernel objects.
What I know is I can use the command:
cat /proc/kallsyms
to view the available symbols and from what I've read on google, the symbols in the list that have a 'D' or 'd' are pointers to data structures.
Though I know the basics of how to create a module, the examples on the internet on how to use seq operations are not uniform and I'm getting a little confused.
If someone knows of any good doco that will help me understand how to create a seq file to traverse kernel objects and could post a link (or a quick example), I would be greatly appreciative.
Minimal runnable example
The kernel docs contain an example under Documentation/filesystems/seq_file.txt, but here is a runnable version of that with loop termination.
This example is behaves just like a file that contains:
0
1
2
However, we only store a single integer in memory
and calculate the file on the fly in an iterator fashion.
The file works for both read and lseek system calls, but there is no write system call equivalent:
How to implement a writable proc file by using seq_file in a driver module
Play around with the file with cat and dd skip= for the seeks.
#include <asm/uaccess.h> /* copy_from_user, copy_to_user */
#include <linux/debugfs.h>
#include <linux/errno.h> /* EFAULT */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/printk.h> /* pr_info */
#include <linux/seq_file.h> /* seq_read, seq_lseek, single_release */
#include <linux/slab.h>
#include <uapi/linux/stat.h> /* S_IRUSR */
MODULE_LICENSE("GPL");
static int max = 2;
module_param(max, int, S_IRUSR | S_IWUSR);
static struct dentry *debugfs_file;
/* Called at the beginning of every read.
*
* The return value is passsed to the first show.
* It normally represents the current position of the iterator.
* It could be any struct, but we use just a single integer here.
*
* NULL return means stop should be called next, and so the read will be empty..
* This happens for example for an ftell that goes beyond the file size.
*/
static void *start(struct seq_file *s, loff_t *pos)
{
loff_t *spos;
pr_info("start pos = %llx\n", (unsigned long long)*pos);
spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
if (!spos || *pos >= max)
return NULL;
*spos = *pos;
return spos;
}
/* The return value is passed to next show.
* If NULL, stop is called next instead of show, and read ends.
*
* Can get called multiple times, until enough data is returned for the read.
*/
static void *next(struct seq_file *s, void *v, loff_t *pos)
{
loff_t *spos;
spos = v;
pr_info("next pos = %llx\n", (unsigned long long)*pos);
if (*pos >= max)
return NULL;
*pos = ++*spos;
return spos;
}
/* Called at the end of every read. */
static void stop(struct seq_file *s, void *v)
{
pr_info("stop\n");
kfree(v);
}
/* Return 0 means success, SEQ_SKIP ignores previous prints, negative for error. */
static int show(struct seq_file *s, void *v)
{
loff_t *spos;
spos = v;
pr_info("show pos = %llx\n", (unsigned long long)*spos);
seq_printf(s, "%llx\n", (long long unsigned)*spos);
return 0;
}
static struct seq_operations my_seq_ops = {
.next = next,
.show = show,
.start = start,
.stop = stop,
};
static int open(struct inode *inode, struct file *file)
{
pr_info("open\n");
return seq_open(file, &my_seq_ops);
}
static struct file_operations fops = {
.owner = THIS_MODULE,
.llseek = seq_lseek,
.open = open,
.read = seq_read,
.release = seq_release
};
static int myinit(void)
{
debugfs_file = debugfs_create_file(
"lkmc_seq_file", S_IRUSR, NULL, NULL, &fops);
if (debugfs_file) {
return 0;
} else {
return -EINVAL;
}
}
static void myexit(void)
{
debugfs_remove(debugfs_file);
}
module_init(myinit)
module_exit(myexit)
GitHub upstream.
Note how the seq_file API makes it much easier to write the read file operation.
single_open
If you have the entire read output upfront, single_open is an even more convenient version of seq_file.
This example behaves like a file that contains:
ab
cd
Code:
#include <asm/uaccess.h> /* copy_from_user, copy_to_user */
#include <linux/debugfs.h>
#include <linux/errno.h> /* EFAULT */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/printk.h> /* pr_info */
#include <linux/seq_file.h> /* seq_read, seq_lseek, single_release */
#include <uapi/linux/stat.h> /* S_IRUSR */
MODULE_LICENSE("GPL");
static struct dentry *debugfs_file;
static int show(struct seq_file *m, void *v)
{
seq_printf(m, "ab\ncd\n");
return 0;
}
static int open(struct inode *inode, struct file *file)
{
return single_open(file, show, NULL);
}
static const struct file_operations fops = {
.llseek = seq_lseek,
.open = open,
.owner = THIS_MODULE,
.read = seq_read,
.release = single_release,
};
static int myinit(void)
{
debugfs_file = debugfs_create_file(
"lkmc_seq_file_single", S_IRUSR, NULL, NULL, &fops);
if (debugfs_file) {
return 0;
} else {
return -EINVAL;
}
}
static void myexit(void)
{
debugfs_remove(debugfs_file);
}
module_init(myinit)
module_exit(myexit)
GitHub upstream.
Tested on Linux 4.9.6.
It appears that starting from Linux 5, there was a backwards incompatible change that requires you to implement seq_file a bit differently, I think this talks about it: seq_file not working properly after next returns NULL and it appears that if you don't update this you get a warning:
seq_file: buggy .next function next [module-name] did not update position index

Please help me to make this fake character linux device driver work

Hello I am trying to write to a fake char device driver using:
echo > /dev/
and reading it using:
cat /dev/
My problem is that I am getting continuously the first character written printed on the terminal when I do a read with the above mentioned "cat" read method after writing using the echo method above.
My aim is to get the entire set of characters written to the driver back...
I am using dynamic memory allocation for this purpose but not getting the final result after trying many ways of rewriting the code of read() and write() in the driver. Please help..
my Makefile is correct... (I am using ubuntu with a kernel version of 2.6.33...)
My code is as below:
#include <linux/module.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/kdev_t.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
static dev_t first;
static struct cdev c_dev;
static struct class *cl;
static char* k_buf = NULL;
static int my_open(struct inode *i,struct file *f)
{
printk(KERN_INFO "In driver open()\n");
return 0;
}
static int my_close(struct inode *i,struct file *f)
{
printk(KERN_INFO "In driver close()\n");
return 0;
}
static ssize_t my_read(struct file *f,char __user *buf,size_t len,loff_t *off)
{
printk(KERN_INFO "In driver read()\n");
if(k_buf == NULL)
{
printk(KERN_INFO "You cannot read before writing!\n");
return -1;
}
while(*k_buf != 'EOF')
{
if(copy_to_user(buf,k_buf,1))
return -EFAULT;
off++;
return 1;
}
return 0;
}
static ssize_t my_write(struct file *f,const char __user *buf,size_t len,loff_t *off)
{
printk(KERN_INFO "In driver write()\n");
k_buf = (char*) kmalloc(sizeof(len),GFP_KERNEL);
if(copy_from_user(k_buf,buf,len))
return -EFAULT;
off += len;
return (len);
}
static struct file_operations fops =
{
.owner = THIS_MODULE,
.open = my_open,
.release = my_close,
.read = my_read,
.write = my_write
};
static int __init rw_init(void) /*Constructor*/
{
printk(KERN_INFO "hello: rw_ch_driver registered\n");
if(alloc_chrdev_region(&first,0,1,"krishna") < 0)
{
return -1;
}
if ((cl = class_create(THIS_MODULE,"chardev")) == NULL)
{
unregister_chrdev_region(first,1);
return -1;
}
if (device_create(cl,NULL,first,NULL,"rw_char_driver") == NULL)
{
class_destroy(cl);
unregister_chrdev_region(first,1);
return -1;
}
cdev_init(&c_dev,&fops);
if(cdev_add(&c_dev,first,1) == -1)
{
device_destroy(cl,first);
class_destroy(cl);
unregister_chrdev_region(first,1);
return -1;
}
return 0;
}
static void __exit rw_exit(void)/*destructor*/
{
cdev_del(&c_dev);
device_destroy(cl,first);
class_destroy(cl);
unregister_chrdev_region(first,1);
printk(KERN_INFO "bye rw_chardriver unregistered");
}
module_init(rw_init);
module_exit(rw_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("krishna");
MODULE_DESCRIPTION("read write character driver");
Take a careful look at your while loop in my_read().
Most important note first: you don't need this loop. You've put a return statement in it, so it is never going to execute more than once, because the whole function is going to exit when the return is reached. It looks like you're trying to make the function return a single byte at a time repeatedly, but you should just call copy_to_user once, and pass it the number of bytes you want to give back to the user instead. If you only send one character at a time that's fine. It will be up to the user to make the read call again to get the next character.
The nice thing about copy_to_user, is that its return code will tell you if it failed because of bad array bounds, so there's no need to check for EOF on every character. In fact, you are not going to get 'EOF' as a character when you are reading from your buffer because it doesn't exist. Your buffer will store characters and usually a null terminator, '\0', but there is no 'EOF' character in C. EOF is a state you need to identify yourself and report to whoever called open. For the "cat" command, this is done by returning 0 from read. That being said, you should still check your array bounds so we don't end up with another Heartbleed. This SO answer has a good suggestion for how to do bounds checking to make sure you don't send more bytes than your buffer has.
Also, give [this post(https://meta.stackexchange.com/questions/981/syntax-highlighting-language-hints) a read. If you don't have your language in your question tags, it is helpful to other readers to tag your. I've edited your question to clean it up, so you can click "edit" now to see how I did it.

Using the Linux sysfs_notify call

I am trying to communicate asynchronously between a kernel driver and a user-space program (I know there are lots of questions here that ask for similar information, but I could find none that deal with sysfs_notify).
I am leaving Vilhelm's edit here, but adding the source to both a simple driver utilizing sysfs and a user-space program to poll it. The driver works fine (I got most of it from the net; it is missing the credits, but I couldn't find them when I went back to add them).
Unfortunately, the polling program does not work. It always returns success immediately. Interestingly, if I don't perform the two reads prior to the poll, the revents members are set to POLLERR | POLLIN instead of just POLLIN as seen in the program output.
Program output:
root#ubuntu:/home/wmulcahy/demo# ./readhello
triggered
Attribute file value: 74 (t) [0]
revents[0]: 00000001
revents[1]: 00000001
Here is the driver: hello.c (you can see where I started out...)
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
struct my_attr {
struct attribute attr;
int value;
};
static struct my_attr notify = {
.attr.name="notify",
.attr.mode = 0644,
.value = 0,
};
static struct my_attr trigger = {
.attr.name="trigger",
.attr.mode = 0644,
.value = 0,
};
static struct attribute * myattr[] = {
&notify.attr,
&trigger.attr,
NULL
};
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
struct my_attr *a = container_of(attr, struct my_attr, attr);
printk( "hello: show called (%s)\n", a->attr.name );
return scnprintf(buf, PAGE_SIZE, "%s: %d\n", a->attr.name, a->value);
}
static struct kobject *mykobj;
static ssize_t store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len)
{
struct my_attr *a = container_of(attr, struct my_attr, attr);
sscanf(buf, "%d", &a->value);
notify.value = a->value;
printk("sysfs_notify store %s = %d\n", a->attr.name, a->value);
sysfs_notify(mykobj, NULL, "notify");
return sizeof(int);
}
static struct sysfs_ops myops = {
.show = show,
.store = store,
};
static struct kobj_type mytype = {
.sysfs_ops = &myops,
.default_attrs = myattr,
};
static struct kobject *mykobj;
static int __init hello_module_init(void)
{
int err = -1;
printk("Hello: init\n");
mykobj = kzalloc(sizeof(*mykobj), GFP_KERNEL);
if (mykobj) {
kobject_init(mykobj, &mytype);
if (kobject_add(mykobj, NULL, "%s", "hello")) {
err = -1;
printk("Hello: kobject_add() failed\n");
kobject_put(mykobj);
mykobj = NULL;
}
err = 0;
}
return err;
}
static void __exit hello_module_exit(void)
{
if (mykobj) {
kobject_put(mykobj);
kfree(mykobj);
}
printk("Hello: exit\n");
}
module_init(hello_module_init);
module_exit(hello_module_exit);
MODULE_LICENSE("GPL");
And here is the poll program: readhello.c
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <poll.h>
#define TEST_SYSFS_TRIGGER "/sys/hello/trigger"
#define TEST_SYSFS_NOTIFY "/sys/hello/notify"
int main(int argc, char **argv)
{
int cnt, notifyFd, triggerFd, rv;
char attrData[100];
struct pollfd ufds[2];
// Open a connection to the attribute file.
if ((notifyFd = open(TEST_SYSFS_NOTIFY, O_RDWR)) < 0)
{
perror("Unable to open notify");
exit(1);
}
// Open a connection to the attribute file.
if ((triggerFd = open(TEST_SYSFS_TRIGGER, O_RDWR)) < 0)
{
perror("Unable to open trigger");
exit(1);
}
ufds[0].fd = notifyFd;
ufds[0].events = POLLIN;
ufds[1].fd = triggerFd;
ufds[1].events = POLLIN;
// Someone suggested dummy reads before the poll() call
cnt = read( notifyFd, attrData, 100 );
cnt = read( triggerFd, attrData, 100 );
ufds[0].revents = 0;
ufds[1].revents = 0;
if (( rv = poll( ufds, 2, 10000)) < 0 )
{
perror("poll error");
}
else if (rv == 0)
{
printf("Timeout occurred!\n");
}
else if (ufds[0].revents & POLLIN)
{
printf("triggered\n");
cnt = read( notifyFd, attrData, 1 );
printf( "Attribute file value: %02X (%c) [%d]\n", attrData[0], attrData[0], cnt );
}
printf( "revents[0]: %08X\n", ufds[0].revents );
printf( "revents[1]: %08X\n", ufds[1].revents );
close( triggerFd );
close( notifyFd );
}
Some upcoming sysfs enhancements.
Internally, the patch adds a wait queue head to every kobject on the
system; that queue is inserted into a poll table in response to a
poll() call. The sysfs code has no way of knowing, however, when the
value of any given sysfs attribute has changed, so the subsystem
implementing a pollable attribute must make explicit calls to:
void sysfs_notify(struct kobject *kobj, char *dir, char *attr);
Thanks,
Lee
The blocking poll is from the user side. User code can simply tell the kernel which attributes it's interested in, then block in a poll() until one of them has changed.
The sysfs_notify() is a kernel-side call that releases the user-space poll(). After you adjust your kernel attribute value, just call sysfs_notify() to allow any user-space applications to respond to their outstanding poll().
Think of the poll() as "subscribing" to notices of a change in an attribute of interest, and sysfs_notify() as "publishing" the change to any subscribers.

How do I use ioctl() to manipulate my kernel module?

So I'm trying to write a kernel module that uses the linux/timer.h file. I got it to work inside just the module, and now I am trying to get it to work from a user program.
Here is my kernel module:
//Necessary Includes For Device Drivers.
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include <linux/timer.h>
#include <linux/ioctl.h>
#define DEVICE_NAME "mytimer"
#define DEVICE_FILE_NAME "mytimer"
#define MAJOR_NUM 61
#define MINOR_NUM 0
MODULE_LICENSE("Dual BSD/GPL");
static struct timer_list my_timer;
struct file_operations FileOps =
{
//No File Operations for this timer.
};
//Function to perform when timer expires.
void TimerExpire(int data)
{
printk("Timer Data: %d\n", data);
}
//Function to set up timers.
void TimerSetup(void)
{
setup_timer(&my_timer, TimerExpire, 5678);
mod_timer(&my_timer, jiffies + msecs_to_jiffies(5000));
}
//Module Init and Exit Functions.
int init_module(void)
{
int initResult = register_chrdev(MAJOR_NUM, "mytimer", &FileOps);
if (initResult < 0)
{
printk("Cannot obtain major number %d\n", MAJOR_NUM);
return initResult;
}
printk("Loading MyTimer Kernel Module...\n");
return 0;
}
void cleanup_module(void)
{
unregister_chrdev(MAJOR_NUM, "mytimer");
printk("Unloading MyTimer Kernel Module...\n");
}
More specifically, I want my user program to call the TimerSetup() function. I know that I'll need to use ioctl() but I'm not sure how to specify in my MODULE FILE that TimerSetup() should be callable via ioctl().
Also, my second question: I was able to insmod my module and also mknod into /dev/mytimer with the correct major number. But when I tried to open() it so that I can get the file descriptor from it, it kept returning -1, which I'm assuming is wrong. I made sure the permissions were fine (in fact, I made it 777 just to be sure)... It still doesn't work... Is there something I'm missing?
Here is the user program just in case:
#include <stdio.h>
int main(int argc, char* argv[])
{
int fd = open("/dev/mytimer", "r");
printf("fd: %d\n", fd);
return 0;
}
The example code you need can be found in drivers/watchdog/softdog.c (from Linux 2.6.33 at the time this was written), which illustrates proper file operations as well as how to permit userland to fill a structure with ioctl().
It's actually a great, working tutorial for anyone who needs to write trivial character device drivers.
I dissected softdog's ioctl interface when answering my own question, which may be helpful to you.
Here's the gist of it (though far from exhaustive) ...
In softdog_ioctl() you see a simple initialization of struct watchdog_info that advertises functionality, version and device information:
static const struct watchdog_info ident = {
.options = WDIOF_SETTIMEOUT |
WDIOF_KEEPALIVEPING |
WDIOF_MAGICCLOSE,
.firmware_version = 0,
.identity = "Software Watchdog",
};
We then look at a simple case where the user just wants to obtain these capabilities:
switch (cmd) {
case WDIOC_GETSUPPORT:
return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0;
... which of course, will fill the corresponding userspace watchdog_info with the initialized values above. If copy_to_user() fails, -EFAULT is returned which causes the corresponding userspace ioctl() call to return -1 with a meaningful errno being set.
Note, the magic requests are actually defined in linux/watchdog.h , so that the kernel and userspace share them:
#define WDIOC_GETSUPPORT _IOR(WATCHDOG_IOCTL_BASE, 0, struct watchdog_info)
#define WDIOC_GETSTATUS _IOR(WATCHDOG_IOCTL_BASE, 1, int)
#define WDIOC_GETBOOTSTATUS _IOR(WATCHDOG_IOCTL_BASE, 2, int)
#define WDIOC_GETTEMP _IOR(WATCHDOG_IOCTL_BASE, 3, int)
#define WDIOC_SETOPTIONS _IOR(WATCHDOG_IOCTL_BASE, 4, int)
#define WDIOC_KEEPALIVE _IOR(WATCHDOG_IOCTL_BASE, 5, int)
#define WDIOC_SETTIMEOUT _IOWR(WATCHDOG_IOCTL_BASE, 6, int)
#define WDIOC_GETTIMEOUT _IOR(WATCHDOG_IOCTL_BASE, 7, int)
#define WDIOC_SETPRETIMEOUT _IOWR(WATCHDOG_IOCTL_BASE, 8, int)
#define WDIOC_GETPRETIMEOUT _IOR(WATCHDOG_IOCTL_BASE, 9, int)
#define WDIOC_GETTIMELEFT _IOR(WATCHDOG_IOCTL_BASE, 10, int)
WDIOC obviously signifying "Watchdog ioctl"
You can easily take that a step further, having your driver do something and place the result of that something in the structure and copy it to userspace. For instance, if struct watchdog_info also had a member __u32 result_code. Note, __u32 is just the kernel's version of uint32_t.
With ioctl(), the user passes the address of an object, be it a structure, integer, whatever to the kernel expecting the kernel to write its reply in an identical object and copy the results to the address that was provided.
The second thing you are going to need to do is make sure your device knows what to do when someone opens, reads from it, writes to it, or uses a hook like ioctl(), which you can easily see by studying softdog.
Of interest is:
static const struct file_operations softdog_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.write = softdog_write,
.unlocked_ioctl = softdog_ioctl,
.open = softdog_open,
.release = softdog_release,
};
Where you see the unlocked_ioctl handler going to ... you guessed it, softdog_ioctl().
I think you might be juxtaposing a layer of complexity that really doesn't exist when dealing with ioctl(), it really is that simple. For that same reason, most kernel developers frown on new ioctl interfaces being added unless they are absolutely necessary. Its just too easy to lose track of the type that ioctl() is going to fill vs the magic you use to do it, which is the primary reason that copy_to_user() fails often resulting in the kernel rotting with hordes of userspace processes stuck in disk sleep.
For a timer, I agree, ioctl() is the shortest path to sanity.
You are missing a .open function pointer in your file_operations structure to specify the function to be called when a process attempts to open the device file. You will need to specify a .ioctl function pointer for your ioctl function as well.
Try reading through The Linux Kernel Module Programming Guide, specifically chapters 4 (Character Device Files) and 7 (Talking to Device Files).
Chapter 4 introduces the file_operations structure, which holds pointers to functions defined by the module/driver that perform various operations such as open or ioctl.
Chapter 7 provides information on communicating with a module/drive via ioctls.
Linux Device Drivers, Third Edition is another good resource.
Minimal runnable example
Tested in a fully reproducible QEMU + Buildroot environment, so might help others get their ioctl working. GitHub upstream:
kernel module |
shared header |
userland.
The most annoying part was understanding that some low ids are hijacked: ioctl is not called if cmd = 2 , you have to use _IOx macros.
Kernel module:
#include <asm/uaccess.h> /* copy_from_user, copy_to_user */
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/printk.h> /* printk */
#include "ioctl.h"
MODULE_LICENSE("GPL");
static struct dentry *dir;
static long unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long argp)
{
void __user *arg_user;
union {
int i;
lkmc_ioctl_struct s;
} arg_kernel;
arg_user = (void __user *)argp;
pr_info("cmd = %x\n", cmd);
switch (cmd) {
case LKMC_IOCTL_INC:
if (copy_from_user(&arg_kernel.i, arg_user, sizeof(arg_kernel.i))) {
return -EFAULT;
}
pr_info("0 arg = %d\n", arg_kernel.i);
arg_kernel.i += 1;
if (copy_to_user(arg_user, &arg_kernel.i, sizeof(arg_kernel.i))) {
return -EFAULT;
}
break;
case LKMC_IOCTL_INC_DEC:
if (copy_from_user(&arg_kernel.s, arg_user, sizeof(arg_kernel.s))) {
return -EFAULT;
}
pr_info("1 arg = %d %d\n", arg_kernel.s.i, arg_kernel.s.j);
arg_kernel.s.i += 1;
arg_kernel.s.j -= 1;
if (copy_to_user(arg_user, &arg_kernel.s, sizeof(arg_kernel.s))) {
return -EFAULT;
}
break;
default:
return -EINVAL;
break;
}
return 0;
}
static const struct file_operations fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = unlocked_ioctl
};
static int myinit(void)
{
dir = debugfs_create_dir("lkmc_ioctl", 0);
/* ioctl permissions are not automatically restricted by rwx as for read / write,
* but we could of course implement that ourselves:
* https://stackoverflow.com/questions/29891803/user-permission-check-on-ioctl-command */
debugfs_create_file("f", 0, dir, NULL, &fops);
return 0;
}
static void myexit(void)
{
debugfs_remove_recursive(dir);
}
module_init(myinit)
module_exit(myexit)
Shared header between the kernel module and userland:
ioctl.h
#ifndef IOCTL_H
#define IOCTL_H
#include <linux/ioctl.h>
typedef struct {
int i;
int j;
} lkmc_ioctl_struct;
#define LKMC_IOCTL_MAGIC 0x33
#define LKMC_IOCTL_INC _IOWR(LKMC_IOCTL_MAGIC, 0, int)
#define LKMC_IOCTL_INC_DEC _IOWR(LKMC_IOCTL_MAGIC, 1, lkmc_ioctl_struct)
#endif
Userland:
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "../ioctl.h"
int main(int argc, char **argv)
{
int fd, arg_int, ret;
lkmc_ioctl_struct arg_struct;
if (argc < 2) {
puts("Usage: ./prog <ioctl-file>");
return EXIT_FAILURE;
}
fd = open(argv[1], O_RDONLY);
if (fd == -1) {
perror("open");
return EXIT_FAILURE;
}
/* 0 */
{
arg_int = 1;
ret = ioctl(fd, LKMC_IOCTL_INC, &arg_int);
if (ret == -1) {
perror("ioctl");
return EXIT_FAILURE;
}
printf("arg = %d\n", arg_int);
printf("ret = %d\n", ret);
printf("errno = %d\n", errno);
}
puts("");
/* 1 */
{
arg_struct.i = 1;
arg_struct.j = 1;
ret = ioctl(fd, LKMC_IOCTL_INC_DEC, &arg_struct);
if (ret == -1) {
perror("ioctl");
return EXIT_FAILURE;
}
printf("arg = %d %d\n", arg_struct.i, arg_struct.j);
printf("ret = %d\n", ret);
printf("errno = %d\n", errno);
}
close(fd);
return EXIT_SUCCESS;
}

Resources