How to use a seq_file in Linux kernel modules? - linux

Hello all I'm new to Linux and wondering how to use a Linux sequence file in a module to traverse kernel objects.
What I know is I can use the command:
cat /proc/kallsyms
to view the available symbols and from what I've read on google, the symbols in the list that have a 'D' or 'd' are pointers to data structures.
Though I know the basics of how to create a module, the examples on the internet on how to use seq operations are not uniform and I'm getting a little confused.
If someone knows of any good doco that will help me understand how to create a seq file to traverse kernel objects and could post a link (or a quick example), I would be greatly appreciative.

Minimal runnable example
The kernel docs contain an example under Documentation/filesystems/seq_file.txt, but here is a runnable version of that with loop termination.
This example is behaves just like a file that contains:
0
1
2
However, we only store a single integer in memory
and calculate the file on the fly in an iterator fashion.
The file works for both read and lseek system calls, but there is no write system call equivalent:
How to implement a writable proc file by using seq_file in a driver module
Play around with the file with cat and dd skip= for the seeks.
#include <asm/uaccess.h> /* copy_from_user, copy_to_user */
#include <linux/debugfs.h>
#include <linux/errno.h> /* EFAULT */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/printk.h> /* pr_info */
#include <linux/seq_file.h> /* seq_read, seq_lseek, single_release */
#include <linux/slab.h>
#include <uapi/linux/stat.h> /* S_IRUSR */
MODULE_LICENSE("GPL");
static int max = 2;
module_param(max, int, S_IRUSR | S_IWUSR);
static struct dentry *debugfs_file;
/* Called at the beginning of every read.
*
* The return value is passsed to the first show.
* It normally represents the current position of the iterator.
* It could be any struct, but we use just a single integer here.
*
* NULL return means stop should be called next, and so the read will be empty..
* This happens for example for an ftell that goes beyond the file size.
*/
static void *start(struct seq_file *s, loff_t *pos)
{
loff_t *spos;
pr_info("start pos = %llx\n", (unsigned long long)*pos);
spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
if (!spos || *pos >= max)
return NULL;
*spos = *pos;
return spos;
}
/* The return value is passed to next show.
* If NULL, stop is called next instead of show, and read ends.
*
* Can get called multiple times, until enough data is returned for the read.
*/
static void *next(struct seq_file *s, void *v, loff_t *pos)
{
loff_t *spos;
spos = v;
pr_info("next pos = %llx\n", (unsigned long long)*pos);
if (*pos >= max)
return NULL;
*pos = ++*spos;
return spos;
}
/* Called at the end of every read. */
static void stop(struct seq_file *s, void *v)
{
pr_info("stop\n");
kfree(v);
}
/* Return 0 means success, SEQ_SKIP ignores previous prints, negative for error. */
static int show(struct seq_file *s, void *v)
{
loff_t *spos;
spos = v;
pr_info("show pos = %llx\n", (unsigned long long)*spos);
seq_printf(s, "%llx\n", (long long unsigned)*spos);
return 0;
}
static struct seq_operations my_seq_ops = {
.next = next,
.show = show,
.start = start,
.stop = stop,
};
static int open(struct inode *inode, struct file *file)
{
pr_info("open\n");
return seq_open(file, &my_seq_ops);
}
static struct file_operations fops = {
.owner = THIS_MODULE,
.llseek = seq_lseek,
.open = open,
.read = seq_read,
.release = seq_release
};
static int myinit(void)
{
debugfs_file = debugfs_create_file(
"lkmc_seq_file", S_IRUSR, NULL, NULL, &fops);
if (debugfs_file) {
return 0;
} else {
return -EINVAL;
}
}
static void myexit(void)
{
debugfs_remove(debugfs_file);
}
module_init(myinit)
module_exit(myexit)
GitHub upstream.
Note how the seq_file API makes it much easier to write the read file operation.
single_open
If you have the entire read output upfront, single_open is an even more convenient version of seq_file.
This example behaves like a file that contains:
ab
cd
Code:
#include <asm/uaccess.h> /* copy_from_user, copy_to_user */
#include <linux/debugfs.h>
#include <linux/errno.h> /* EFAULT */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/printk.h> /* pr_info */
#include <linux/seq_file.h> /* seq_read, seq_lseek, single_release */
#include <uapi/linux/stat.h> /* S_IRUSR */
MODULE_LICENSE("GPL");
static struct dentry *debugfs_file;
static int show(struct seq_file *m, void *v)
{
seq_printf(m, "ab\ncd\n");
return 0;
}
static int open(struct inode *inode, struct file *file)
{
return single_open(file, show, NULL);
}
static const struct file_operations fops = {
.llseek = seq_lseek,
.open = open,
.owner = THIS_MODULE,
.read = seq_read,
.release = single_release,
};
static int myinit(void)
{
debugfs_file = debugfs_create_file(
"lkmc_seq_file_single", S_IRUSR, NULL, NULL, &fops);
if (debugfs_file) {
return 0;
} else {
return -EINVAL;
}
}
static void myexit(void)
{
debugfs_remove(debugfs_file);
}
module_init(myinit)
module_exit(myexit)
GitHub upstream.
Tested on Linux 4.9.6.
It appears that starting from Linux 5, there was a backwards incompatible change that requires you to implement seq_file a bit differently, I think this talks about it: seq_file not working properly after next returns NULL and it appears that if you don't update this you get a warning:
seq_file: buggy .next function next [module-name] did not update position index

Related

How could I use `kallsyms_lookup_name` function to fix `unknown character` error when loading Linux kernel module?

I'm trying to complete a hooking sample attachment in a program for my uni assignment. The task requires to get a system call sys_rt_sigaction hooked when initiating a loadable module in Linux kernel (I use Ubuntu 18.04 LTS, kernel version is 5.0.0-23-generic). So, the case I'm struggling originates from an error could not insert module <module name>: Unknown symbol in module once I started sudo insmod <my module name>.ko.
After some googling, I see clear this problem arises due to missing sys_call_table export to run inserting as smoothly as well. Following this post, I want to cope that invoking kallsyms_lookup_name call before kicking off init procedure.
There is .c-file which provides with definitions of operations accessible by module (file name is buffer.c):
#define __KERNEL__
#define MODULE
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <asm/uaccess.h>
#include <linux/syscalls.h>
#include <linux/kallsyms.h>
#include <linux/unistd.h>
void * sys_call_table = (void *) kallsyms_lookup_name("sys_call_table");// some wrongness here, but what exactly?
MODULE_LICENSE("GPL");
int (*real_rt_sigaction)(const char * path); // true syscall prototype
static int __init buffer_init_module(void);
static void __exit buffer_exit_module(void);
static int device_open(struct inode *, struct file *); // driver file opening
static int device_release(struct inode *, struct file *); // return of system resource control
static ssize_t device_read(struct file *, char *, size_t, loff_t *); // reading from driver file
static ssize_t device_write(struct file *, const char *, size_t, loff_t *); // writing into driver file
#define DEVICE_NAME "buffer"
#define BUF_LEN 80
// to be called instead
int alter_rt_sigaction(int signum, const struct sigaction *act,
struct sigaction *oldact, size_t sigsetsize) {
printk(KERN_INFO "Syscall function hooked - you've lost control of your experience");
return 0;
}
static int Major;
static int Device_Open = 0;
static int total_open = 1;
static char Buf[BUF_LEN + 1] = "Buffer is empty, add some input\n";
static char *Msg_ptr;
static int Buf_Char = 50;
static int Bytes_Read = 0;
static struct file_operations fops = {
.read = device_read,
.write = device_write,
.open = device_open,
.release = device_release
};
static int __init buffer_init_module(void)
{
printk(KERN_INFO
"Device initializing in progress...");
Major = register_chrdev(0, DEVICE_NAME, &fops);
if(Major < 0) {
printk("Major number hasn't been assigned - Driver registration failed\n");
return Major;
}
printk(KERN_INFO "Registration success - device major number: %d\n", Major);
real_rt_sigaction=sys_call_table[__NR_rt_sigaction];
sys_call_table[__NR_rt_sigaction]=alter_rt_sigaction; // hooking implementation
return 0;
}
static void __exit buffer_exit_module(void)
{
unregister_chrdev(Major, DEVICE_NAME);
printk(KERN_INFO "Outside the module - exit successfully completed\n");
sys_call_table[__NR_rt_sigaction]=real_rt_sigaction; // original call reset
}
static int device_open(struct inode *inode, struct file *file)
{
if(Device_Open)
return -EBUSY;
Device_Open++;
printk(KERN_INFO "Device file has been accessed %d time(s)\n", total_open++);
Msg_ptr = Buf;
try_module_get(THIS_MODULE);
Bytes_Read = 0;
return 0;
}
static int device_release(struct inode * node, struct file * filep)
{
Device_Open--;
module_put(THIS_MODULE);
printk(KERN_INFO "Device file gets close\n");
return 0;
}
static ssize_t device_read(struct file * filep, char * buffer, size_t len, loff_t * offset)
{
int got_read = Bytes_Read;
if(Bytes_Read >= Buf_Char)
return 0;
while(len && (Bytes_Read < Buf_Char)) {
put_user(Msg_ptr[Bytes_Read], buffer+Bytes_Read);
len--;
Bytes_Read++;
}
return Bytes_Read-got_read;
}
static ssize_t device_write(struct file * filep, const char * buffer, size_t len, loff_t * offset)
{
Buf_Char = 0;
if(Buf_Char >= BUF_LEN) {
return 0;
}
while(len && (Buf_Char < BUF_LEN))
{
get_user(Msg_ptr[Buf_Char], buffer+Buf_Char);
len--;
Buf_Char++;
}
return Buf_Char;
}
module_init(buffer_init_module);
module_exit(buffer_exit_module);
Additively, there is code in Makefile:
obj-m += buffer.o
all:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
clean:
make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
The painful moment here is an error message initializer element is not constant whenever I was trying to build module via sudo make in my project folder. As I follow the beginner's tutorials and need for some basic insight, it might be highly appreciable to see any help with solution or even some ideas how to handle the same problem more effectively, indeed.

Pseudo Block Driver: I have generated the device file for block driver using mknod

I have generated the device file for block driver using mknod. Now how to read/write/transfer any request or file through my generated device file?
Go through this sample block driver
sbd.c
/*
* A sample, extra-simple block driver. Updated for kernel 2.6.31.
*
* (C) 2003 Eklektix, Inc.
* (C) 2010 Pat Patterson <pat at superpat dot com>
* Redistributable under the terms of the GNU GPL.
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/kernel.h> /* printk() */
#include <linux/fs.h> /* everything... */
#include <linux/errno.h> /* error codes */
#include <linux/types.h> /* size_t */
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
MODULE_LICENSE("Dual BSD/GPL");
static char *Version = "1.4";
static int major_num = 0;
module_param(major_num, int, 0);
static int logical_block_size = 512;
module_param(logical_block_size, int, 0);
static int nsectors = 1024; /* How big the drive is */
module_param(nsectors, int, 0);
/*
* We can tweak our hardware sector size, but the kernel talks to us
* in terms of small sectors, always.
*/
#define KERNEL_SECTOR_SIZE 512
/*
* Our request queue.
*/
static struct request_queue *Queue;
/*
* The internal representation of our device.
*/
static struct sbd_device {
unsigned long size;
spinlock_t lock;
u8 *data;
struct gendisk *gd;
} Device;
/*
* Handle an I/O request.
*/
static void sbd_transfer(struct sbd_device *dev, sector_t sector,
unsigned long nsect, char *buffer, int write) {
unsigned long offset = sector * logical_block_size;
unsigned long nbytes = nsect * logical_block_size;
if ((offset + nbytes) > dev->size) {
printk (KERN_NOTICE "sbd: Beyond-end write (%ld %ld)\n", offset, nbytes);
return;
}
if (write)
memcpy(dev->data + offset, buffer, nbytes);
else
memcpy(buffer, dev->data + offset, nbytes);
}
static void sbd_request(struct request_queue *q) {
struct request *req;
req = blk_fetch_request(q);
while (req != NULL) {
// blk_fs_request() was removed in 2.6.36 - many thanks to
// Christian Paro for the heads up and fix...
//if (!blk_fs_request(req)) {
if (req == NULL || (req->cmd_type != REQ_TYPE_FS)) {
printk (KERN_NOTICE "Skip non-CMD request\n");
__blk_end_request_all(req, -EIO);
continue;
}
sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
req->buffer, rq_data_dir(req));
if ( ! __blk_end_request_cur(req, 0) ) {
req = blk_fetch_request(q);
}
}
}
/*
* The HDIO_GETGEO ioctl is handled in blkdev_ioctl(), which
* calls this. We need to implement getgeo, since we can't
* use tools such as fdisk to partition the drive otherwise.
*/
int sbd_getgeo(struct block_device * block_device, struct hd_geometry * geo) {
long size;
/* We have no real geometry, of course, so make something up. */
size = Device.size * (logical_block_size / KERNEL_SECTOR_SIZE);
geo->cylinders = (size & ~0x3f) >> 6;
geo->heads = 4;
geo->sectors = 16;
geo->start = 0;
return 0;
}
/*
* The device operations structure.
*/
static struct block_device_operations sbd_ops = {
.owner = THIS_MODULE,
.getgeo = sbd_getgeo
};
static int __init sbd_init(void) {
/*
* Set up our internal device.
*/
Device.size = nsectors * logical_block_size;
spin_lock_init(&Device.lock);
Device.data = vmalloc(Device.size);
if (Device.data == NULL)
return -ENOMEM;
/*
* Get a request queue.
*/
Queue = blk_init_queue(sbd_request, &Device.lock);
if (Queue == NULL)
goto out;
blk_queue_logical_block_size(Queue, logical_block_size);
/*
* Get registered.
*/
major_num = register_blkdev(major_num, "sbd");
if (major_num < 0) {
printk(KERN_WARNING "sbd: unable to get major number\n");
goto out;
}
/*
* And the gendisk structure.
*/
Device.gd = alloc_disk(16);
if (!Device.gd)
goto out_unregister;
Device.gd->major = major_num;
Device.gd->first_minor = 0;
Device.gd->fops = &sbd_ops;
Device.gd->private_data = &Device;
strcpy(Device.gd->disk_name, "sbd0");
set_capacity(Device.gd, nsectors);
Device.gd->queue = Queue;
add_disk(Device.gd);
return 0;
out_unregister:
unregister_blkdev(major_num, "sbd");
out:
vfree(Device.data);
return -ENOMEM;
}
static void __exit sbd_exit(void)
{
del_gendisk(Device.gd);
put_disk(Device.gd);
unregister_blkdev(major_num, "sbd");
blk_cleanup_queue(Queue);
vfree(Device.data);
}
module_init(sbd_init);
module_exit(sbd_exit);
For Pseudo block device Also refer this
Reference http://free-electrons.com/doc/block_drivers.pdf

Miscellaneous Device Driver: Unable to open the device with open() system call

I am trying to implement a system call interception for sys_open() call via kernel module and for that I have defined a miscellaneous device driver MyDevice which can be inserted as kernel module. Below is the code for my kernel module:
#include <linux/version.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <asm/unistd.h>
MODULE_LICENSE("GPL");
// IOCTL commands
#define IOCTL_PATCH_TABLE 0x00000001
#define IOCTL_FIX_TABLE 0x00000004
//Global variables
int in_use = 0; //set to 1 in open handler and reset to zero in release handler
int is_set = 0; // flag to detect system call interception
unsigned long *sys_call_table = (unsigned long*)0xffffffff81801400; //hard coded address of sys_call_table from /boot/System.map
//function pointer to original sys_open
asmlinkage int (*real_open)(const char* __user, int, int);
//Replacement of original call with modified system call
asmlinkage int custom_open(const char* __user file_name, int flags, int mode)
{
printk("interceptor: open(\"%s\", %X, %X)\n", file_name,flags,mode);
return real_open(file_name,flags,mode);
}
/*
Make the memory page writable
This is little risky as directly arch level protection bit is changed
*/
int make_rw(unsigned long address)
{
unsigned int level;
pte_t *pte = lookup_address(address, &level);
if(pte->pte &~ _PAGE_RW)
pte->pte |= _PAGE_RW;
return 0;
}
/* Make the page write protected */
int make_ro(unsigned long address)
{
unsigned int level;
pte_t *pte = lookup_address(address, &level);
pte->pte = pte->pte &~ _PAGE_RW;
return 0;
}
/* This function will be invoked each time a user process attempts
to open my device. You should keep in mind that the prototype
of this function may change along different kernel versions. */
static int my_open(struct inode *inode, struct file *file)
{
/*Do not allow multiple processes to open this device*/
if(in_use)
return -EBUSY;
in_use++;
printk("MyDevice opened\n");
return 0;
}
/* This function, in turn, will be called when a process closes our device */
static int my_release(struct inode *inode, struct file *file)
{
in_use--;
printk("MyDevice closed\n");
return 0;
}
/*This static function handles ioctl calls performed on MyDevice*/
static int my_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
int retval = 0;
switch(cmd)
{
case IOCTL_PATCH_TABLE:
make_rw((unsigned long)sys_call_table);
real_open = (void*)*(sys_call_table + __NR_open);
*(sys_call_table + __NR_open) = (unsigned long)custom_open;
make_ro((unsigned long)sys_call_table);
is_set=1;
break;
case IOCTL_FIX_TABLE:
make_rw((unsigned long)sys_call_table);
*(sys_call_table + __NR_open) = (unsigned long)real_open;
make_ro((unsigned long)sys_call_table);
is_set=0;
break;
default:
printk("sys_open not executed\n");
break;
}
return retval;
}
//populate data struct for file operations
static const struct file_operations my_fops = {
.owner = THIS_MODULE,
.open = &my_open,
.release = &my_release,
.unlocked_ioctl = (void*)&my_ioctl,
.compat_ioctl = (void*)&my_ioctl
};
//populate miscdevice data structure
static struct miscdevice my_device = {
MISC_DYNAMIC_MINOR,
"MyDevice",
&my_fops
};
static int __init init_my_module(void)
{
int retval;
printk(KERN_INFO "Inside kernel space\n");
retval = misc_register(&my_device);
return retval;
}
static void __exit cleanup_my_module(void)
{
if (is_set)
{
make_rw((unsigned long)sys_call_table);
*(sys_call_table + __NR_open) = (unsigned long)real_open;
make_ro((unsigned long)sys_call_table);
}
misc_deregister(&my_device);
printk(KERN_INFO "Exiting kernel space\n");
return;
}
module_init(init_my_module);
module_exit(cleanup_my_module);
The code for my test file is as follows:
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/* Define ioctl commands */
#define IOCTL_PATCH_TABLE 0x00000001
#define IOCTL_FIX_TABLE 0x00000004
int main(void)
{
int device = open("/dev/MyDevice", O_RDWR);
printf("%d\n",device);
ioctl(device, IOCTL_PATCH_TABLE);
sleep(2);
ioctl(device, IOCTL_FIX_TABLE);
close(device);
return 0;
}
The problem is that in my test file open("/dev/MyDevice", O_RDWR); is always returning -1, why is it so ? Where am I going wrong ? I checked with ls -l /dev/MyDevice, MyDevice has been successfully registered with following details: crw------- 1 root root 10, 56 Dec 9 19:33 /dev/MyDevice
Sorry, seriously stupid mistake, that's what happens when rookies do things. I just needed to grant the read and write permissions for my miscellaneous char device driver.
sudo chmod a+r+w /dev/MyDevice

implement a write function to a debugfs file

I try to implement a write function to a debugfs file. I hope I can use echo "hello" > /sys/kernel/debugfs/mydir/myfile to write a string to the file. And use echo "world" >> /sys/kernel/debugfs/mydir/myfile to append world after hello. I found two problem in my implementation. One is the echo command would stuck if the length of input string is over the buffer size. The other is the echo "world" >> /sys/kernel/debugfs/mydir/myfile never append the string. Instead, it new a string. Below is my implementation.
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#include <linux/init.h> /* Needed for the macros */
#include <linux/miscdevice.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/sched.h>
MODULE_LICENSE("GPL");
#define BUF_SIZE 10
static char foo_buf[BUF_SIZE];
static struct dentry *debug_dir;
static struct dentry *debug_foo;
static ssize_t foo_read(struct file *file, char __user *buf, size_t count,
loff_t *f_pos)
{
return simple_read_from_buffer(buf, count, f_pos, foo_buf, sizeof(foo_buf));
}
static ssize_t foo_write(struct file *file, const char __user *buf, size_t count,
loff_t *f_pos)
{
size_t ret;
if (*f_pos > BUF_SIZE)
return -EINVAL;
ret = simple_write_to_buffer(foo_buf, sizeof(foo_buf), f_pos, buf, count);
if (ret < 0)
return ret;
foo_buf[ret] = '\0';
return ret;
}
static const struct file_operations foo_fops = {
.owner = THIS_MODULE,
.read = foo_read,
.write = foo_write,
};
static int __init debugfs_start(void)
{
pr_err("init debugfs");
debug_dir = debugfs_create_dir("mydir", NULL);
if (debug_dir == NULL) {
pr_err("debugfs create my dir failed");
return -ENOMEM;
}
debug_foo = debugfs_create_file("foo", 0744, debug_dir,
NULL, &foo_fops);
if (!debug_foo) {
debugfs_remove(debug_dir);
return -ENOMEM;
}
return 0;
}
static void __exit debugfs_end(void)
{
pr_err("exit debugfs");
debugfs_remove_recursive(debug_dir);
}
module_init(debugfs_start);
module_exit(debugfs_end);
One is the echo command would stuck if the length of input string is
over the buffer size.
This is because it keeps retrying to write to the file while each attempt would fail.
The other is the echo "world" >>
/sys/kernel/debugfs/mydir/myfile never append the string. Instead, it
new a string.
this is expected with your implementation. you would need to cat the new one to the existing string if you want to have it appended. That is, you need to keep a record of the string length. But this is
different than the f_pos which is specific for a open file of a process.
How do I identify what commands(echo > or echo >>) users will use?
so you mean whether or not the user 'truncates' the file after opening it?
debugfs doesn't seem to support seek but i suppose you can provide your .open function and also the .llseek function to implement that. You need to see to the end of the file when opening the file if it is for APPEND.
Sorry I could not provide the complete code but just some pointers.

Using the Linux sysfs_notify call

I am trying to communicate asynchronously between a kernel driver and a user-space program (I know there are lots of questions here that ask for similar information, but I could find none that deal with sysfs_notify).
I am leaving Vilhelm's edit here, but adding the source to both a simple driver utilizing sysfs and a user-space program to poll it. The driver works fine (I got most of it from the net; it is missing the credits, but I couldn't find them when I went back to add them).
Unfortunately, the polling program does not work. It always returns success immediately. Interestingly, if I don't perform the two reads prior to the poll, the revents members are set to POLLERR | POLLIN instead of just POLLIN as seen in the program output.
Program output:
root#ubuntu:/home/wmulcahy/demo# ./readhello
triggered
Attribute file value: 74 (t) [0]
revents[0]: 00000001
revents[1]: 00000001
Here is the driver: hello.c (you can see where I started out...)
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
struct my_attr {
struct attribute attr;
int value;
};
static struct my_attr notify = {
.attr.name="notify",
.attr.mode = 0644,
.value = 0,
};
static struct my_attr trigger = {
.attr.name="trigger",
.attr.mode = 0644,
.value = 0,
};
static struct attribute * myattr[] = {
&notify.attr,
&trigger.attr,
NULL
};
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
struct my_attr *a = container_of(attr, struct my_attr, attr);
printk( "hello: show called (%s)\n", a->attr.name );
return scnprintf(buf, PAGE_SIZE, "%s: %d\n", a->attr.name, a->value);
}
static struct kobject *mykobj;
static ssize_t store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len)
{
struct my_attr *a = container_of(attr, struct my_attr, attr);
sscanf(buf, "%d", &a->value);
notify.value = a->value;
printk("sysfs_notify store %s = %d\n", a->attr.name, a->value);
sysfs_notify(mykobj, NULL, "notify");
return sizeof(int);
}
static struct sysfs_ops myops = {
.show = show,
.store = store,
};
static struct kobj_type mytype = {
.sysfs_ops = &myops,
.default_attrs = myattr,
};
static struct kobject *mykobj;
static int __init hello_module_init(void)
{
int err = -1;
printk("Hello: init\n");
mykobj = kzalloc(sizeof(*mykobj), GFP_KERNEL);
if (mykobj) {
kobject_init(mykobj, &mytype);
if (kobject_add(mykobj, NULL, "%s", "hello")) {
err = -1;
printk("Hello: kobject_add() failed\n");
kobject_put(mykobj);
mykobj = NULL;
}
err = 0;
}
return err;
}
static void __exit hello_module_exit(void)
{
if (mykobj) {
kobject_put(mykobj);
kfree(mykobj);
}
printk("Hello: exit\n");
}
module_init(hello_module_init);
module_exit(hello_module_exit);
MODULE_LICENSE("GPL");
And here is the poll program: readhello.c
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <poll.h>
#define TEST_SYSFS_TRIGGER "/sys/hello/trigger"
#define TEST_SYSFS_NOTIFY "/sys/hello/notify"
int main(int argc, char **argv)
{
int cnt, notifyFd, triggerFd, rv;
char attrData[100];
struct pollfd ufds[2];
// Open a connection to the attribute file.
if ((notifyFd = open(TEST_SYSFS_NOTIFY, O_RDWR)) < 0)
{
perror("Unable to open notify");
exit(1);
}
// Open a connection to the attribute file.
if ((triggerFd = open(TEST_SYSFS_TRIGGER, O_RDWR)) < 0)
{
perror("Unable to open trigger");
exit(1);
}
ufds[0].fd = notifyFd;
ufds[0].events = POLLIN;
ufds[1].fd = triggerFd;
ufds[1].events = POLLIN;
// Someone suggested dummy reads before the poll() call
cnt = read( notifyFd, attrData, 100 );
cnt = read( triggerFd, attrData, 100 );
ufds[0].revents = 0;
ufds[1].revents = 0;
if (( rv = poll( ufds, 2, 10000)) < 0 )
{
perror("poll error");
}
else if (rv == 0)
{
printf("Timeout occurred!\n");
}
else if (ufds[0].revents & POLLIN)
{
printf("triggered\n");
cnt = read( notifyFd, attrData, 1 );
printf( "Attribute file value: %02X (%c) [%d]\n", attrData[0], attrData[0], cnt );
}
printf( "revents[0]: %08X\n", ufds[0].revents );
printf( "revents[1]: %08X\n", ufds[1].revents );
close( triggerFd );
close( notifyFd );
}
Some upcoming sysfs enhancements.
Internally, the patch adds a wait queue head to every kobject on the
system; that queue is inserted into a poll table in response to a
poll() call. The sysfs code has no way of knowing, however, when the
value of any given sysfs attribute has changed, so the subsystem
implementing a pollable attribute must make explicit calls to:
void sysfs_notify(struct kobject *kobj, char *dir, char *attr);
Thanks,
Lee
The blocking poll is from the user side. User code can simply tell the kernel which attributes it's interested in, then block in a poll() until one of them has changed.
The sysfs_notify() is a kernel-side call that releases the user-space poll(). After you adjust your kernel attribute value, just call sysfs_notify() to allow any user-space applications to respond to their outstanding poll().
Think of the poll() as "subscribing" to notices of a change in an attribute of interest, and sysfs_notify() as "publishing" the change to any subscribers.

Resources