What's the max file mapping size in 64bits machine - linux

I'm new to 64-bits architecture. Could you tell me what's MAX file size supported by file mapping in 64 bits linux machine. I want to open more than 20GB files by file mapping, is it available?
I write a sample code. But it causes Bus Error when I get the value of the pointer in GBSIZE offset:
unsigned char* pCur = pBegin + GBSIZE;
//pBegin is the pointer returned by mmap
printf("%c",*pCur);
BTW, printf("%c",*pBegin ); works fine. and my address sizes : 38 bits physical, 48 bits virtual
Here is the full code:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
//#define FILEPATH "smallfile"
#define FILEPATH "bigfile"
#define GBSIZE (1024L*1024L*1024L)
#define TBSIZE (1024L*GBSIZE)
#define NUMSIZE (20L * GBSIZE)
//#define NUMSIZE (10)
#define FILESIZE (NUMINTS * sizeof(int))
int main(int argc, char *argv[])
{
int i;
int fd;
unsigned char *pBegin;
fd = open(FILEPATH, O_RDONLY);
if (fd == -1) {
perror("Error opening file for reading");
exit(EXIT_FAILURE);
}
pBegin = mmap(0, NUMSIZE, PROT_READ, MAP_SHARED, fd, 0);
if (pBegin == MAP_FAILED) {
close(fd);
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
/** ERROR happens here!!! **/
unsigned char* pCur = pBegin + GBSIZE;
printf("%c",*pCur);
if (munmap(pBegin, NUMSIZE) == -1) {
perror("Error un-mmapping the file");
}
close(fd);
return 0;
}

Although pointers are 64-bit wide, most processors do not actually support virtual addresses using the full 64 bits. To see what size virtual addresses your processor supports, look in /proc/cpuinfo (48 bits is typical).
grep "address sizes" /proc/cpuinfo
Additionally, half of the virtual address space is used by the kernel and not available to userspace - leaving 47 bits in the current Linux implementation.
However, even taking this into account, you will still have plenty of room for a 20GB file. 47 bits in theory means a virtual address space of 128TB.

From the mmap(2) man page:
void *mmap(void *addr, size_t length, int prot, int flags,
int fd, off_t offset);
length is a size_t, which on 64-bit machines is 64 bits in length. Therefore yes, you can theoretically map a 20GB file.

64-bit addresses allow for many orders of magnitude more than 20 GB.

(This answer was originally edited into the question by OP)
You have requested a 20GB map onto a file which was only 50MB in size.
As described by the mmap man page, mmap succeeds when you request the length too big, however it will give SIGBUS or SIGSEGV when you actually try to read beyond the end of the underlying file.

Agree with MarkR, you are dereference an invalid address.
// A bug in these lines.
unsigned char* pCur = pBegin + GBSIZE;
printf("%c",*pCur);
unsigned char* pEnd = pBegin + NUMSIZE;
unsigned char* pLast = pEnd - 1;
unsigned char* pCur = pLast;
I modified your code to use HUGE TLB flags as the following.
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
#define MAP_HUGE_SHIFT 26
#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
#define KSIZE 1024L
#define MSIZE (1024L*1024L)
#define GSIZE (1024L*1024L*1024L)
#define TSIZE (1024L*GSIZE)
#define INIT_MEM 0
// Fail on my MacBook Pro (Retina, 13-inch, Early 2015)
// Darwin Kernel Version 16.5.0:x86_64
// #define NUMSIZE (16L * TSIZE)
// mmap ok; init: got killed; signal 9
// #define NUMSIZE (8L * TSIZE)
// Got killed signal 9
// #define NUMSIZE (1L * TSIZE)
// OK
// #define NUMSIZE (200L * GSIZE)
// OK
#define NUMSIZE (20L * GSIZE)
typedef unsigned long long ETYPE;
#define MEMSIZE (NUMSIZE*sizeof(ETYPE))
#define PGSIZE (16*KSIZE)
void init(ETYPE* ptr) {
*ptr = (ETYPE)ptr;
}
int verify(ETYPE* ptr) {
if (*ptr != (ETYPE)ptr) {
fprintf(stderr, "ERROR: 0x%016llx != %p.\n", *ptr, ptr);
return -1;
}
else {
fprintf(stdout, "OK: 0x%016llx = %p.\n", *ptr, ptr);
}
return 0;
}
int main(int argc, char *argv[])
{
int i;
int fd;
ETYPE *pBegin;
int flags = MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_1GB;
printf("mmap memory size:%lu GB\n", MEMSIZE/GSIZE);
pBegin = (ETYPE*) mmap(0, MEMSIZE, PROT_READ | PROT_WRITE, flags, -1, 0);
if (pBegin == MAP_FAILED) {
perror("Error mmapping the file");
exit(EXIT_FAILURE);
}
ETYPE* pEnd = pBegin + NUMSIZE;
ETYPE* pCur = pBegin;
#if INIT_MEM
while (pCur < pEnd) {
init(pCur);
// ++pCur; //slow if init all addresses.
pCur += (PGSIZE/sizeof(ETYPE));
}
#endif
init(&pBegin[0]);
init(&pBegin[NUMSIZE-1]);
verify(&pBegin[0]);
verify(&pBegin[NUMSIZE-1]);
if (munmap(pBegin, MEMSIZE) == -1) {
perror("Error un-mmapping the file");
}
return 0;
}

Related

Why mmap throws error while using MAP_FIXED?

Using Ubuntu 16.04, kernel 4.17.4
I am trying the following code where I have defined a shared memory and mapped current process's address space to the shared memory. When I use MAP_FIXED in mmap() it shows:
mmap: Invalid argument
The code is as follows:
#include <stdio.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#define STORAGE_ID "/SHM_TEST"
#define STORAGE_SIZE 4096
int main(int argc, char *argv[])
{
int res, temp;
int fd;
int len;
pid_t pid;
void *addr;
char data[STORAGE_SIZE];
fd = shm_open(STORAGE_ID, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
if (fd == -1)
{
perror("open");
return 10;
}
res = ftruncate(fd, STORAGE_SIZE);
if (res == -1)
{
perror("ftruncate");
return 20;
}
addr = mmap(NULL, STORAGE_SIZE, PROT_WRITE, MAP_FIXED, fd, 0);
if (addr == MAP_FAILED)
{
perror("mmap");
return 30;
}
res = munmap(addr, STORAGE_SIZE);
if (res == -1)
{
perror("munmap");
return 40;
}
// shm_open cleanup
fd = shm_unlink(STORAGE_ID);
if (fd == -1)
{
perror("unlink");
return 100;
}
return 0;
}
The reason I am using MAP_FIXED is I will manually put an address instead of NULL (tried this and got same error). What do I need to do to use MAP_FIXED with mmap()?

Which Linux kernel version support uffdio_writeprotect structure and how to compile and install that linux kernel?

I have try to compile a program which manage userfault-fd to collect some dirty pages on memory. In this program i have used the uffdio_writeprotect structure to survey a memory region where program try to access. I am using Ubuntu 18.04 with linux kernel version 5.14.0. But when i compile a program with the command gcc -o with_userfault -I. with_userfault.c
i still have the same errors such as: with_userfault.c:108:34: error: storage size of ‘wp’ isn’t known
struct uffdio_writeprotect wp;
with_userfault.c:114:21: error: ‘UFFDIO_WRITEPROTECT’ undeclared (first use in this function); did you mean ‘UFFDIO_REGISTER’?
if (ioctl(fd, UFFDIO_WRITEPROTECT, &wp) == -1)
I don't know how to solve this problem. Please I need your help to solve this issue because i don't know what i am suppose to do now. thanks!!!!!
this is a source code:
The header file:
#ifndef __RDTSC_H_DEFINED__
#define __RDTSC_H_DEFINED__
#if defined(__i386__)
static __inline__ unsigned long long rdtsc(void)
{
unsigned long long int x;
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
return x;
}
#elif defined(__x86_64__)
static __inline__ unsigned long long rdtsc(void)
{
unsigned hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
}
#elif defined(__powerpc__)
static __inline__ unsigned long long rdtsc(void)
{
unsigned long long int result=0;
unsigned long int upper, lower,tmp;
__asm__ volatile(
"0: \n"
"\tmftbu %0 \n"
"\tmftb %1 \n"
"\tmftbu %2 \n"
"\tcmpw %2,%0 \n"
"\tbne 0b \n"
: "=r"(upper),"=r"(lower),"=r"(tmp)
);
result = upper;
result = result<<32;
result = result|lower;
return(result);
}
#else
#error "No tick counter is available!"
#endif
/* $RCSfile: $ $Author: kazutomo $
* $Revision: 1.6 $ $Date: 2005/04/13 18:49:58 $
*/
#endif
The source file
/*
* Example program about using userfaultfd(2) for garbage collection.
*
* This establishes a couple pages, all of which are filled from
* compressed files on disk when first accessed. For simplicity
* these are
* one file per page. Files are written at the beginning of the
* program.
*
* Later, this program demonstrates the use of write protection to
* get
* a notification on write access, analogous to using
* mprotect(!PROT_WRITE)
* and doing the bookkeeping in a SIGSEGV handler.
*
*/
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <time.h>
#include <math.h>
#include <unistd.h>
#include <strings.h>
#include <string.h>
#include <unistd.h>
#include <asm/unistd.h>
#include <poll.h>
#include <pthread.h>
#include "rdtsc.h"
//#define size 102400
#define FILE_BUF 25
#define PAGE_SIZE sysconf(_SC_PAGE_SIZE)
#define n_pages 102400
//102400
#define iterations 1
pthread_t tracker;
void *tracker_task();
int i;
unsigned long main_tsc_start, main_tsc_end;
// This is doing the work in the uffd handler thread
void *tracker_task(void *data)
{
int fd = *(int *)(data);
for (;;)
{
struct uffd_msg msg;
struct pollfd pollfd;
int pollres, readret;
unsigned long addr, page_begin, whichpage;
unsigned long handler_tsc_start, handler_tsc_end;
pollfd.fd = fd;
pollfd.events = POLLIN;
pollres = poll(&pollfd, 1, -1);
if( pollres == -1 )
perror("poll");
if (pollfd.revents & POLLERR)
{
fprintf(stderr, "POLLERR on userfaultfd\n");
exit(1);
}
readret = read(fd, &msg, sizeof(msg));
if (readret == -1)
perror("read userfaultfd");
if (readret != sizeof(msg))
{
fprintf(stderr, "short read, not expected, exiting\n");
exit(1);
}
/*
* Proper sequence is important here.
*
* For the GC we expect that write-protected pages can only
* be pages already backed by physical pages.
* Regular writes into unprotected pages that come before
* reads need the page be filled.
*
* So we do the WP case first and get it out of the way.
* Then both of the other cases need the page read.
*/
if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP)
{
// send write unlock
struct uffdio_writeprotect wp;
wp.range.start = msg.arg.pagefault.address;
wp.range.len = PAGE_SIZE;
wp.mode = 0;
//printf("sending !UFFDIO_WRITEPROTECT event to
//userfaultfd\n");
if (ioctl(fd, UFFDIO_WRITEPROTECT, &wp) == -1)
perror("ioctl(UFFDIO_WRITEPROTECT)");
//continue;
}
}
printf("end\n");
return NULL;
}
int main(int argc, char *argv[])
{
unsigned long *region;
int uffd, uffd_flags, expected, t_create;
void *status;
struct uffdio_writeprotect wp;
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
char *clear = "3", *drop_caches_path = "/proc/sys/vm/drop_caches";
FILE *drop_caches_file;
main_tsc_start = rdtsc();
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd == -1)
{
perror("syscall");
exit(2);
}
uffdio_api.api = UFFD_API;
uffdio_api.features = 1;
if (ioctl(uffd, UFFDIO_API, &uffdio_api))
{
fprintf(stderr, "UFFDIO_API\n");
return 1;
}
//printf("Features: 0x%llx\n", uffdio_api.features);
if (uffdio_api.api != UFFD_API)
{
fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
return 1;
}
/* Allocate memory that will be tracked */
region = (unsigned long *) mmap(NULL, PAGE_SIZE * n_pages, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
if (!region)
{
perror("mmap");
exit(2);
}
/* Force alignment of contiguous pages */
if (posix_memalign((void **)region, PAGE_SIZE, PAGE_SIZE * (n_pages - 1)))
{
fprintf(stderr, "cannot align by PAGE_SIZE %ld\n", PAGE_SIZE);
exit(1);
}
uffdio_register.range.start = (unsigned long)region;
uffdio_register.range.len = PAGE_SIZE * n_pages;
uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
//main_tsc_start = rdtsc();
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
{
perror("ioctl(UFFDIO_REGISTER)");
exit(1);
}
expected = UFFD_API_RANGE_IOCTLS;
if ((uffdio_register.ioctls & expected) != expected)
{
fprintf(stderr, "ioctl set is incorrect\n");
exit(1);
}
if( (t_create = pthread_create(&tracker, NULL, tracker_task,
&uffd)) != 0 )
{
errno = t_create;
perror("pthread_create");
}
//printf("mainline writing writable pages.\n");
for (unsigned long i = 0; i < n_pages; i++)
{
unsigned long entry = (i * PAGE_SIZE / sizeof(unsigned long)) + ((rand()%10000) % 512);//(i % 512);
region[entry] = i;
}
drop_caches_file = fopen(drop_caches_path, "w");
for (i = 0; i < iterations; i++)//
{
/* Indicate the range of pages to be write-protected */
//sync();
fwrite(clear, sizeof(clear), 1, drop_caches_file);
wp.range.start = (unsigned long long)region;
wp.range.len = PAGE_SIZE * n_pages;
wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
/* Write-protect pages */
if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp) == -1)
{
perror("ioctl(UFFDIO_WRITEPROTECT)");
exit(1);
}
/* Now try to "touch" the pages to trigger page faults and handling by the tracker thread */
for (unsigned long i = 0; i < n_pages; i++)
{
unsigned long entry = (i * PAGE_SIZE / sizeof(unsigned long)) + ((rand()%10000) % 512);//(i % 512);
region[entry] = i;
}
}
fclose(drop_caches_file);
if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
fprintf(stderr, "ioctl unregister failure\n");
return 1;
}
main_tsc_end = rdtsc();
printf("Apllication TSC : %lu\n", (unsigned long) (main_tsc_end-main_tsc_start));
return 0;
}

Data transfer between Processor and FPGA through GPMC using EDMA

We are working on a board containing AM3359 processor.
SDK we are using is ti-processor-sdk-linux-am335x-evm-05.00.00.15.
Our customer wants to see the throughput of the data transfer between processor (AM3359) and FPGA connected through 8 GPMC lines using EDMA
All we want now is to implement GPMC_EDMA data transfer program as per the below flow.
Processor and FPGA lines
Processor booted ----> Run GPMC_EDMA Data transfer program ----> Complete data transfer ----> Calculate throughput.
Here is my code for transferring data to FPGA through 8 GPMC lines.
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <termios.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>
#include <sys/mman.h>
#define FATAL do { fprintf(stderr, "Error at line %d, file %s (%d) [%s]\n", \
__LINE__, __FILE__, errno, strerror(errno)); exit(1); } while(0)
int verbose = 0;
#define BUF_SIZE 4096
int main(){
int i,fd;char ch;
int buf[BUF_SIZE];
volatile uint8_t *map_base;
int a=1;
for(i=0;i<BUF_SIZE;i++){
if(i%100==0){
a++;
printf("i: %d, a: %d\n",i,a);
}
buf[i]=a;
}
//buf[BUF_SIZE]=43;
if((fd=open("/dev/mem", O_RDWR | O_SYNC)) == -1) {
printf("Error in opening gpmc\n");return 0;
}
printf("Successfully opened\n");
map_base =(uint8_t *) mmap((void*)0x01000000, 0x01000000, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x01000000);
if(map_base == (void *) -1) FATAL;
if (verbose) {
printf("Memory mapped at address %p.\n", map_base);
fflush(stdout);
}
volatile uint32_t *p32 = (uint32_t *) &map_base[0x0800];
//while(1/*(ch=getchar())!='\n'*/){
//*p32 = 0x1;
for(i=0;i<BUF_SIZE;i++){
*p32 = buf[i];
}
printf("Sent %d bytes\n",BUF_SIZE);
//}
printf("Closing FD\n");
close(fd);
}
Someone kindly help me to proceed further as we have a severe time restriction and also we are new to this one.
Regards
Vamsi

Device driver not working

I wrote a small device driver for a "coin" device. I create an entry in /drivers/char/Kconfig
and corresponding Makefile, then selected built-in option in menuconfig. The kernel compiled fine (built-in.o file was created). But I still can't access the device (/dev/coin was not created) and there was no entry under /proc/devices.
Please help!!
I am cross-compiling for powerpc
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/device.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/init.h>
#define DEVNAME "coin"
#define LEN 20
enum values {HEAD, TAIL};
struct dentry *dir, *file;
int file_value;
int stats[2] = {0, 0};
char *msg[2] = {"head\n", "tail\n"};
static int major;
static struct class *class_coin;
static struct device *dev_coin;
static ssize_t r_coin(struct file *f, char __user *b,
size_t cnt, loff_t *lf)
{
char *ret;
u32 value = random32() % 2;
ret = msg[value];
stats[value]++;
return simple_read_from_buffer(b, cnt,
lf, ret,
strlen(ret));
}
static struct file_operations fops = { .read = r_coin };
#ifdef CONFIG_COIN_STAT
static ssize_t r_stat(struct file *f, char __user *b,
size_t cnt, loff_t *lf)
{
char buf[LEN];
snprintf(buf, LEN, "head=%d tail=%d\n",
stats[HEAD], stats[TAIL]);
return simple_read_from_buffer(b, cnt,
lf, buf,
strlen(buf));
}
static struct file_operations fstat = { .read = r_stat };
#endif
static int __init coin_init(void)
{
void *ptr_err;
major = register_chrdev(0, DEVNAME, &fops);
if (major < 0)
return major;
class_coin = class_create(THIS_MODULE,
DEVNAME);
if (IS_ERR(class_coin)) {
ptr_err = class_coin;
goto err_class;
}
dev_coin = device_create(class_coin, NULL,
MKDEV(major, 0),
NULL, DEVNAME);
if (IS_ERR(dev_coin))
goto err_dev;
#ifdef CONFIG_COIN_STAT
dir = debugfs_create_dir("coin", NULL);
file = debugfs_create_file("stats", 0644,
dir, &file_value,
&fstat);
#endif
return 0;
err_dev:
ptr_err = class_coin;
class_destroy(class_coin);
err_class:
unregister_chrdev(major, DEVNAME);
return PTR_ERR(ptr_err);
}
static void __exit coin_exit(void)
{
#ifdef CONFIG_COIN_STAT
debugfs_remove(file);
debugfs_remove(dir);
#endif
device_destroy(class_coin, MKDEV(major, 0));
class_destroy(class_coin);
return unregister_chrdev(major, DEVNAME);
}
module_init(coin_init);
module_exit(coin_exit);
What if you manually insert module into kernel using insmod? Does it work? Any messages in dmesg?
As I remember entries in /dev (/dev/coin) should be created manually using mknod, but you need major number of registered device. Just printk it after register_chrdev().

How do I use ioctl() to manipulate my kernel module?

So I'm trying to write a kernel module that uses the linux/timer.h file. I got it to work inside just the module, and now I am trying to get it to work from a user program.
Here is my kernel module:
//Necessary Includes For Device Drivers.
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include <linux/timer.h>
#include <linux/ioctl.h>
#define DEVICE_NAME "mytimer"
#define DEVICE_FILE_NAME "mytimer"
#define MAJOR_NUM 61
#define MINOR_NUM 0
MODULE_LICENSE("Dual BSD/GPL");
static struct timer_list my_timer;
struct file_operations FileOps =
{
//No File Operations for this timer.
};
//Function to perform when timer expires.
void TimerExpire(int data)
{
printk("Timer Data: %d\n", data);
}
//Function to set up timers.
void TimerSetup(void)
{
setup_timer(&my_timer, TimerExpire, 5678);
mod_timer(&my_timer, jiffies + msecs_to_jiffies(5000));
}
//Module Init and Exit Functions.
int init_module(void)
{
int initResult = register_chrdev(MAJOR_NUM, "mytimer", &FileOps);
if (initResult < 0)
{
printk("Cannot obtain major number %d\n", MAJOR_NUM);
return initResult;
}
printk("Loading MyTimer Kernel Module...\n");
return 0;
}
void cleanup_module(void)
{
unregister_chrdev(MAJOR_NUM, "mytimer");
printk("Unloading MyTimer Kernel Module...\n");
}
More specifically, I want my user program to call the TimerSetup() function. I know that I'll need to use ioctl() but I'm not sure how to specify in my MODULE FILE that TimerSetup() should be callable via ioctl().
Also, my second question: I was able to insmod my module and also mknod into /dev/mytimer with the correct major number. But when I tried to open() it so that I can get the file descriptor from it, it kept returning -1, which I'm assuming is wrong. I made sure the permissions were fine (in fact, I made it 777 just to be sure)... It still doesn't work... Is there something I'm missing?
Here is the user program just in case:
#include <stdio.h>
int main(int argc, char* argv[])
{
int fd = open("/dev/mytimer", "r");
printf("fd: %d\n", fd);
return 0;
}
The example code you need can be found in drivers/watchdog/softdog.c (from Linux 2.6.33 at the time this was written), which illustrates proper file operations as well as how to permit userland to fill a structure with ioctl().
It's actually a great, working tutorial for anyone who needs to write trivial character device drivers.
I dissected softdog's ioctl interface when answering my own question, which may be helpful to you.
Here's the gist of it (though far from exhaustive) ...
In softdog_ioctl() you see a simple initialization of struct watchdog_info that advertises functionality, version and device information:
static const struct watchdog_info ident = {
.options = WDIOF_SETTIMEOUT |
WDIOF_KEEPALIVEPING |
WDIOF_MAGICCLOSE,
.firmware_version = 0,
.identity = "Software Watchdog",
};
We then look at a simple case where the user just wants to obtain these capabilities:
switch (cmd) {
case WDIOC_GETSUPPORT:
return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0;
... which of course, will fill the corresponding userspace watchdog_info with the initialized values above. If copy_to_user() fails, -EFAULT is returned which causes the corresponding userspace ioctl() call to return -1 with a meaningful errno being set.
Note, the magic requests are actually defined in linux/watchdog.h , so that the kernel and userspace share them:
#define WDIOC_GETSUPPORT _IOR(WATCHDOG_IOCTL_BASE, 0, struct watchdog_info)
#define WDIOC_GETSTATUS _IOR(WATCHDOG_IOCTL_BASE, 1, int)
#define WDIOC_GETBOOTSTATUS _IOR(WATCHDOG_IOCTL_BASE, 2, int)
#define WDIOC_GETTEMP _IOR(WATCHDOG_IOCTL_BASE, 3, int)
#define WDIOC_SETOPTIONS _IOR(WATCHDOG_IOCTL_BASE, 4, int)
#define WDIOC_KEEPALIVE _IOR(WATCHDOG_IOCTL_BASE, 5, int)
#define WDIOC_SETTIMEOUT _IOWR(WATCHDOG_IOCTL_BASE, 6, int)
#define WDIOC_GETTIMEOUT _IOR(WATCHDOG_IOCTL_BASE, 7, int)
#define WDIOC_SETPRETIMEOUT _IOWR(WATCHDOG_IOCTL_BASE, 8, int)
#define WDIOC_GETPRETIMEOUT _IOR(WATCHDOG_IOCTL_BASE, 9, int)
#define WDIOC_GETTIMELEFT _IOR(WATCHDOG_IOCTL_BASE, 10, int)
WDIOC obviously signifying "Watchdog ioctl"
You can easily take that a step further, having your driver do something and place the result of that something in the structure and copy it to userspace. For instance, if struct watchdog_info also had a member __u32 result_code. Note, __u32 is just the kernel's version of uint32_t.
With ioctl(), the user passes the address of an object, be it a structure, integer, whatever to the kernel expecting the kernel to write its reply in an identical object and copy the results to the address that was provided.
The second thing you are going to need to do is make sure your device knows what to do when someone opens, reads from it, writes to it, or uses a hook like ioctl(), which you can easily see by studying softdog.
Of interest is:
static const struct file_operations softdog_fops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.write = softdog_write,
.unlocked_ioctl = softdog_ioctl,
.open = softdog_open,
.release = softdog_release,
};
Where you see the unlocked_ioctl handler going to ... you guessed it, softdog_ioctl().
I think you might be juxtaposing a layer of complexity that really doesn't exist when dealing with ioctl(), it really is that simple. For that same reason, most kernel developers frown on new ioctl interfaces being added unless they are absolutely necessary. Its just too easy to lose track of the type that ioctl() is going to fill vs the magic you use to do it, which is the primary reason that copy_to_user() fails often resulting in the kernel rotting with hordes of userspace processes stuck in disk sleep.
For a timer, I agree, ioctl() is the shortest path to sanity.
You are missing a .open function pointer in your file_operations structure to specify the function to be called when a process attempts to open the device file. You will need to specify a .ioctl function pointer for your ioctl function as well.
Try reading through The Linux Kernel Module Programming Guide, specifically chapters 4 (Character Device Files) and 7 (Talking to Device Files).
Chapter 4 introduces the file_operations structure, which holds pointers to functions defined by the module/driver that perform various operations such as open or ioctl.
Chapter 7 provides information on communicating with a module/drive via ioctls.
Linux Device Drivers, Third Edition is another good resource.
Minimal runnable example
Tested in a fully reproducible QEMU + Buildroot environment, so might help others get their ioctl working. GitHub upstream:
kernel module |
shared header |
userland.
The most annoying part was understanding that some low ids are hijacked: ioctl is not called if cmd = 2 , you have to use _IOx macros.
Kernel module:
#include <asm/uaccess.h> /* copy_from_user, copy_to_user */
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/printk.h> /* printk */
#include "ioctl.h"
MODULE_LICENSE("GPL");
static struct dentry *dir;
static long unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long argp)
{
void __user *arg_user;
union {
int i;
lkmc_ioctl_struct s;
} arg_kernel;
arg_user = (void __user *)argp;
pr_info("cmd = %x\n", cmd);
switch (cmd) {
case LKMC_IOCTL_INC:
if (copy_from_user(&arg_kernel.i, arg_user, sizeof(arg_kernel.i))) {
return -EFAULT;
}
pr_info("0 arg = %d\n", arg_kernel.i);
arg_kernel.i += 1;
if (copy_to_user(arg_user, &arg_kernel.i, sizeof(arg_kernel.i))) {
return -EFAULT;
}
break;
case LKMC_IOCTL_INC_DEC:
if (copy_from_user(&arg_kernel.s, arg_user, sizeof(arg_kernel.s))) {
return -EFAULT;
}
pr_info("1 arg = %d %d\n", arg_kernel.s.i, arg_kernel.s.j);
arg_kernel.s.i += 1;
arg_kernel.s.j -= 1;
if (copy_to_user(arg_user, &arg_kernel.s, sizeof(arg_kernel.s))) {
return -EFAULT;
}
break;
default:
return -EINVAL;
break;
}
return 0;
}
static const struct file_operations fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = unlocked_ioctl
};
static int myinit(void)
{
dir = debugfs_create_dir("lkmc_ioctl", 0);
/* ioctl permissions are not automatically restricted by rwx as for read / write,
* but we could of course implement that ourselves:
* https://stackoverflow.com/questions/29891803/user-permission-check-on-ioctl-command */
debugfs_create_file("f", 0, dir, NULL, &fops);
return 0;
}
static void myexit(void)
{
debugfs_remove_recursive(dir);
}
module_init(myinit)
module_exit(myexit)
Shared header between the kernel module and userland:
ioctl.h
#ifndef IOCTL_H
#define IOCTL_H
#include <linux/ioctl.h>
typedef struct {
int i;
int j;
} lkmc_ioctl_struct;
#define LKMC_IOCTL_MAGIC 0x33
#define LKMC_IOCTL_INC _IOWR(LKMC_IOCTL_MAGIC, 0, int)
#define LKMC_IOCTL_INC_DEC _IOWR(LKMC_IOCTL_MAGIC, 1, lkmc_ioctl_struct)
#endif
Userland:
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "../ioctl.h"
int main(int argc, char **argv)
{
int fd, arg_int, ret;
lkmc_ioctl_struct arg_struct;
if (argc < 2) {
puts("Usage: ./prog <ioctl-file>");
return EXIT_FAILURE;
}
fd = open(argv[1], O_RDONLY);
if (fd == -1) {
perror("open");
return EXIT_FAILURE;
}
/* 0 */
{
arg_int = 1;
ret = ioctl(fd, LKMC_IOCTL_INC, &arg_int);
if (ret == -1) {
perror("ioctl");
return EXIT_FAILURE;
}
printf("arg = %d\n", arg_int);
printf("ret = %d\n", ret);
printf("errno = %d\n", errno);
}
puts("");
/* 1 */
{
arg_struct.i = 1;
arg_struct.j = 1;
ret = ioctl(fd, LKMC_IOCTL_INC_DEC, &arg_struct);
if (ret == -1) {
perror("ioctl");
return EXIT_FAILURE;
}
printf("arg = %d %d\n", arg_struct.i, arg_struct.j);
printf("ret = %d\n", ret);
printf("errno = %d\n", errno);
}
close(fd);
return EXIT_SUCCESS;
}

Resources