Kernel Panic after changes in sys_close - linux

I'm doing a course on operating systems and we work in Linux Red Hat 8.0
AS part of an assignment I had to change sys close and sys open. Changes to sys close passed without an incident, but when I introduce the changes to sys close suddenly the OS encounters an error during booting, claiming it cannot mount root fs, and invokes panic. EIP is reportedly at sys close when this happens.
Here are the changes I made (look for the "HW1 additions" comment):
In fs/open.c:
asmlinkage long sys_open(const char * filename, int flags, int mode)
{
char * tmp;
int fd, error;
event_t* new_event;
#if BITS_PER_LONG != 32
flags |= O_LARGEFILE;
#endif
tmp = getname(filename);
fd = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
fd = get_unused_fd();
if (fd >= 0) {
struct file *f = filp_open(tmp, flags, mode);
error = PTR_ERR(f);
if (IS_ERR(f))
goto out_error;
fd_install(fd, f);
}
/* HW1 additions */
if (current->record_flag==1){
new_event=(event_t*)kmalloc(sizeof(event_t), GFP_KERNEL);
if (!new_event){
new_event->type=Open;
strcpy(new_event->filename, tmp);
file_queue_add(*new_event, current->queue);
}
}
/* End HW1 additions */
out:
putname(tmp);
}
return fd;
out_error:
put_unused_fd(fd);
fd = error;
goto out;
}
asmlinkage long sys_close(unsigned int fd)
{
struct file * filp;
struct files_struct *files = current->files;
event_t* new_event;
char* tmp = files->fd[fd]->f_dentry->d_name.name;
write_lock(&files->file_lock);
if (fd >= files->max_fds)
goto out_unlock;
filp = files->fd[fd];
if (!filp)
goto out_unlock;
files->fd[fd] = NULL;
FD_CLR(fd, files->close_on_exec);
__put_unused_fd(files, fd);
write_unlock(&files->file_lock);
/* HW1 additions */
if(current->record_flag == 1){
new_event=(event_t*)kmalloc(sizeof(event_t), GFP_KERNEL);
if (!new_event){
new_event->type=Close;
strcpy(new_event->filename, tmp);
file_queue_add(*new_event, current->queue);
}
}
/* End HW1 additions */
return filp_close(filp, files);
out_unlock:
write_unlock(&files->file_lock);
return -EBADF;
}
The task_struct defined in schedule.h was changed at the end to include:
unsigned int record_flag; /* when zero: do not record. when one: record. */
file_queue* queue;
And file queue as well as event t are defined in a separate file as follows:
typedef enum {Open, Close} EventType;
typedef struct event_t{
EventType type;
char filename[256];
}event_t;
typedef struct file_quque_t{
event_t queue[101];
int head, tail;
}file_queue;
file queue add works like this:
void file_queue_add(event_t event, file_queue* queue){
queue->queue[queue->head]=event;
queue->head = (queue->head+1) % 101;
if (queue->head==queue->tail){
queue->tail=(queue->tail+1) % 101;
}
}

if (!new_event) {
new_event->type = …
That's equivalent to if (new_event == NULL). I think you mean if (new_event != NULL), which the kernel folks typically write as if (new_event).

Can you please post the stackdump of the error. I don't see a place where queue_info structure is allocated memory. One more thing is you cannot be sure that process record_flag will be always zero if unassigned in kernel, because kernel is a long running program and memory contains garbage.
Its also possible to check the exact location in the function is occurring by looking at the stack trace.

Related

How to handle more than one SIGSEGV occurrence in linux?

I have written a program to scan kernel memory for a pattern from user space. I run it from root. I expect that it will generate SIGSEGVs when it hits pages that aren't accessible; I would like to ignore those faults and just jump to the next page to continue the search. I have set up a signal handler that works fine for the first occurrence, and it continues onward as expected. However, when a second SIGSEGV occurs, the handler is ignored (it was reregistered after the first occurrence) and the program terminates. The relevant portions of the code are:
jmp_buf restore_point;
void segv_handler(int sig, siginfo_t* info, void* ucontext)
{
longjmp(restore_point, SIGSEGV);
}
void setup_segv_handler()
{
struct sigaction sa;
sa.sa_flags = SA_SIGINFO|SA_RESTART|SA_RESETHAND;
sigemptyset (&sa.sa_mask);
sa.sa_sigaction = &segv_handler;
if (sigaction(SIGSEGV, &sa, NULL) == -1) {
fprintf(stderr, "failed to setup SIGSEGV handler\n");
}
}
unsigned long search_kernel_memory_area(unsigned long start_address, size_t area_len, const void* pattern, size_t pattern_len)
{
int fd;
char* kernel_mem;
fd = open("/dev/kmem", O_RDONLY);
if (fd < 0)
{
perror("open /dev/kmem failed");
return -1;
}
unsigned long page_size = sysconf(_SC_PAGESIZE);
unsigned long page_aligned_offset = (start_address/page_size)*page_size;
unsigned long area_pages = area_len/page_size + (area_len%page_size ? 1 : 0);
kernel_mem =
mmap(0, area_pages,
PROT_READ, MAP_SHARED,
fd, page_aligned_offset);
if (kernel_mem == MAP_FAILED)
{
perror("mmap failed");
return -1;
}
if (!mlock((const void*)kernel_mem,area_len))
{
perror("mlock failed");
return -1;
}
unsigned long offset_into_page = start_address-page_aligned_offset;
unsigned long start_area_address = (unsigned long)kernel_mem + offset_into_page;
unsigned long end_area_address = start_area_address+area_len-pattern_len+1;
unsigned long addr;
setup_segv_handler();
for (addr = start_area_address; addr < end_area_address;addr++)
{
unsigned char* kmp = (unsigned char*)addr;
unsigned char* pmp = (unsigned char*)pattern;
size_t index = 0;
for (index = 0; index < pattern_len; index++)
{
if (setjmp(restore_point) == 0)
{
unsigned char p = *pmp;
unsigned char k = *kmp;
if (k != p)
{
break;
}
pmp++;
kmp++;
}
else
{
addr += page_size -1;
setup_segv_handler();
break;
}
}
if (index >= pattern_len)
{
return addr;
}
}
munmap(kernel_mem,area_pages);
close(fd);
return 0;
}
I realize I can use functions like memcmp to avoid programming the matching part directly (I did this initially), but I subsequently wanted to insure the finest grained control for recovering from the faults so I could see exactly what was happening.
I scoured the Internet to find information about this behavior, and came up empty. The linux system I am running this under is arm 3.12.30.
If what I am trying to do is not possible under linux, is there some way I can get the current state of the kernel pages from user space (which would allow me to avoid trying to search pages that are inaccessible.) I searched for calls that might provide such information, but also came up empty.
Thanks for your help!
While longjmp is perfectly allowed to be used in the signal handler (the function is known as async-signal-safe, see man signal-safety) and effectively exits from the signal handling, it doesn't restore signal mask. The mask is automatically modified at the time when signal handler is called to block new SIGSEGV signal to interrupt the handler.
While one may restore signal mask manually, it is better (and simpler) to use siglongjmp function instead: aside from the effect of longjmp, it also restores the signal mask. Of course, in that case sigsetjmp function should be used instead of setjmp:
// ... in main() function
if(sigsetjmp(restore_point, 1)) // Aside from other things, store signal mask
// ...
// ... in the signal handler
siglongjmp(restore_point); // Also restore signal mask as it was at sigsetjmp() call

IOCTL Method - Linux

I have an exam question and I can't quite see how to solve it.
A driver that needs the ioctl method to be implemented and tested.
I have to write the ioctl() method, the associated test program as well as the common IOCTL definitions.
The ioctl() method should only handle one command. In this command, I need to transmit a data structure from user space to kernel space.
Below is the structure shown:
struct data
{
     char label [10];
     int value;
}
The driver must print the IOCTL command data, using printk();
Device name is "/dev/mydevice"
The test program must validate driver mode using an initialized data structure.
Hope there are some that can help
thanks in advance
My suggestion:
static int f_on_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
unsigned long arg)
{
int ret;
switch (cmd)
{
case PASS_STRUCT:
struct data pass_data;
ret = copy_from_user(&pass_data, arg, sizeof(*pass_data));
if(ret < 0)
{
printk("PASS_STRUCT\n");
return -1;
}
printk(KERN ALERT "Message PASS_STRUCT : %d and %c\n",pass_data.value, pass_data.label);
break;
default:
return ENOTTY;
}
return 0;
}
Definitions:
Common.h
#define SYSLED_IOC_MAGIC 'k'
#define PASS_STRUCT _IOW(SYSLED_IOC_MAGIC, 1, struct data)
The test program:
int main()
{
int fd = open("/dev/mydevice", O_RDWR);
data data_pass;
data_pass.value = 2;
data_pass.label = "hej";
ioctl(fd, PASS_STRUCT, &data_pass);
close(fd);
return 0;
}
Is this completely wrong??

Call to ioctl() on a usb device using usbfs does not work

I am trying to create my own driver for my Gamepad right now, I found out the original reason why I wanted to create it does not exist but I still want to do it for the experience. So please don't tell me there is a better way to do this than writing my own driver.
The part in kernelspace with the ioctl function that should be called is:
static int xpad_ioctl (struct usb_interface *intf, unsigned int code,void *buf) {
//struct usb_xpad *xpad = usb_get_intfdata(intf);
printk(KERN_INFO"(Ongy)IOCTL called\n");
//if (_IOC_TYPE(code) != XPAD_IOMAGIC) return -ENOTTY;
//if (_IOC_NR(code) > XPAD_IOMAX) return -ENOTTY;
switch(code){
case XPAD_IORMAP:
printk(KERN_INFO"(Ongy)IORMAP called\n");
break;
default:
return -EINVAL;
}
return 0;
}
static struct usb_driver xpad_driver =
{
.name = "Cyborg-V5-driver",
.probe = xpad_probe,
.disconnect = xpad_disconnect,
.unlocked_ioctl = xpad_ioctl,
.id_table = xpad_table,
};
The part in userspace to call it is (this is part of a Qt-application):
int openfile() {
char *device = "/dev/bus/usb/005/009";
printf("Opening device %s\n", device);
return open(device, /*O_RDONLY*/O_WRONLY | O_NONBLOCK );
}
[...] the closefile(int file_desc) is missing here, this and the openfile functions exist because of me not knowing one can call "::open()" when Qt overrides function calls.
void MainContainer::callioctl() {
int file_desc, ret_val;
errno = 0;
file_desc = openfile();
if (file_desc==-1){
printf("Ioctl notcalled because of: error %s\n", strerror(errno));
}
else
{
errno = 0;
//struct usbdevfs_getdriver* driver = (usbdevfs_getdriver*)malloc(sizeof(struct usbdevfs_getdriver));
struct mappingpair* pair = (mappingpair*)malloc(sizeof(struct mappingpair));
ret_val = ioctl(file_desc, XPAD_IORMAP, pair);
//printf("Drivername %s\n", driver->driver);
closefile(file_desc);
if (ret_val==-1) printf("Ioctl failed with error %s\n", strerror(errno));
else printf("Ioctl call successfull\n");
}
}
ok, the string to the file I open I get with a call to lsusb and change it by hand in the code, this is only for debugging and until I get the ioctl calls working
When I call the callioctl() it prints:
Ioctl failed with error Unpassender IOCTL (I/O-Control) für das Gerät
The German part means "wrong ioctl (I/O-Control) for the device" and nothing appears in dmesg, that is why I think my ioctl function in the driver is not called.
If you look at http://www.hep.by/gnu/kernel/usb/usbfs.html it says that to send an ioctl to the usb_driver device you need to do:
struct usbdevfs_ioctl {
int ifno;
int ioctl_code;
void *data;
};
/* user mode call looks like this.
* 'request' becomes the driver->ioctl() 'code' parameter.
* the size of 'param' is encoded in 'request', and that data
* is copied to or from the driver->ioctl() 'buf' parameter.
*/
static int
usbdev_ioctl (int fd, int ifno, unsigned request, void *param)
{
struct usbdevfs_ioctl wrapper;
wrapper.ifno = ifno;
wrapper.ioctl_code = request;
wrapper.data = param;
return ioctl (fd, USBDEVFS_IOCTL, &wrapper);
}
The documentation is listing usb device under /proc/bus so admittedly this may have changed.

copy_to_user not working in kernel module

I was trying to use copy_to_user in kernel module read function, but am not able to copy the data from kernel to user buffer. Please can anyone tell me if I am doing some mistake. My kernel version is 2.6.35. I am giving the portion of kernel module as well as the application being used to test it. Right now my focus is why this copy_to_user is not working. Any help will great.
///////////////////////////////////kernel module//////////////////////////////////////
#define BUF_LEN 80
static char msg[BUF_LEN];
static char *msg_Ptr;
static int device_open(struct inode *inode, struct file *file)
{
static int counter = 0;
if (Device_Open)
return -EBUSY;
Device_Open++;
printk(KERN_ALERT "In open device call\n");
sprintf(msg, "I already told you %d times Hello world!\n", counter++);
msg_Ptr = msg;
try_module_get(THIS_MODULE);
return SUCCESS;
}
static ssize_t device_read(struct file *filp,
char __user *buffer,
size_t length,
loff_t * offset)
{
/*
* Number of bytes actually written to the buffer
*/
int bytes_read = 0;
/*
* If we are at the end of the message,
* return 0 signifying end of file
*/
if (*msg_Ptr == 0)
return 0;
/*
* Actually put the data into the buffer
*/
else {
bytes_read=copy_to_user(buffer, msg, length);
if (bytes_read==-1);
{
printk(KERN_INFO "Error in else while copying the data \n");
}
}
return bytes_read;
}
////////////////////////////////////////application////////////////////////
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#define BUF_SIZE 40
int main()
{
ssize_t num_bytes;
int fd, n=0;
char buf[BUF_SIZE];
fd=open("/dev/chardev", O_RDWR);
if(fd== -1){perror("Error while opening device");exit(1);}
printf("fd=%d\n",fd);
num_bytes=read(fd, buf, BUF_SIZE);
if(num_bytes==-1){perror("Error while reading"); exit(2);}
printf("The value fetched is %lu bytes\n", num_bytes);
while(n<=num_bytes)
{
printf("%c",buf[n]);
n++;
}
close(fd);
return 0;
}
There are a few problems in the code snippet you wrote. First of all, it is not a good thing to make the call try_module_get(THIS_MODULE);
This statement tries to increase the refcount of the module ... in the module itself ! Instead, you should set the owner field of the file_ops structure to THIS_MODULE in your init method. This way, the reference handling will happen outside the module code, in the VFS layer. You might take a look at Linux Kernel Modules: When to use try_module_get / module_put.
Then, as it was stated by Vineet you should retrieve the pointer from the file_ops private_data field.
And last but not least, here is the reason why it seems an error happened while ... Actually ... It did not :
The copy_to_user call returns 0 if it has successfully copied all the desired bytes into the destination memory area and a strictly positive value stating the number of bytes that were NOT copied in case of error. That said, when you run :
/* Kernel part */
bytes_read=copy_to_user(buffer, msg, length);
/*
* Wrong error checking :
* In the below statement, "-1" is viewed as an unsigned long.
* With a simple equality test, this will not bother you
* But this is dangerous with other comparisons like "<" or ">"
* (unsigned long)(-1) is at least 2^32 - 1 so ...
*/
if (-1 == bytes_read) {
/* etc. */
}
return bytes_read;
/* App part */
num_bytes=read(fd, buf, BUF_SIZE);
/* etc.. */
while(n<=num_bytes) {
printf("%c",buf[n]);
n++;
}
You should only get one character upon a successful copy, that is only a single "I" in your case.
Moreover, you use your msg_Ptr pointer as a safeguard but you never update it. This might result in a wrong call to copy_to_user.
copy_to_user checks the user-space pointer with a call to access_ok, but if the kernel-space pointer and the given length are not allright, this might end in a Kernel Oops/Panic.
I think you should update the file->private_data in open and then you have to fetch that in your structure. Because I guess the msg buffer ( kernel buffer ) is not getting proper refernce.

Shared memory across processes on Linux/x86_64

I have a few questions on using shared memory with processes. I looked at several previous posts and couldn't glean the answers precisely enough. Thanks in advance for your help.
I'm using shm_open + mmap like below. This code works as intended with parent and child alternating to increment g_shared->count (the synchronization is not portable; it works only for certain memory models, but good enough for my case for now). However, when I change MAP_SHARED to MAP_ANONYMOUS | MAP_SHARED, the memory isn't shared and the program hangs since the 'flag' doesn't get flipped. Removing the flag confirms what's happening with each process counting from 0 to 10 (implying that each has its own copy of the structure and hence the 'count' field). Is this the expected behavior? I don't want the memory to be backed by a file; I really want to emulate what might happen if these were threads instead of processes (they need to be processes for other reasons).
Do I really need shm_open? Since the processes belong to the same hierarchy, can I just use mmap alone instead? I understand this would be fairly straightforward if there wasn't an 'exec,' but how do I get it to work when there is an 'exec' following the 'fork?'
I'm using kernel version 3.2.0-23 on x86_64 (Intel i7-2600). For this implementation, does mmap give the same behavior (correctness as well as performance) as shared memory with pthreads sharing the same global object? For example, does the MMU map the segment with 'cacheable' MTRR/TLB attributes?
Is the cleanup_shared() code correct? Is it leaking any memory? How could I check? For example, is there an equivalent of System V's 'ipcs?'
thanks,
/Doobs
shmem.h:
#ifndef __SHMEM_H__
#define __SHMEM_H__
//includes
#define LEN 1000
#define ITERS 10
#define SHM_FNAME "/myshm"
typedef struct shmem_obj {
int count;
char buff[LEN];
volatile int flag;
} shmem_t;
extern shmem_t* g_shared;
extern char proc_name[100];
extern int fd;
void cleanup_shared() {
munmap(g_shared, sizeof(shmem_t));
close(fd);
shm_unlink(SHM_FNAME);
}
static inline
void init_shared() {
int oflag;
if (!strcmp(proc_name, "parent")) {
oflag = O_CREAT | O_RDWR;
} else {
oflag = O_RDWR;
}
fd = shm_open(SHM_FNAME, oflag, (S_IREAD | S_IWRITE));
if (fd == -1) {
perror("shm_open");
exit(EXIT_FAILURE);
}
if (ftruncate(fd, sizeof(shmem_t)) == -1) {
perror("ftruncate");
shm_unlink(SHM_FNAME);
exit(EXIT_FAILURE);
}
g_shared = mmap(NULL, sizeof(shmem_t),
(PROT_WRITE | PROT_READ),
MAP_SHARED, fd, 0);
if (g_shared == MAP_FAILED) {
perror("mmap");
cleanup_shared();
exit(EXIT_FAILURE);
}
}
static inline
void proc_write(const char* s) {
fprintf(stderr, "[%s] %s\n", proc_name, s);
}
#endif // __SHMEM_H__
shmem1.c (parent process):
#include "shmem.h"
int fd;
shmem_t* g_shared;
char proc_name[100];
void work() {
int i;
for (i = 0; i &lt ITERS; ++i) {
while (g_shared->flag);
++g_shared->count;
sprintf(g_shared->buff, "%s: %d", proc_name, g_shared->count);
proc_write(g_shared->buff);
g_shared->flag = !g_shared->flag;
}
}
int main(int argc, char* argv[], char* envp[]) {
int status, child;
strcpy(proc_name, "parent");
init_shared(argv);
fprintf(stderr, "Map address is: %p\n", g_shared);
if (child = fork()) {
work();
waitpid(child, &status, 0);
cleanup_shared();
fprintf(stderr, "Parent finished!\n");
} else { /* child executes shmem2 */
execvpe("./shmem2", argv + 2, envp);
}
}
shmem2.c (child process):
#include "shmem.h"
int fd;
shmem_t* g_shared;
char proc_name[100];
void work() {
int i;
for (i = 0; i &lt ITERS; ++i) {
while (!g_shared->flag);
++g_shared->count;
sprintf(g_shared->buff, "%s: %d", proc_name, g_shared->count);
proc_write(g_shared->buff);
g_shared->flag = !g_shared->flag;
}
}
int main(int argc, char* argv[], char* envp[]) {
int status;
strcpy(proc_name, "child");
init_shared(argv);
fprintf(stderr, "Map address is: %p\n", g_shared);
work();
cleanup_shared();
return 0;
}
Passing MAP_ANONYMOUS causes the kernel to ignore your file descriptor argument and give you a private mapping instead. That's not what you want.
Yes, you can create an anonymous shared mapping in a parent process, fork, and have the child process inherit the mapping, sharing the memory with the parent and any other children. That obvoiusly doesn't survive an exec() though.
I don't understand this question; pthreads doesn't allocate memory. The cacheability will depend on the file descriptor you mapped. If it's a disk file or anonymous mapping, then it's cacheable memory. If it's a video framebuffer device, it's probably not.
That's the right way to call munmap(), but I didn't verify the logic beyond that. All processes need to unmap, only one should call unlink.
2b) as a middle-ground of a sort, it is possible to call:
int const shm_fd = shm_open(fn,...);
shm_unlink(fn);
in a parent process, and then pass fd to a child process created by fork()/execve() via argp or envp. since open file descriptors of this type will survive the fork()/execve(), you can mmap the fd in both the parent process and any dervied processes. here's a more complete code example copied and simplified/sanitized from code i ran successfully under Ubuntu 12.04 / linux kernel 3.13 / glibc 2.15:
int create_shm_fd( void ) {
int oflags = O_RDWR | O_CREAT | O_TRUNC;
string const fn = "/some_shm_fn_maybe_with_pid";
int fd;
neg_one_fail( fd = shm_open( fn.c_str(), oflags, S_IRUSR | S_IWUSR ), "shm_open" );
if( fd == -1 ) { rt_err( strprintf( "shm_open() failed with errno=%s", str(errno).c_str() ) ); }
// for now, we'll just pass the open fd to our child process, so
// we don't need the file/name/link anymore, and by unlinking it
// here we can try to minimize the chance / amount of OS-level shm
// leakage.
neg_one_fail( shm_unlink( fn.c_str() ), "shm_unlink" );
// by default, the fd returned from shm_open() has FD_CLOEXEC
// set. it seems okay to remove it so that it will stay open
// across execve.
int fd_flags = 0;
neg_one_fail( fd_flags = fcntl( fd, F_GETFD ), "fcntl" );
fd_flags &= ~FD_CLOEXEC;
neg_one_fail( fcntl( fd, F_SETFD, fd_flags ), "fcntl" );
// resize the shm segment for later mapping via mmap()
neg_one_fail( ftruncate( fd, 1024*1024*4 ), "ftruncate" );
return fd;
}
it's not 100% clear to me if it's okay spec-wise to remove the FD_CLOEXEC and/or assume that after doing so the fd really will survive the exec. the man page for exec is unclear; it says: "POSIX shared memory regions are unmapped", but to me that's redundant with the general comments earlier that mapping are not preserved, and doesn't say that shm_open()'d fd will be closed. any of course there's the fact that, as i mentioned, the code does seem to work in at least one case.
the reason i might use this approach is that it would seem to reduce the chance of leaking the shared memory segment / filename, and it makes it clear that i don't need persistence of the memory segment.

Resources