rrdtool gives bus error when an attempt is made to create an rrd database - linux

I'm having problems getting rrdtool (version 1.4.8) create a database on my system (x86_64 CentOS 4.1, kernel 2.6.18.128, and ext3 filesystem type).
I tried running a simple command to create a database as shown in this tutorial, but I get a SIGBUS error and it seems to be related to memcpy(). I've shown the backtrace from gdb below.
(gdb) run create test.rrd --start 920804400 DS:speed:COUNTER:600:U:U RRA:AVERAGE:0.5:1:24 RRA:AVERAGE:0.5:6:10
Starting program: rrdtool-1.4.8/src/.libs/rrdtool create test.rrd --start 920804400 DS:speed:COUNTER:600:U:U RRA:AVERAGE:0.5:1:24 RRA:AVERAGE:0.5:6:10
(no debugging symbols found)
Program received signal SIGBUS, Bus error.
0x00002acb298da7d3 in memcpy () from /lib64/tls/libc.so.6
(gdb) bt
#0 0x00002b46e74ae7d3 in memcpy () from /lib64/tls/libc.so.6
#1 0x00002b46e731d612 in rrd_write (rrd_file=0x1c46e230, buf=0x1c46e0a0, count=128) at rrd_open.c:716
#2 0x00002b46e731515e in rrd_create_fn (file_name=0x7fffc38b9a3f "test.rrd", rrd=0x7fffc38b8b60) at rrd_create.c:727
#3 0x00002b46e7314a28 in rrd_create_r (filename=0x7fffc38b9a3f "test.rrd", pdp_step=300, last_up=920804400, argc=3, argv=0x7fffc38b9070) at rrd_create.c:580
#4 0x00002b46e731330e in rrd_create (argc=7, argv=0x7fffc38b9050) at rrd_create.c:113
#5 0x00000000004028db in HandleInputLine (argc=8, argv=0x7fffc38b9048, out=0x2b46e766a680) at rrd_tool.c:646
#6 0x00000000004023a2 in main (argc=8, argv=0x7fffc38b9048) at rrd_tool.c:521
(gdb) up
#1 0x00002b46e731d612 in rrd_write (rrd_file=0x1c46e230, buf=0x1c46e0a0, count=128) at rrd_open.c:716
716 memcpy(rrd_simple_file->file_start + rrd_file->pos, buf, count);
(gdb) p rrd_file->pos
$21 = 0
(gdb) p (char *)buf
$25 = 0x1c46e0a0 "RRD"
(gdb) p rrd_simple_file->file_start
$22 = 0x2b46ea64b000 "RRD"
(gdb) p count
$23 = 128
What is the problem here and how do I fix it?

This problem may be related to some mmap bug of your old Centos distribution.
If upgrading to a newer version is not an option, you could try to compile your rrdtool with the following option:
./configure --disable-mmap
By doing so, the faulty memcpy at line 716 in the rrd_write() function (shown below) should not be executed:
/* Store count bytes taken from buf at the current position rrd_file->pos.
 *
 * With HAVE_MMAP the bytes are copied to file_start + pos; a zero count
 * returns 0, a NULL buf returns -1, and a write that would run past
 * rrd_file->file_len sets the rrd error string and returns -1.
 * Without HAVE_MMAP the bytes are handed to write() on rrd_simple_file->fd.
 *
 * In both builds the position advances by the number of bytes stored.
 * Returns that number, or a value <0 on error. */
ssize_t rrd_write(
    rrd_file_t *rrd_file,
    const void *buf,
    size_t count)
{
    rrd_simple_file_t *simple = (rrd_simple_file_t *) rrd_file->pvt;

#ifdef HAVE_MMAP
    size_t    limit = rrd_file->file_len;

    if (count == 0) {
        return 0;
    }
    if (buf == NULL) {
        return -1;      /* EINVAL */
    }
    if ((rrd_file->pos + count) > limit) {
        rrd_set_error("attempting to write beyond end of file");
        return -1;
    }

    memcpy(simple->file_start + rrd_file->pos, buf, count);
    rrd_file->pos += count;
    return count;       /* mimic write() semantics */
#else
    ssize_t   written = write(simple->fd, buf, count);

    if (written > 0) {
        rrd_file->pos += written;
    }
    return written;
#endif
}

Related

symbol without any name and completed.7392 symbol in .bss section

In my sample C program, compiled with gcc, .bss section has an index [24], as shown by readelf -S.
When I try to see the things stored in .bss, by running
readelf -s ./pointer | grep 24, I get
Num: Value Size Type Bind Vis Ndx Name
24: 00000000000040a0 0 SECTION LOCAL DEFAULT 24
31: 00000000000040a8 1 OBJECT LOCAL DEFAULT 24 completed.7392
54: 00000000000040b0 8 OBJECT GLOBAL DEFAULT 24 label
68: 00000000000040c0 0 NOTYPE GLOBAL DEFAULT 24 _end
70: 00000000000040b8 4 OBJECT GLOBAL DEFAULT 24 i
71: 0000000000004090 0 NOTYPE GLOBAL DEFAULT 24 __bss_start
79: 00000000000040ac 4 OBJECT GLOBAL DEFAULT 24 err
81: 00000000000040a0 8 OBJECT GLOBAL DEFAULT 24 stderr@@GLIBC_2.2.5
size ./pointer gives me
text data bss dec hex filename
3196 680 32 3908 f44 ./pointer
what's the symbol without any name and symbol with name completed.7392?
and, why size doesn't add up to 32 bytes, as shown by size? [ it is 25 now ]
As a side question, where are stdin and stdout symbols? I can find only stderr, and that is in the bss section.
program source attached below. compiled with gcc version 9.2
/*
* A program that will read and print printable characters in its memory given a memory address
* until it segfaults
*/
#define _GNU_SOURCE /* Bring REG_XXX names from /usr/include/sys/ucontext.h */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <ucontext.h>
/* i   - index of the byte readmem() is currently dumping; also printed by the
 *       SIGSEGV handler as the number of bytes read.
 * err - set to 1 by the SIGSEGV handler; readmem() leaves its loop when it sees it. */
int i,err=0;
/* NOTE(review): no definition of sighandler() is visible here; the handler that
 * main() installs is sigaction_segv(). This prototype looks like a leftover. */
void sighandler(int signum);
/* Address of the label l inside readmem() (stored there via &&l). */
void* label;
/*
 * Ask for a hexadecimal address on stdin and print the bytes found from that
 * address onwards, one per iteration, until the SIGSEGV handler sets err.
 *
 * Resets the globals i and err, and stores the address of the label l in the
 * global label. Exits the process if no address can be read from stdin.
 */
void readmem(){
    unsigned long loc;   /* %lx stores through an unsigned long pointer */

    i=0;
    err=0;
    printf("enter mem location:");
    /*
     * On EOF or malformed input scanf() leaves loc unset. Dumping from an
     * indeterminate address is undefined behaviour, and because the input is
     * never consumed the caller's loop would spin forever - so stop here.
     */
    if (scanf("%lx",&loc) != 1) {
        fprintf(stderr, "\nno valid hex address on stdin, exiting\n");
        exit(1);
    }
    printf("--dump begin--\n");
    char * addr = (void *) loc;
    char ch;
    label=&&l;
    /* label was kept so that one can find how many bytes to add to rip in
       order to recover from the segfault; the address can also be found
       using gdb
    */
    /*
     * Keep the statements of this loop exactly in this order: the SIGSEGV
     * handler skips a fixed number of bytes from the faulting load below to
     * reach the label l.
     */
    while(1){
        ch=addr[i];      /* the load that faults on an unmapped address */
    l:
        printf("%c",ch);
        i++;
        if ( err == 1)
            break;
    }
}
/*
 * SIGSEGV handler, installed with SA_SIGINFO.
 *
 * Prints the faulting address and the saved instruction pointer, advances the
 * saved instruction pointer by 6 bytes so that execution resumes past the
 * faulting load in readmem(), sets err so that readmem() leaves its loop, and
 * prints the number of bytes read so far (the global i).
 *
 * signal - signal number (unused)
 * si     - signal details; si->si_addr is reported as the faulting address
 * arg    - ucontext_t of the interrupted code; its saved RIP/EIP is modified
 *
 * NOTE(review): printf() is not async-signal-safe; acceptable for a demo only.
 */
static void sigaction_segv(int signal, siginfo_t *si, void *arg)
{
    ucontext_t *ctx = (ucontext_t *)arg;
    /* We are on linux x86, the returning IP is stored in RIP (64bit) or EIP (32bit).
    In this example the label l lies 6 bytes past the offending instruction:
    the 3-byte faulting load plus the 3-byte store into ch (see the listing).
    So we skip both!
    &&l will return the address pointed to by label l
    (gdb) disass readmem
    ...
    0x0000555555555284 <+139>: mov -0x10(%rbp),%rax
    0x0000555555555288 <+143>: add %rdx,%rax
    => 0x000055555555528b <+146>: movzbl (%rax),%eax -> rip on segfault
    0x000055555555528e <+149>: mov %al,-0x19(%rbp)
    0x0000555555555291 <+152>: movsbl -0x19(%rbp),%eax
    0x0000555555555295 <+156>: mov %eax,%edi
    0x0000555555555297 <+158>: callq 0x555555555030 <putchar@plt>
    ...
    >>
    (gdb) p (void*) label
    $1 = (void *) 0x555555555291 <readmem+152> ->address of next instruction
    >>
    we need to go to <readmem+152>, i.e. the instruction at the label,
    so we add decimal 6 to rip in the handler.
    NOTE(review): the offset 6 is specific to this build of readmem(); it has
    to be re-derived if the compiler or its options change.
    */
#if __WORDSIZE == 64
    /* greg_t is long long on x86-64, so it is printed with %llx, not %lx */
    printf("\nCaught SIGSEGV, addr %p, RIP 0x%llx\n",si->si_addr,
           (unsigned long long)ctx->uc_mcontext.gregs[REG_RIP]);
    ctx->uc_mcontext.gregs[REG_RIP] += 6;
#else
    printf("Caught SIGSEGV, addr %p, EIP 0x%x\n",si->si_addr,
           (unsigned int)ctx->uc_mcontext.gregs[REG_EIP]);
    ctx->uc_mcontext.gregs[REG_EIP] += 6;
#endif
    err=1;
    printf("no of bytes read:%d\n",i);
}
/*
 * Install sigaction_segv() as the SIGSEGV handler, print this process's
 * memory map by running "cat /proc/<pid>/maps", then call readmem() forever.
 */
int main () {
    struct sigaction sa;
    char cmd[64];
    int len;

    memset(&sa, 0, sizeof(sa));
    sigemptyset(&sa.sa_mask);
    sa.sa_sigaction = sigaction_segv;
    sa.sa_flags = SA_SIGINFO;   /* handler receives siginfo_t and ucontext */
    if (sigaction(SIGSEGV, &sa, NULL) == -1) {
        fprintf(stderr, "failed to setup SIGSEGV handler\n");
        exit(1);
    }

    /* Bounded formatting: a 25-byte buffer only fits pids of up to 9 digits. */
    len = snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", (int)getpid());
    if (len < 0 || (size_t)len >= sizeof(cmd)) {
        fprintf(stderr, "failed to build the maps command\n");
        exit(1);
    }
    system(cmd);

    while(1){
        readmem();
    }
}

Why does cat call read() twice when once was enough?

I am new to Linux kernel module. I am learning char driver module based on a web course. I have a very simple module that creates a /dev/chardevexample, and I have a question for my understanding:
When I do echo "hello4" > /dev/chardevexample, I see the write execute exactly once as expected. However, when I do cat /dev/chardevexample, I see the read executed two times.
I see this both in my code and in the course material. All the data was returned in the first read(), so why does cat call it again?
All the things I did so far are as follows:
insmod chardev.ko to load my module
echo "hello4" > /dev/chardevexample. This is the write and I see it happening exactly once in dmesg
cat /dev/chardevexample. This is the read, and dmesg shows it happening twice.
I did strace cat /dev/chardevexample, and I indeed see the function call being called twice for read. There is a write in between as well
read(3, "hello4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 131072) = 4096
write(1, "hello4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096hello4) = 4096
read(3, "", 131072)
dmesg after read (cat command)
[909836.517402] DEBUG-device_read: To User hello4 and bytes_to_do 4096 ppos 0 # Read #1
[909836.517428] DEBUG-device_read: Data send to app hello4, nbytes=4096 # Read #1
[909836.519086] DEBUG-device_read: To User and bytes_to_do 0 ppos 4096 # Read #2
[909836.519093] DEBUG-device_read: Data send to app hello4, nbytes=0 # Read #2
Code snippet for read, write and file_operations is attached. Any
guidance would help. I searched extensively and couldn't understand.
Hence the post.
/*!
 * @brief Write to device - copy data from user space into the kernel-side 'buffer'
 *
 * The copy starts at offset *ppos inside 'buffer' and is clamped so that it
 * never runs past PAGE_SIZE, the same limit device_read() applies.
 * NOTE(review): this assumes 'buffer' is at least PAGE_SIZE bytes - confirm
 * against its allocation, which is not visible here.
 *
 * @returns Number of bytes written (possibly fewer than requested), or
 *          -ENOSPC when *ppos is outside the buffer
 */
static ssize_t device_write(struct file *file, //!< File pointer
                            const char *buf,   //!< user-space source for copy_from_user
                            size_t lbuf,       //!< number of bytes the caller wants to write
                            loff_t *ppos)      //!< position to write to; advanced by the bytes written
{
    size_t room;
    int nbytes;

    /* An unbounded copy here would let user space overflow 'buffer'. */
    if (*ppos < 0 || *ppos >= PAGE_SIZE)
        return -ENOSPC;
    room = PAGE_SIZE - *ppos;
    if (lbuf > room)
        lbuf = room;

    /* copy_from_user() returns the number of bytes it could NOT copy */
    nbytes = lbuf - copy_from_user(
        buffer + *ppos, /* to */
        buf,            /* from */
        lbuf);          /* how many bytes */
    *ppos += nbytes;

    /* NOTE(review): strcspn() needs 'buffer' to contain a newline or a NUL. */
    buffer[strcspn(buffer, "\n")] = 0; // Remove End of line character
    pr_info("Recieved data \"%s\" from apps, nbytes=%d\n", buffer, nbytes);
    return nbytes;
}
/*!
 * @brief Read from device - copy from the kernel-side 'buffer' to user space
 *
 * At most PAGE_SIZE - *ppos bytes are handed out, so once *ppos has reached
 * PAGE_SIZE the call returns 0, which user space treats as end-of-file.
 *
 * @returns Number of bytes read; 0 at or beyond the end of the buffer
 */
static ssize_t device_read(struct file *file, //!< File pointer
                           char *buf,         //!< user-space destination for copy_to_user
                           size_t lbuf,       //!< number of bytes the caller asks for
                           loff_t *ppos)      //!< position to read from; advanced by the bytes read
{
    int nbytes;
    int maxbytes;
    int bytes_to_do;

    /*
     * A position beyond the buffer would make maxbytes negative; compared
     * against the unsigned lbuf it would then look huge and the whole
     * request would be copied from outside 'buffer'. Report EOF instead.
     */
    if (*ppos > PAGE_SIZE)
        return 0;

    maxbytes = PAGE_SIZE - *ppos;   /* 0..PAGE_SIZE from here on */
    if (lbuf < (size_t)maxbytes)
        bytes_to_do = lbuf;
    else
        bytes_to_do = maxbytes;

    buffer[strcspn(buffer, "\n")] = 0; // Remove End of line character
    printk("DEBUG-device_read: To User %s and bytes_to_do %d ppos %lld\n", buffer + *ppos, bytes_to_do, *ppos);

    /* copy_to_user() returns the number of bytes it could NOT copy */
    nbytes = bytes_to_do - copy_to_user(
        buf,            /* to */
        buffer + *ppos, /* from */
        bytes_to_do);   /* how many bytes*/
    *ppos += nbytes;
    pr_info("DEBUG-device_read: Data send to app %s, nbytes=%d\n", buffer, nbytes);
    return nbytes;
}

/* Every Device is like a file - this is device file operation */
static struct file_operations device_fops = {
    .owner = THIS_MODULE,
    .write = device_write,
    .open = device_open,
    .read = device_read,
};
The Unix convention for indicating end-of-file is to have read return 0 bytes.
In this case, cat asks for 131072 bytes and only receives 4096. This is normal and not to be interpreted as having reached the end of the file. For example, it happens when you read from the keyboard but the user only inputs a small amount of data.
Because cat has not yet seen EOF (i.e. read did not return 0), it continues to issue read calls until it does. This means that if there's any data, you will always see a minimum of two read calls: one (or more) for the data, and one final one that returns 0.

ioctl() call resets file descriptor to 0

Consider the following code:
file_fd = open(device, O_RDWR);
if (file_fd < 0) {
perror("open");
return -1;
}
printf("File descriptor: %d\n", file_fd);
uint32_t DskSize;
if (ioctl(file_fd, BLKGETSIZE, &DskSize) < 0) {
perror("ioctl");
return -1;
}
printf("File descriptor after: %d\n", file_fd);
This snippet yields this:
File descriptor: 3
File descriptor after: 0
Why does my file descriptor get reset to 0? The program writes the stuff out to stdout instead of my block device.
This should not happen. I expect my file_fd to be non-zero and retain its value.
Looks like you smash your stack.
There are only two stack variables, file_fd and DskSize, and writing DskSize changes file_fd; this suggests that DskSize must be unsigned long or size_t (a 64-bit value), not uint32_t.
Looking at BLKGETSIZE implementation confirms that the value type is unsigned long.
You may like to run your applications under valgrind, it reports this kind of errors.

Accessing another process virtual memory in Linux (debugging)

How does gdb access another process virtual memory on Linux? Is it all done via /proc?
How does gdb access another process virtual memory on Linux? Is it all done via /proc?
On Linux for reading memory:
1) If the number of bytes to read is fewer than 3 * sizeof (long) or the filesystem /proc is unavailable or reading from /proc/PID/mem is unsuccessful then ptrace is used with PTRACE_PEEKTEXT to read data.
These are these conditions in the function linux_proc_xfer_partial():
/* Don't bother for one word. */
if (len < 3 * sizeof (long))
return 0;
/* We could keep this file open and cache it - possibly one per
thread. That requires some juggling, but is even faster. */
xsnprintf (filename, sizeof filename, "/proc/%d/mem",
ptid_get_pid (inferior_ptid));
fd = gdb_open_cloexec (filename, O_RDONLY | O_LARGEFILE, 0);
if (fd == -1)
return 0;
2) If the number of bytes to read is greater than or equal to 3 * sizeof (long) and /proc is available, then pread64 (or lseek() and read()) is used:
static LONGEST
linux_proc_xfer_partial (struct target_ops *ops, enum target_object object,
const char *annex, gdb_byte *readbuf,
const gdb_byte *writebuf,
ULONGEST offset, LONGEST len)
{
.....
/* If pread64 is available, use it. It's faster if the kernel
supports it (only one syscall), and it's 64-bit safe even on
32-bit platforms (for instance, SPARC debugging a SPARC64
application). */
#ifdef HAVE_PREAD64
if (pread64 (fd, readbuf, len, offset) != len)
#else
if (lseek (fd, offset, SEEK_SET) == -1 || read (fd, readbuf, len) != len)
#endif
ret = 0;
else
ret = len;
close (fd);
return ret;
}
On Linux for writing memory:
1) ptrace with PTRACE_POKETEXT or PTRACE_POKEDATA is used.
As for your second question:
where can I find information about ... setting hardware watchpoints
gdb Internals: Watchpoints: http://sourceware.org/gdb/wiki/Internals%20Watchpoints
Reference:
http://linux.die.net/man/2/ptrace
http://www.alexonlinux.com/how-debugger-works

Capturing user-space assembly with ftrace and kprobes (by using virtual address translation)?

Apologies for the longish post, I'm having trouble formulating it in a shorter way. Also, maybe this is more appropriate for Unix & Linux Stack Exchange, but I'll try here at SO first, as there is an ftrace tag.
Anyways - I'd like to observe the machine instructions of a user program as they execute, in the context of a full function_graph capture using ftrace. One problem is that I need this for an older kernel:
$ uname -a
Linux mypc 2.6.38-16-generic #67-Ubuntu SMP Thu Sep 6 18:00:43 UTC 2012 i686 i686 i386 GNU/Linux
... and in this edition, there is no UPROBES - which, as Uprobes in 3.5 [LWN.net] notes, should be able to do something like that. (As long as I don't have to patch the original kernel, I would be willing to try a kernel module built out of tree, as User-Space Probes (Uprobes) [chunghwan.com] seems to demonstrate; but as far as I can see from 0: Inode based uprobes [LWN.net], the 2.6 would probably need a full patch)
However, on this version, there is a /sys/kernel/debug/kprobes, and /sys/kernel/debug/tracing/kprobe_events; and Documentation/trace/kprobetrace.txt implies that a kprobe can be set directly on an address; even if I cannot find an example anywhere on how this is used.
In any case, I would still not be sure what addresses to use - as a small example, let's say I want to trace the start of the main function of the wtest.c program (included below). I can do this to compile and obtain an machine instruction assembly listing:
$ gcc -g -O0 wtest.c -o wtest
$ objdump -S wtest | less
...
08048474 <main>:
int main(void) {
8048474: 55 push %ebp
8048475: 89 e5 mov %esp,%ebp
8048477: 83 e4 f0 and $0xfffffff0,%esp
804847a: 83 ec 30 sub $0x30,%esp
804847d: 65 a1 14 00 00 00 mov %gs:0x14,%eax
8048483: 89 44 24 2c mov %eax,0x2c(%esp)
8048487: 31 c0 xor %eax,%eax
char filename[] = "/tmp/wtest.txt";
...
return 0;
804850a: b8 00 00 00 00 mov $0x0,%eax
}
...
I would set up ftrace logging via this script:
sudo bash -c '
KDBGPATH="/sys/kernel/debug/tracing"
echo function_graph > $KDBGPATH/current_tracer
echo funcgraph-abstime > $KDBGPATH/trace_options
echo funcgraph-proc > $KDBGPATH/trace_options
echo 0 > $KDBGPATH/tracing_on
echo > $KDBGPATH/trace
echo 1 > $KDBGPATH/tracing_on ; ./wtest ; echo 0 > $KDBGPATH/tracing_on
cat $KDBGPATH/trace > wtest.ftrace
'
You can see a portion of the (otherwise complex) resulting ftrace log in debugging - Observing a hard-disk write in kernel space (with drivers/modules) - Unix & Linux Stack Exchange (where I got the example from).
Basically, I'd want a printout in this ftrace log, when the first instructions of main - say, the instructions at 0x8048474, 0x8048475, 0x8048477, 0x804847a, 0x804847d, 0x8048483 and 0x8048487 - are executed by (any) CPU. The problem is, as far as I can understand from Anatomy of a Program in Memory : Gustavo Duarte, these addresses are the virtual addresses, as seen from the perspective of the process itself (and I gather, the same perspective is shown by /proc/PID/maps)... And apparently, for kprobe_events I'd need a physical address?
So, my idea would be: if I can find the physical addresses corresponding to the virtual addresses of the program disassembly (say by coding a kernel module, which would accept pid and address, and return the physical address via procfs), I could set up addresses as a sort of "tracepoints" via /sys/kernel/debug/tracing/kprobe_events in the above script - and hopefully get them in the ftrace log. Could this work, in principle?
One problem with this, I found on Linux(ubuntu), C language: Virtual to Physical Address Translation - Stack Overflow:
In user code, you can't know the physical address corresponding to a virtual address. This is information is simply not exported outside the kernel. It could even change at any time, especially if the kernel decides to swap out part of your process's memory.
...
Pass the virtual address to the kernel using systemcall/procfs and use vmalloc_to_pfn. Return the Physical address through procfs/registers.
However, vmalloc_to_pfn doesn't seem to be trivial either:
x86 64 - vmalloc_to_pfn returns 32 bit address on Linux 32 system. Why does it chop off higher bits of PAE physical address? - Stack Overflow
VA: 0xf8ab87fc PA using vmalloc_to_pfn: 0x36f7f7fc. But I'm actually expecting: 0x136f7f7fc.
...
The physical address falls between 4 to 5 GB. But I can't get the exact physical address, I only get the chopped off 32-bit address. Is there another way to get true physical address?
So, I'm not sure how reliably I could extract the physical addresses so they are traced by kprobes - especially since "it could even change at any time". But here, I would hope that since the program is small and trivial, there would be a reasonable chance that the program would not swap while being traced, allowing for a proper capture to be obtained. (So even if I have to run the debug script above multiple times, as long as I can hope to obtain a "proper" capture once out of 10 times (or even 100 times), I'd be OK with it.).
Note that I'd want an output through ftrace, so that the timestamps are expressed in the same domain (see Reliable Linux kernel timestamps (or adjustment thereof) with both usbmon and ftrace? - Stack Overflow for an illustration of a problem with timestamps). Thus, even if I could come up with, say, a gdb script, to run and trace the program from userspace (while simultaneously an ftrace capture is obtained) - I'd like to avoid that, as the overhead from gdb itself will show in the ftrace logs.
So, in summary:
Is the approach of obtaining (possibly through a separate kernel module) physical addresses from the virtual (from a disassembly of an executable) addresses - so they are used to trigger a kprobe_event logged by ftrace - worth pursuing? If so, are there any examples of kernel modules that can be used for this address translation purpose?
Could I otherwise use a kernel module to "register" a callback/handler function when a particular memory address is being executed? Then I could simply use a trace_printk in that function to have an ftrace log (or even without that, the handler function name itself should show in the ftrace log), and it doesn't seem there will be too much overhead with that...
Actually, in this 2007 posting, Jim Keniston - utrace-based uprobes: systemtap mailing list, there is a 11. Uprobes Example (added to Documentation/uprobes.txt), which seems to be exactly that - a kernel module registering a handler function. Unfortunately, it uses linux/uprobes.h; and I have only kprobes.h in my /usr/src/linux-headers-2.6.38-16/include/linux/. Also, on my system, even systemtap complains about CONFIG_UTRACE not being enabled (see this comment)... So if there's any other approach I could use to obtain a debug trace like I want, without having to recompile the kernel to get uprobes, it would be great to know...
wtest.c:
#include <stdio.h>
#include <fcntl.h> // O_CREAT, O_WRONLY, S_IRUSR
#include <unistd.h> // write, close (were implicitly declared before)
/*
 * Minimal test program: create/open /tmp/wtest.txt, write the four bytes
 * "abcd" to it and close it again.
 *
 * The statements in main() are deliberately left exactly as they were, so
 * that the disassembly quoted in the post still corresponds to this source;
 * return values of open()/write()/close() are therefore not checked.
 */
int main(void) {
    char filename[] = "/tmp/wtest.txt";
    char buffer[] = "abcd";
    int fd;
    mode_t perms = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH; /* rw-rw-rw- before umask */
    fd = open(filename, O_RDWR|O_CREAT, perms);
    write(fd,buffer,4);
    close(fd);
    return 0;
}
Obviously, this would be much easier with built-in uprobes on kernels 3.5+; but given that uprobes for my kernel 2.6.38 is a very deep-going patch (which I couldn't really isolate in a separate kernel module, so as to avoid patching the kernel), here is what I can note for a standalone module on 2.6.38. (Since I'm still unsure of many things, I would still like to see an answer that would correct any misunderstandings in this post.)
I think I got somewhere, but not with kprobes. I'm not sure, but it seems I managed to get physical addresses right; however, the kprobes documentation specifically says "@ADDR : fetch memory at ADDR (ADDR should be in kernel)"; and the physical addresses I get are below kernel boundary of 0xc0000000 (but then, 0xc0000000 is usually together with the virtual memory layout?).
So I used a hardware breakpoint instead - the module is below, however caveat emptor - it behaves randomly, and occasionally can cause a kernel oops!. By compiling the module, and running in bash:
$ sudo bash -c 'KDBGPATH="/sys/kernel/debug/tracing" ;
echo function_graph > $KDBGPATH/current_tracer ; echo funcgraph-abstime > $KDBGPATH/trace_options
echo funcgraph-proc > $KDBGPATH/trace_options ; echo 8192 > $KDBGPATH/buffer_size_kb ;
echo 0 > $KDBGPATH/tracing_on ; echo > $KDBGPATH/trace'
$ sudo insmod ./callmodule.ko && sleep 0.1 && sudo rmmod callmodule && \
tail -n25 /var/log/syslog | tee log.txt && \
sudo cat /sys/kernel/debug/tracing/trace >> log.txt
... I get a log. I want to trace the first two instructions of the main() of wtest, which for me are:
$ objdump -S wtest/wtest | grep -A3 'int main'
int main(void) {
8048474: 55 push %ebp
8048475: 89 e5 mov %esp,%ebp
8048477: 83 e4 f0 and $0xfffffff0,%esp
... at virtual addresses 0x08048474 and 0x08048475. In the syslog output, I could get, say:
...
[ 1106.383011] callmodule: parent task a: f40a9940 c: kworker/u:1 p: [14] s: stopped
[ 1106.383017] callmodule: - wtest [9404]
[ 1106.383023] callmodule: Trying to walk page table; addr task 0xEAE90CA0 ->mm ->start_code: 0x08048000 ->end_code: 0x080485F4
[ 1106.383029] callmodule: walk_ 0x8048000 callmodule: Valid pgd : Valid pud: Valid pmd: page frame struct is # f63e5d80; *virtual (page_address) # (null) (is_vmalloc_addr 0 virt_addr_valid 0 virt_to_phys 0x40000000) page_to_pfn 639ec page_to_phys 0x639ec000
[ 1106.383049] callmodule: walk_ 0x80483c0 callmodule: Valid pgd : Valid pud: Valid pmd: page frame struct is # f63e5d80; *virtual (page_address) # (null) (is_vmalloc_addr 0 virt_addr_valid 0 virt_to_phys 0x40000000) page_to_pfn 639ec page_to_phys 0x639ec000
[ 1106.383067] callmodule: walk_ 0x8048474 callmodule: Valid pgd : Valid pud: Valid pmd: page frame struct is # f63e5d80; *virtual (page_address) # (null) (is_vmalloc_addr 0 virt_addr_valid 0 virt_to_phys 0x40000000) page_to_pfn 639ec page_to_phys 0x639ec000
[ 1106.383083] callmodule: physaddr : (0x080483c0 ->) 0x639ec3c0 : (0x08048474 ->) 0x639ec474
[ 1106.383106] callmodule: 0x08048474 id [3]
[ 1106.383113] callmodule: 0x08048475 id [4]
[ 1106.383118] callmodule: (( 0x08048000 is_vmalloc_addr 0 virt_addr_valid 0 ))
[ 1106.383130] callmodule: cont pid task a: eae90ca0 c: wtest p: [9404] s: runnable
[ 1106.383147] initcall callmodule_init+0x0/0x1000 [callmodule] returned with preemption imbalance
[ 1106.518074] callmodule: < exit
... meaning that it mapped the virtual address 0x08048474 to physical address 0x639ec474. However, the physical is not used for hardware breakpoints - there we can supply a virtual address directly to register_user_hw_breakpoint; however, we also need to supply the task_struct of the process too. With that, I can get something like this in the ftrace output:
...
597.907256 | 1) wtest-5339 | | handle_mm_fault() {
...
597.907310 | 1) wtest-5339 | + 35.627 us | }
597.907311 | 1) wtest-5339 | + 46.245 us | }
597.907312 | 1) wtest-5339 | + 56.143 us | }
597.907313 | 1) wtest-5339 | 1.039 us | up_read();
597.907317 | 1) wtest-5339 | 1.285 us | native_get_debugreg();
597.907319 | 1) wtest-5339 | 1.075 us | native_set_debugreg();
597.907322 | 1) wtest-5339 | 1.129 us | native_get_debugreg();
597.907324 | 1) wtest-5339 | 1.189 us | native_set_debugreg();
597.907329 | 1) wtest-5339 | | () {
597.907333 | 1) wtest-5339 | | /* callmodule: hwbp hit: id [3] */
597.907334 | 1) wtest-5339 | 5.567 us | }
597.907336 | 1) wtest-5339 | 1.123 us | native_set_debugreg();
597.907339 | 1) wtest-5339 | 1.130 us | native_get_debugreg();
597.907341 | 1) wtest-5339 | 1.075 us | native_set_debugreg();
597.907343 | 1) wtest-5339 | 1.075 us | native_get_debugreg();
597.907345 | 1) wtest-5339 | 1.081 us | native_set_debugreg();
597.907348 | 1) wtest-5339 | | () {
597.907350 | 1) wtest-5339 | | /* callmodule: hwbp hit: id [4] */
597.907351 | 1) wtest-5339 | 3.033 us | }
597.907352 | 1) wtest-5339 | 1.105 us | native_set_debugreg();
597.907358 | 1) wtest-5339 | 1.315 us | down_read_trylock();
597.907360 | 1) wtest-5339 | 1.123 us | _cond_resched();
597.907362 | 1) wtest-5339 | 1.027 us | find_vma();
597.907364 | 1) wtest-5339 | | handle_mm_fault() {
...
... where the traces corresponding to the assembly are marked by breakpoint id. Thankfully, they are right after another, as expected; however, ftrace has also captured some debug commands in-between. In any case, this is what I wanted to see.
Here are some notes about the module:
Most of the module is from Execute/invoke user-space program, and get its pid, from a kernel module ; where a user process is started and pid obtained
Since we have to get to the task_struct to get to the pid; here I save both (which is kind of redundant)
Where functions symbols are not exported; if the symbol is in kallsyms, then I use a function pointer to the address; else other needed functions are copied from source
I didn't know how to start the user-space process stopped, so after spawning I issue a SIGSTOP (which on its own, seems kind of unreliable at that point), and set state to __TASK_STOPPED).
I may still get status "runnable" where I don't expect it sometimes - however, if the init exits early with an error, I've noticed wtest hanging in process list long after it would have terminated naturally, so I guess that works.
To get absolute/physical addresses, I used Walking page tables of a process in Linux to get to the page corresponding to a virtual address, and then digging through kernel sources I found page_to_phys() to get to the address (internally via page frame number); LDD3 ch.15 helps with understanding relationship between pfn and physical address.
Since here I expect to have physical address, I don't use PAGE_SHIFT, but calculate offsets directly from objdump's assembly output - I am not 100% sure this is correct, though.
Note, ( see also How to get a struct page from any address in the Linux kernel ), the module output says that the virtual address 0x08048000 is neither is_vmalloc_addr nor virt_addr_valid; I guess, this should tell me, one couldn't have used neither vmalloc_to_pfn() nor virt_to_page() to get to its physical address !?
Setting up kprobes for ftrace from kernel space is kinda tricky (needs functions copied)
Trying to set a kprobe on the physical addresses I get (e.g. 0x639ec474), always results with "Could not insert probe(-22)"
Just to see if the format is parsed, I'm trying with the kallsyms address of the tracing_on() function (0xc10bcf60) below; that seems to work - because it raises a fatal "BUG: scheduling while atomic" (apparently, we're not meant to set breakpoints in module_init?). Bug is fatal, because it makes the kprobes directory disappear from the ftrace debug directory
Just creating the kprobe would not make it appear in the ftrace log - it also needs to be enabled; the necessary code for enabling is there - but I've never tried it, because of the previous bug
Finally, the breakpoint setting is from Watch a variable (memory address) change in Linux kernel, and print stack trace when it changes?
I've never seen an example for setting an executable hardware breakpoint; it kept failing for me, until through kernel source search, I found that for HW_BREAKPOINT_X, attr.bp_len need to be set to sizeof(long)
If I try to printk the attr variable(s) - from _init or from the handler - something gets seriously messed up, and whatever variable I try to print next, I get value 0x5 (or 0x48) for it (?!)
Since I'm trying to use a single handler function for both breakpoints, the only reliable piece of info that survives from _init to the handler, able to differentiate between the two, seems to be bp->id
These id's are autoassigned, and seems they are not re-claimed if you unregister the breakpoints (I do not unregister them to avoid extra ftrace printouts).
As far as the randomness goes, I think this is because the process is not started in a stopped state; and by the time it gets stopped, it ends up in a different state (or, quite possibly, I'm missing some locking somewhere). Anyways, you can also expect in syslog:
[ 1661.815114] callmodule: Trying to walk page table; addr task 0xEAF68CA0 ->mm ->start_code: 0x08048000 ->end_code: 0x080485F4
[ 1661.815319] callmodule: walk_ 0x8048000 callmodule: Valid pgd : Valid pud: Valid pmd: page frame struct is # f5772000; *virtual (page_address) # c0000000 (is_vmalloc_addr 0 virt_addr_valid 1 virt_to_phys 0x0) page_to_pfn 0 page_to_phys 0x0
[ 1661.815837] callmodule: walk_ 0x80483c0 callmodule: Valid pgd : Valid pud: Valid pmd: page frame struct is # f5772000; *virtual (page_address) # c0000000 (is_vmalloc_addr 0 virt_addr_valid 1 virt_to_phys 0x0) page_to_pfn 0 page_to_phys 0x0
[ 1661.816846] callmodule: walk_ 0x8048474 callmodule: Valid pgd : Valid pud: Valid pmd: page frame struct is # f5772000; *virtual (page_address) # c0000000 (is_vmalloc_addr 0 virt_addr_valid 1 virt_to_phys 0x0) page_to_pfn 0 page_to_phys 0x0
... that is, even with a proper task pointer (judging by start_code), only 0x0 is obtained as physical address. Sometimes you get the same outcome, but with start_code: 0x00000000 ->end_code: 0x00000000. And sometimes, a task_struct cannot be obtained, even if pid can:
[ 833.380417] callmodule:c: pid 7663
[ 833.380424] callmodule: everything all right; pid 7663 (7663)
[ 833.380430] callmodule: p is NULL - exiting
[ 833.516160] callmodule: < exit
Well, hopefully someone will comment and clarify some of the behavior of this module :)
Hope this helps someone,
Cheers!
Makefile:
# Out-of-tree build of callmodule.ko against the running kernel's build tree.
# Debug info and no optimisation for the module's objects.
EXTRA_CFLAGS=-g -O0
obj-m += callmodule.o

# all/clean are commands, not files.
.PHONY: all clean

# Recipe lines must start with a TAB; $(MAKE) passes flags on to the sub-make.
all:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

clean:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
callmodule.c:
#include <linux/module.h>
#include <linux/slab.h> //kzalloc
#include <linux/syscalls.h> // SIGCHLD, ... sys_wait4, ...
#include <linux/kallsyms.h> // kallsyms_lookup, print_symbol
#include <linux/highmem.h> // ‘kmap_atomic’ (via pte_offset_map)
#include <asm/io.h> // page_to_phys (arch/x86/include/asm/io.h)
// Forward declaration so the globals below can point at the extended
// helper-info struct before it is defined.
struct subprocess_infoB; // forward declare
// global variable - to avoid intervening too much in the return of call_usermodehelperB:
// pid of the spawned user-mode helper; written by call_usermodehelper_fnsB()
static int callmodule_pid;
// the extended info struct of the last spawned helper; written by
// call_usermodehelper_fnsB() and read by walk_page_table() / callmodule_init()
static struct subprocess_infoB* callmodule_infoB;
// compile-time switch: set to 1 to build the (experimental) kprobes code paths
#define TRY_USE_KPROBES 0 // 1 // enable/disable kprobes usage code
#include <linux/kprobes.h> // enable_kprobe
// for hardware breakpoint:
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
// define a modified struct (with extra fields) here:
/*
 * Extended copy of the kernel's struct subprocess_info.
 *
 * Pointers to this struct are cast to (struct subprocess_info *) and handed
 * to the stock usermodehelper API, so the leading members presumably have to
 * mirror the running kernel's struct subprocess_info exactly
 * (NOTE(review): verify against the target kernel version).
 * The trailing members (pid, task, last_page_physaddr) are the additions
 * made by this module.
 */
struct subprocess_infoB {
struct work_struct work;
struct completion *complete;
char *path;
char **argv;
char **envp;
int wait; //enum umh_wait wait;
int retval;
int (*init)(struct subprocess_info *info);
void (*cleanup)(struct subprocess_info *info);
void *data;
// --- extra fields added by this module ---
pid_t pid; // pid returned by kernel_thread() in __call_usermodehelperB()
struct task_struct *task; // task looked up from that pid (may be NULL)
unsigned long long last_page_physaddr; // result of the last walk_page_table() call, 0 if none
};
// Prototype: the definition follows further down, but the inline wrapper
// call_usermodehelper_fnsB() below already needs it.
struct subprocess_infoB *call_usermodehelper_setupB(char *path, char **argv,
char **envp, gfp_t gfp_mask);
/*
 * call_usermodehelper_fnsB() - spawn a user-mode helper and remember its pid.
 * @path:    path of the executable to run
 * @argv:    NULL-terminated argument vector
 * @envp:    NULL-terminated environment
 * @wait:    one of the UMH_* wait modes
 * @init:    optional callback handed to call_usermodehelper_setfns()
 * @cleanup: optional callback handed to call_usermodehelper_setfns()
 * @data:    opaque pointer handed to call_usermodehelper_setfns()
 *
 * Allocates an extended info struct, runs the helper through
 * call_usermodehelper_exec() and then publishes the pid and the info
 * pointer in the globals callmodule_pid / callmodule_infoB.
 *
 * Return: -ENOMEM if the info struct cannot be allocated, otherwise the
 * return value of call_usermodehelper_exec().
 */
static inline int
call_usermodehelper_fnsB(char *path, char **argv, char **envp,
			 int wait, //enum umh_wait wait,
			 int (*init)(struct subprocess_info *info),
			 void (*cleanup)(struct subprocess_info *), void *data)
{
	struct subprocess_info *info;
	struct subprocess_infoB *infoB;
	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
	int ret;

	populate_rootfs_wait();
	infoB = call_usermodehelper_setupB(path, argv, envp, gfp_mask);
	// The allocation can fail: check BEFORE touching infoB->pid
	// (the original printed infoB->pid first and could oops on NULL).
	if (infoB == NULL)
		return -ENOMEM;
	printk(KBUILD_MODNAME ":a: pid %d\n", infoB->pid);
	info = (struct subprocess_info *) infoB;
	call_usermodehelper_setfns(info, init, cleanup, data);
	printk(KBUILD_MODNAME ":b: pid %d\n", infoB->pid);
	// this must be called first, before infoB->pid is populated (by __call_usermodehelperB):
	ret = call_usermodehelper_exec(info, wait);
	// NOTE(review): the stock call_usermodehelper_exec() may free 'info'
	// before it returns; if so, the reads below touch freed memory -
	// confirm against the target kernel.
	// assign global pid (and infoB) here, so rest of the code has it:
	callmodule_pid = infoB->pid;
	callmodule_infoB = infoB;
	printk(KBUILD_MODNAME ":c: pid %d\n", callmodule_pid);
	return ret;
}
/*
 * call_usermodehelperB() - convenience wrapper around
 * call_usermodehelper_fnsB() for callers that need no init/cleanup
 * callbacks and no private data.
 *
 * Return: whatever call_usermodehelper_fnsB() returns.
 */
static inline int
call_usermodehelperB(char *path, char **argv, char **envp, int wait) //enum umh_wait wait)
{
	int rc;

	rc = call_usermodehelper_fnsB(path, argv, envp, wait, NULL, NULL, NULL);
	return rc;
}
/*
 * Work-queue callback installed by call_usermodehelper_setupB() via INIT_WORK.
 *
 * Spawns the helper with kernel_thread(), immediately sends it SIGSTOP,
 * and records the resulting pid and task_struct in the extended info struct
 * so that the module's init code can inspect the (hopefully still stopped)
 * process. The trailing switch then frees the info or signals completion
 * depending on the wait mode.
 *
 * NOTE(review): the two entry points are called through hard-coded
 * addresses taken from /proc/kallsyms; they are only valid for the exact
 * kernel build they were read from.
 */
static void __call_usermodehelperB(struct work_struct *work)
{
struct subprocess_infoB *sub_infoB =
container_of(work, struct subprocess_infoB, work);
int wait = sub_infoB->wait; // enum umh_wait wait = sub_info->wait;
pid_t pid;
struct subprocess_info *sub_info;
// hack - declare function pointers
int (*ptrwait_for_helper)(void *data);
int (*ptr____call_usermodehelper)(void *data);
// assign function pointers to verbatim addresses as obtained from /proc/kallsyms
int killret;
struct task_struct *spawned_task;
ptrwait_for_helper = (void *)0xc1065b60;
ptr____call_usermodehelper = (void *)0xc1065ed0;
// same object, viewed as the stock struct for the kernel API
sub_info = (struct subprocess_info *)sub_infoB;
if (wait == UMH_WAIT_PROC)
pid = kernel_thread((*ptrwait_for_helper), sub_info, //(wait_for_helper, sub_info,
CLONE_FS | CLONE_FILES | SIGCHLD);
else
pid = kernel_thread((*ptr____call_usermodehelper), sub_info, //(____call_usermodehelper, sub_info,
CLONE_VFORK | SIGCHLD);
// NOTE(review): pid is not checked for < 0 before the lookups below, and
// pid_task() is called here without rcu_read_lock() (callmodule_init()
// does take it) - confirm whether that is safe in this context.
spawned_task = pid_task(find_vpid(pid), PIDTYPE_PID);
// stop/suspend/pause task
killret = kill_pid(find_vpid(pid), SIGSTOP, 1);
if (spawned_task!=NULL) {
// does this stop the process really?
spawned_task->state = __TASK_STOPPED;
printk(KBUILD_MODNAME ": : exst %d exco %d exsi %d diex %d inex %d inio %d\n", spawned_task->exit_state, spawned_task->exit_code, spawned_task->exit_signal, spawned_task->did_exec, spawned_task->in_execve, spawned_task->in_iowait);
}
printk(KBUILD_MODNAME ": : (kr: %d)\n", killret);
printk(KBUILD_MODNAME ": : pid %d (%p) (%s)\n", pid, spawned_task,
(spawned_task!=NULL)?((spawned_task->state==-1)?"unrunnable":((spawned_task->state==0)?"runnable":"stopped")):"null" );
// grab and save the pid (and task_struct) here:
sub_infoB->pid = pid;
sub_infoB->task = spawned_task;
// completion / cleanup handling, selected by the wait mode
switch (wait) {
case UMH_NO_WAIT:
call_usermodehelper_freeinfo(sub_info);
break;
case UMH_WAIT_PROC:
// thread started: leave without completing here
if (pid > 0)
break;
/* FALLTHROUGH */
case UMH_WAIT_EXEC:
// a negative pid is reported to the waiter as the return value
if (pid < 0)
sub_info->retval = pid;
complete(sub_info->complete);
}
}
/*
 * call_usermodehelper_setupB() - allocate and prepare an extended helper
 * info struct.
 * @path:     executable to run
 * @argv:     argument vector
 * @envp:     environment vector
 * @gfp_mask: allocation flags passed to kzalloc()
 *
 * The struct is zero-allocated, its work item is bound to
 * __call_usermodehelperB(), and the three pointers are stored as given
 * (nothing is copied).
 *
 * Return: the new struct, or NULL if the allocation failed.
 */
struct subprocess_infoB *call_usermodehelper_setupB(char *path, char **argv,
						    char **envp, gfp_t gfp_mask)
{
	struct subprocess_infoB *helper;

	helper = kzalloc(sizeof(struct subprocess_infoB), gfp_mask);
	if (helper == NULL)
		return NULL;

	INIT_WORK(&helper->work, __call_usermodehelperB);
	helper->path = path;
	helper->argv = argv;
	helper->envp = envp;
	return helper;
}
#if TRY_USE_KPROBES
// copy from /kernel/trace/trace_probe.c (is unexported)
int traceprobe_command(const char *buf, int (*createfn)(int, char **))
{
char **argv;
int argc, ret;
argc = 0;
ret = 0;
argv = argv_split(GFP_KERNEL, buf, &argc);
if (!argv)
return -ENOMEM;
if (argc)
ret = createfn(argc, argv);
argv_free(argv);
return ret;
}
// copy from kernel/trace/trace_kprobe.c?v=2.6.38 (is unexported)
// NOTE(review): these are local copies of kernel-private definitions; they
// presumably must match the running kernel's layout exactly - verify.
// Flag bits stored in trace_probe.flags:
#define TP_FLAG_TRACE 1
#define TP_FLAG_PROFILE 2
// Signature of a fetch function.
typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
// A fetch function together with its private data pointer.
struct fetch_param {
fetch_func_t fn;
void *data;
};
// Signature of a function that prints a fetched value into a trace_seq.
typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, void *);
// Fetch method indices; FETCH_MTD_END is the count and sizes the
// fetch[] table in struct fetch_type.
enum {
FETCH_MTD_reg = 0,
FETCH_MTD_stack,
FETCH_MTD_retval,
FETCH_MTD_memory,
FETCH_MTD_symbol,
FETCH_MTD_deref,
FETCH_MTD_END,
};
/* Fetch type information table entry (one per supported argument type) */
struct fetch_type {
const char *name; /* Name of type */
size_t size; /* Byte size of type */
int is_signed; /* Signed flag */
print_type_func_t print; /* Print functions */
const char *fmt; /* Format string */
const char *fmttype; /* Name in format file */
/* Fetch functions, indexed by the FETCH_MTD_* constants */
fetch_func_t fetch[FETCH_MTD_END];
};
/* Description of a single probe argument (local copy of the kernel struct) */
struct probe_arg {
struct fetch_param fetch;
struct fetch_param fetch_size;
unsigned int offset; /* Offset from argument entry */
const char *name; /* Name of this argument */
const char *comm; /* Command of this argument */
const struct fetch_type *type; /* Type of this argument */
};
/*
 * A kprobe-based trace event (local copy of the kernel struct).
 * Ends in a flexible array of nr_args argument descriptions.
 */
struct trace_probe {
struct list_head list;
struct kretprobe rp; /* Use rp.kp for kprobe use */
unsigned long nhit;
unsigned int flags; /* For TP_FLAG_* */
const char *symbol; /* symbol name */
struct ftrace_event_class class;
struct ftrace_event_call call;
ssize_t size; /* trace entry size */
unsigned int nr_args;
struct probe_arg args[];
};
/*
 * probe_is_return() - tell whether a trace probe is a return probe.
 *
 * Return: 1 if the embedded kretprobe has a handler set, 0 otherwise.
 */
static int probe_is_return(struct trace_probe *tp)
{
	if (tp->rp.handler == NULL)
		return 0;
	return 1;
}
/*
 * probe_event_enable() - mark the probe behind an event call as traced and
 * arm it.
 *
 * Sets TP_FLAG_TRACE, then enables either the kretprobe or the plain
 * kprobe depending on probe_is_return().
 *
 * Return: the result of enable_kretprobe() / enable_kprobe().
 */
static int probe_event_enable(struct ftrace_event_call *call)
{
	struct trace_probe *probe = (struct trace_probe *)call->data;

	probe->flags |= TP_FLAG_TRACE;

	return probe_is_return(probe) ? enable_kretprobe(&probe->rp)
				      : enable_kprobe(&probe->rp.kp);
}
#define KPROBE_EVENT_SYSTEM "kprobes"
#endif // TRY_USE_KPROBES
// <<<<<<<<<<<<<<<<<<<<<<
/*
 * walk_page_table() - resolve a user virtual address of a task to its page.
 * @addr:   user-space virtual address to look up
 * @intask: task whose address space is walked
 *
 * Walks pgd -> pud -> pmd -> pte of @intask's mm, logging each level that
 * is valid. On success the physical address of the page is stored in
 * callmodule_infoB->last_page_physaddr; on any failure that field is left
 * at 0.
 *
 * Return: the struct page backing @addr, or NULL if the task has no mm,
 * any table level is missing/bad, or the page is not present.
 *
 * NOTE(review): the walk is done without taking the mm's locks; confirm
 * that the target task cannot change its mappings concurrently.
 */
static struct page *walk_page_table(unsigned long addr, struct task_struct *intask)
{
	pgd_t *pgd;
	pte_t *ptep, pte;
	pud_t *pud;
	pmd_t *pmd;
	struct page *page = NULL;
	struct mm_struct *mm = intask ? intask->mm : NULL;

	callmodule_infoB->last_page_physaddr = 0ULL; // reset here, in case of early exit
	printk(KBUILD_MODNAME ": walk_ 0x%lx ", addr);

	// No mm: nothing to walk (the original dereferenced it unconditionally).
	if (!mm) {
		printk(": no mm");
		goto out;
	}

	pgd = pgd_offset(mm, addr);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		goto out;
	printk(KBUILD_MODNAME ": Valid pgd ");

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud) || pud_bad(*pud))
		goto out;
	printk( ": Valid pud");

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		goto out;
	printk( ": Valid pmd");

	ptep = pte_offset_map(pmd, addr);
	if (!ptep)
		goto out;
	// Take a copy of the entry, then drop the mapping right away:
	// every pte_offset_map() must be paired with pte_unmap().
	pte = *ptep;
	pte_unmap(ptep);

	// A PTE that is not present does not describe a page frame;
	// pte_page() on it yields a bogus page (pfn 0 / phys 0x0).
	if (!pte_present(pte)) {
		printk( ": pte not present");
		goto out;
	}

	page = pte_page(pte);
	if (page) {
		callmodule_infoB->last_page_physaddr = (unsigned long long)page_to_phys(page);
		printk( ": page frame struct is # %p; *virtual (page_address) # %p (is_vmalloc_addr %d virt_addr_valid %d virt_to_phys 0x%llx) page_to_pfn %lx page_to_phys 0x%llx", page, page_address(page), is_vmalloc_addr((void*)page_address(page)), virt_addr_valid(page_address(page)), (unsigned long long)virt_to_phys(page_address(page)), page_to_pfn(page), callmodule_infoB->last_page_physaddr);
	}
out:
	printk("\n");
	return page;
}
static void sample_hbp_handler(struct perf_event *bp,
struct perf_sample_data *data,
struct pt_regs *regs)
{
trace_printk(KBUILD_MODNAME ": hwbp hit: id [%llu]\n", bp->id );
//~ unregister_hw_breakpoint(bp);
}
// ----------------------
/*
 * callmodule_init() - module entry point.
 *
 * Spawns the user program through the modified usermodehelper path (which
 * stops it right after creation), logs the task, its parent and the
 * parent's children, tries to translate a few of the program's code
 * addresses to physical addresses, registers two execute hardware
 * breakpoints on the task and finally lets it continue with SIGCONT.
 *
 * Changes against the first version:
 *  - the breakpoint id is only printed after IS_ERR() has ruled out an
 *    error pointer (previously ->id was read from a possible ERR_PTR);
 *  - the child-list walk uses its own cursor, so 'p' still refers to the
 *    spawned task when the breakpoints are registered;
 *  - the page-table section is skipped when the saved task or its mm is
 *    NULL instead of dereferencing it.
 *
 * Return: always 0, so the module stays loaded even if a step failed.
 */
static int __init callmodule_init(void)
{
	int ret = 0;
	int bp_err;
	char userprog[] = "/path/to/wtest";
	char *argv[] = {userprog, "2", NULL };
	char *envp[] = {"HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
	struct task_struct *p;
	struct task_struct *par;
	struct task_struct *child;
	struct task_struct *pc;
	struct task_struct *task;
	struct list_head *children_list_head;
	struct list_head *cchildren_list_head;
	char *state_str;
	unsigned long offset, taddr;
	int (*ptr_create_trace_probe)(int argc, char **argv);
	struct trace_probe* (*ptr_find_probe_event)(const char *event, const char *group);
	//int (*ptr_probe_event_enable)(struct ftrace_event_call *call); // not exported, copy
#if TRY_USE_KPROBES
	char trcmd[256] = "";
	struct trace_probe *tp;
#endif //TRY_USE_KPROBES
	struct perf_event *sample_hbp, *sample_hbpb;
	struct perf_event_attr attr, attrb;

	printk(KBUILD_MODNAME ": > init %s\n", userprog);
	// unexported kernel functions, addresses taken from /proc/kallsyms
	ptr_create_trace_probe = (void *)0xc10d5120;
	ptr_find_probe_event = (void *)0xc10d41e0;
	print_symbol(KBUILD_MODNAME ": symbol # 0xc1065b60 is %s\n", 0xc1065b60); // shows wait_for_helper+0x0/0xb0
	print_symbol(KBUILD_MODNAME ": symbol # 0xc1065ed0 is %s\n", 0xc1065ed0); // shows ____call_usermodehelper+0x0/0x90
	print_symbol(KBUILD_MODNAME ": symbol # 0xc10d5120 is %s\n", 0xc10d5120); // shows create_trace_probe+0x0/0x590

	ret = call_usermodehelperB(userprog, argv, envp, UMH_WAIT_EXEC);
	if (ret != 0)
		printk(KBUILD_MODNAME ": error in call to usermodehelper: %i\n", ret);
	else
		printk(KBUILD_MODNAME ": everything all right; pid %d (%d)\n", callmodule_pid, callmodule_infoB->pid);
	tracing_on(); // earlier, so trace_printk of handler is caught!

	// find the task:
	rcu_read_lock();
	p = pid_task(find_vpid(callmodule_pid), PIDTYPE_PID);
	rcu_read_unlock();
	if (p == NULL) {
		printk(KBUILD_MODNAME ": p is NULL - exiting\n");
		return 0;
	}
	state_str = (p->state==-1)?"unrunnable":((p->state==0)?"runnable":"stopped");
	printk(KBUILD_MODNAME ": pid task a: %p c: %s p: [%d] s: %s\n",
	       p, p->comm, p->pid, state_str);

	// find parent task:
	par = p->parent;
	if (par == NULL) {
		printk(KBUILD_MODNAME ": par is NULL - exiting\n");
		return 0;
	}
	state_str = (par->state==-1)?"unrunnable":((par->state==0)?"runnable":"stopped");
	printk(KBUILD_MODNAME ": parent task a: %p c: %s p: [%d] s: %s\n",
	       par, par->comm, par->pid, state_str);

	// iterate through parent's (and our task's) child processes;
	// use a separate cursor so that 'p' keeps pointing at our task:
	rcu_read_lock(); // read_lock(&tasklist_lock);
	list_for_each(children_list_head, &par->children){
		child = list_entry(children_list_head, struct task_struct, sibling);
		printk(KBUILD_MODNAME ": - %s [%d] \n", child->comm, child->pid);
		if (child->pid == callmodule_pid) {
			list_for_each(cchildren_list_head, &child->children){
				pc = list_entry(cchildren_list_head, struct task_struct, sibling);
				printk(KBUILD_MODNAME ": - - %s [%d] \n", pc->comm, pc->pid);
			}
		}
	}
	rcu_read_unlock(); //~ read_unlock(&tasklist_lock);

	// Page-table inspection needs both the saved task and its mm.
	task = callmodule_infoB->task;
	if (task == NULL || task->mm == NULL) {
		printk(KBUILD_MODNAME ": task or task->mm is NULL - skipping page table walk\n");
	} else {
		printk(KBUILD_MODNAME ": Trying to walk page table; addr task 0x%X ->mm ->start_code: 0x%08lX ->end_code: 0x%08lX \n", (unsigned int) task, task->mm->start_code, task->mm->end_code);
		walk_page_table(0x08048000, task);
		// 080483c0 is start of .text; 08048474 start of main; for objdump -S wtest
		walk_page_table(0x080483c0, task);
		walk_page_table(0x08048474, task);
		if (callmodule_infoB->last_page_physaddr != 0ULL) {
			printk(KBUILD_MODNAME ": physaddr ");
			taddr = 0x080483c0; // .text
			offset = taddr - task->mm->start_code;
			printk(": (0x%08lx ->) 0x%08llx ", taddr, callmodule_infoB->last_page_physaddr+offset);
			taddr = 0x08048474; // main
			offset = taddr - task->mm->start_code;
			printk(": (0x%08lx ->) 0x%08llx ", taddr, callmodule_infoB->last_page_physaddr+offset);
			printk("\n");
#if TRY_USE_KPROBES // can't use this here (BUG: scheduling while atomic, if probe inserts)
			//~ sprintf(trcmd, "p:myprobe 0x%08llx", callmodule_infoB->last_page_physaddr+offset);
			// try symbol for c10bcf60 - tracing_on
			sprintf(trcmd, "p:myprobe 0x%08llx", (unsigned long long)0xc10bcf60);
			ret = traceprobe_command(trcmd, ptr_create_trace_probe); //create_trace_probe);
			printk("%s -- ret: %d\n", trcmd, ret);
			// try find probe and enable it (compiles, but untested):
			tp = ptr_find_probe_event("myprobe", KPROBE_EVENT_SYSTEM);
			if (tp != NULL) probe_event_enable(&tp->call);
#endif //TRY_USE_KPROBES
		}
	}

	// first execute breakpoint: start of main
	hw_breakpoint_init(&attr);
	attr.bp_len = sizeof(long); //HW_BREAKPOINT_LEN_1;
	attr.bp_type = HW_BREAKPOINT_X ;
	attr.bp_addr = 0x08048474; // main
	sample_hbp = register_user_hw_breakpoint(&attr, (perf_overflow_handler_t)sample_hbp_handler, p);
	// The result may be an ERR_PTR: test it before reading ->id.
	if (IS_ERR((void __force *)sample_hbp)) {
		bp_err = PTR_ERR((void __force *)sample_hbp);
		printk(KBUILD_MODNAME ": Breakpoint registration failed (%d)\n", bp_err);
		//~ return ret;
	} else {
		printk(KBUILD_MODNAME ": 0x08048474 id [%llu]\n", sample_hbp->id); //
	}

	// second execute breakpoint: one byte further
	hw_breakpoint_init(&attrb);
	attrb.bp_len = sizeof(long);
	attrb.bp_type = HW_BREAKPOINT_X ;
	attrb.bp_addr = 0x08048475; // first instruction after main
	sample_hbpb = register_user_hw_breakpoint(&attrb, (perf_overflow_handler_t)sample_hbp_handler, p);
	if (IS_ERR((void __force *)sample_hbpb)) {
		bp_err = PTR_ERR((void __force *)sample_hbpb);
		printk(KBUILD_MODNAME ": Breakpoint registration failed (%d)\n", bp_err);
		//~ return ret;
	} else {
		printk(KBUILD_MODNAME ": 0x08048475 id [%llu]\n", sample_hbpb->id); //45
	}

	printk(KBUILD_MODNAME ": (( 0x08048000 is_vmalloc_addr %d virt_addr_valid %d ))\n", is_vmalloc_addr((void*)0x08048000), virt_addr_valid(0x08048000));
	kill_pid(find_vpid(callmodule_pid), SIGCONT, 1); // resume/continue/restart task
	state_str = (p->state==-1)?"unrunnable":((p->state==0)?"runnable":"stopped");
	printk(KBUILD_MODNAME ": cont pid task a: %p c: %s p: [%d] s: %s\n",
	       p, p->comm, p->pid, state_str);
	return 0;
}
/*
 * callmodule_exit() - module unload hook.
 *
 * Switches tracing off again (the counterpart of the tracing_on() call in
 * the init function) and logs that the module is leaving.
 */
static void __exit callmodule_exit(void)
{
	/* same switch as the user-space /sys/kernel/debug/tracing/tracing_on file */
	tracing_off();

	printk(KBUILD_MODNAME ": < exit\n");
}
module_init(callmodule_init);
module_exit(callmodule_exit);
MODULE_LICENSE("GPL");

Resources