I use the readelf utility to check (-h) an executable file, and i see the e_entry field has the value: 0x8048530 . Then i recompile the checked program and have it to print its own program entry by adding the line: printf("%p\n", (void*)main) and outputs: 0x80485e4. Why do i have this difference? (OS: Linux 32-bit)
The entry point of an executable is usually not main itself but a platform specific function (that we'll call _start) which performs initialization before calling main.
Answering the question "Can i access the _start label from the main body?":
#include <stdio.h>
int main()
{
void* res;
#if defined(__i386__)
asm("movl _start, %%eax" : "=a" (res));
#elif defined(__x86_64__)
asm("movq _start, %%rax" : "=a" (res));
#else
#error Unsupported architecture
#endif
printf("%p\n", res);
return 0;
}
Related
I created a simple code in c++ that need to be crashed. I am getting back a backtrace with this error:
/prbsft/bins/Main(_Z5FuncCv+0x14)[0x5571ea64dd80]
Now I'm trying to use addr2line to get the error line in the function.
So I used:
addr2line -e /prbsft/bins/prbMain 0x5594262a8d80
But all I got is 0:??.
I have also tried to use 0x14 address instead of 0x5594262a8d80 but it returns the same result.
I'm using Ubuntu. addr2line version is:
GNU addr2line (GNU Binutils for Ubuntu) 2.30
Any idea?
Here is the output:
Program received signal SIGSEGV, Segmentation fault. 0x0000555555554d80 in FuncC () at main.cpp:34
warning: Source file is more recent than executable.
34 std::cout << k->n << std::endl;
(gdb) bt
#0 0x0000555555554d80 in FuncC () at main.cpp:34
#1 0x0000555555554db1 in FuncB () at main.cpp:39
#2 0x0000555555554dbd in FuncA () at main.cpp:44
#3 0x0000555555554dda in main () at main.cpp:53
The 0x55XXXXXXXXXX address you got is most likely the memory address of the function after the EXE is loaded from disk. addr2line only recognizes VMA addresses like the ones that you got from disassembling with objdump.
Let's call your function Foo. addr2line expects FooVMA or if you're using --section=.text, then Foofile - textfile. Functions like backtrace returns Foomem. One easy way that works most of the time is to calculate FooVMA = Foofile = Foomem - ELFmem. But that assumes VMAbase = 0, which is not true for all linkers (i.e. linker scripts). Examples would be the GCC 5.4 on Ubuntu 16 (0x400000) and clang 11 on MacOS (0x100000000). Here's an example that uses dladdr & dladdr1 to translate it to the VMA address.
#include <execinfo.h>
#include <link.h>
#include <stdlib.h>
#include <stdio.h>
// converts a function's address in memory to its VMA address in the executable file. VMA is what addr2line expects
size_t ConvertToVMA(size_t addr)
{
Dl_info info;
link_map* link_map;
dladdr1((void*)addr,&info,(void**)&link_map,RTLD_DL_LINKMAP);
return addr-link_map->l_addr;
}
void PrintCallStack()
{
void *callstack[128];
int frame_count = backtrace(callstack, sizeof(callstack)/sizeof(callstack[0]));
for (int i = 0; i < frame_count; i++)
{
char location[1024];
Dl_info info;
if(dladdr(callstack[i],&info))
{
// use addr2line; dladdr itself is rarely useful (see doc)
char command[256];
size_t VMA_addr=ConvertToVMA((size_t)callstack[i]);
//if(i!=crash_depth)
VMA_addr-=1; // https://stackoverflow.com/questions/11579509/wrong-line-numbers-from-addr2line/63841497#63841497
snprintf(command,sizeof(command),"addr2line -e %s -Ci %zx",info.dli_fname,VMA_addr);
system(command);
}
}
}
void Foo()
{
PrintCallStack();
}
int main()
{
Foo();
return 0;
}
I'm trying to create a linux kernel module that will disable the data cache. I'm trying to use the v7_exit_coherency_flush(all) function in arch/arm/include/asm/cacheflush.h, and this function calls v7_flush_dcache_all, which I found is in arch/arm/mm/arch-v7.S.
My issue is that when I try to make my module, I get a warning
WARNING: "v7_flush_dcache_all [/home/pi/Documents/ARMHammer/kern/my_kernel/cache_disable.ko] undefined!
and when I try to insert the module I get an error
insmod: ERROR: could not insert module cache_disable.ko: Unknown symbol in module
So it looks like that ach-v7.S file isn't being read. I tried simply including it in my main file, but that produced a lot of errors, probably because its an assembly file.
I'm pretty much stuck at this point, is there someway I can include the assembly file in the Makefile, or maybe I'm not including all of the necessary .h files?
For what its worth, here's my main file
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h> /* For current */
#include <linux/tty.h> /* For the tty declarations */
#include <linux/version.h> /* For LINUX_VERSION_CODE */
#include <linux/mm.h>
#include <asm/cp15.h>
#include <asm/cacheflush.h>
#include <asm/glue-cache.h>
#include <asm/shmparam.h>
#include <asm/cachetype.h>
#include <asm/outercache.h>
// #include "my_cache-v7.h"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Peter Jay Salzman");
static void print_string(char *str)
{
struct tty_struct *my_tty;
#if ( LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,5) )
my_tty = current->tty;
#else
my_tty = current->signal->tty;
#endif
if (my_tty != NULL) {
((my_tty->ops)->write) (my_tty, /* The tty itself */
#if ( LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,9) )
0, /* Don't take the string
from user space */
#endif
str, /* String */
strlen(str)); /* Length */
#if ( LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,9) )
((my_tty->ops)->write) (my_tty, 0, "\015\012", 2);
#else
((my_tty->ops)->write) (my_tty, "\015\012", 2);
#endif
}
}
static int __init print_string_init(void)
{
v7_exit_coherency_flush(all);
print_string("The module has been inserted. Hello world!");
return 0;
}
static void __exit print_string_exit(void)
{
print_string("The module has been removed. Farewell world!");
}
module_init(print_string_init);
module_exit(print_string_exit);
and my Makefile
obj-m += cache_disable.o
KDIR = /home/pi/linux/
all:
make -C $(KDIR) M=$(PWD) modules
clean:
make -C $(KDIR) M=$(PWD) clean
Also, if anybody knows an easier way to disable the cache, I'm all ears!
v7_exit_coherency_flush() is for power management code to take a CPU out of the kernel cleanly in order to power it off - it's not callable from random modules for very good reason. If you really want to lose data and crash the machine in weird and subtle ways, you might as well just bypass kernel functions entirely and use a trivial inline asm to hit the SCTLR directly*.
I dread to imagine what you're trying to achieve, but if you really want to run Linux (painfully slowly) with the cache off, you'll need to rebuild the kernel, turning off CONFIG_SMP in order to turn on CONFIG_CPU_DCACHE_DISABLE. That's the only vaguely supported method which might work.
* I'm not even going to explain that, it's that terrible an idea.
I am attempting to do the following - write a wrapper for the pthreads library that will log some information whenever each of its APIs it called.
One piece of info I would like to record is the stack trace.
Below is the minimal snippet from the original code that can be compiled and run AS IS.
Initializations (file libmutex.c):
#include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <dlfcn.h>
static int (*real_mutex_lock)(pthread_mutex_t *) __attribute__((__may_alias__));
static void *pthread_libhandle;
#ifdef _BIT64
#define PTHREAD_PATH "/lib64/libpthread.so.0"
#else
#define PTHREAD_PATH "/lib/libpthread.so.0"
#endif
static inline void load_real_function(char* function_name, void** real_func) {
char* msg;
*(void**) (real_func) = dlsym(pthread_libhandle, function_name);
msg = dlerror();
if (msg != NULL)
printf("init: real_%s load error %s\n", function_name, msg);
}
void __attribute__((constructor)) my_init(void) {
printf("init: trying to dlopen '%s'\n", PTHREAD_PATH);
pthread_libhandle = dlopen(PTHREAD_PATH, RTLD_LAZY);
if (pthread_libhandle == NULL) {
fprintf(stderr, "%s\n", dlerror());
exit(EXIT_FAILURE);
}
load_real_function("pthread_mutex_lock", (void**) &real_mutex_lock);
}
The wrapper and the call to backtrace.
I have chopped as much as possible from the methods, so yes, I know that I never call the original pthread_mutex_lock for example.
void my_backtrace(void) {
#define SIZE 100
void *buffer[SIZE];
int nptrs;
nptrs = backtrace(buffer, SIZE);
printf("backtrace() returned %d addresses\n", nptrs);
}
int pthread_mutex_lock(pthread_mutex_t *mutex) {
printf("In pthread_mutex_lock\n"); fflush(stdout);
my_backtrace();
return 0;
}
To test this I use this binary (file tst_mutex.c):
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
int main (int argc, char *argv[]) {
pthread_mutex_t x;
printf("Before mutex\n"); fflush(stdout);
pthread_mutex_lock(&x);
printf("after mutex\n");fflush(stdout);
return 0;
}
Here is the way all this is compiled:
rm -f *.o *.so tst_mutex
cc -Wall -D_BIT64 -c -m64 -fPIC libmutex.c
cc -m64 -o libmutex.so -shared -fPIC -ldl -lpthread libmutex.o
cc -Wall -m64 tst_mutex.c -o tst_mutex
and run
LD_PRELOAD=$(pwd)/libmutex.so ./tst_mutex
This crashes with segmentation fault on Linux x86.
On Linux PPC everything works flawlessly.
I have tried a few versions of GCC compilers, GLIBC libraries and Linux distros - all fail.
The output is
init: trying to dlopen '/lib64/libpthread.so.0'
Before mutex
In pthread_mutex_lock
In pthread_mutex_lock
In pthread_mutex_lock
...
...
./run.sh: line 1: 25023 Segmentation fault LD_PRELOAD=$(pwd)/libmutex.so ./tst_mutex
suggesting that there is a recursion here.
I have looked at the source code for backtrace() - there is no call in it to locking mechanism. All it does is a simple walk over the stack frame linked list.
I have also, checked the library code with objdump, but that hasn't revealed anything out of the ordinary.
What is happening here?
Any solution/workaround?
Oh, and maybe the most important thing. This only happens with the pthread_mutex_lock function!!
Printing the stack from any other overridden pthread_* function works just fine ...
It is a stack overflow, caused by an endless recursion (as remarked by #Chris Dodd).
The backtrace() function runs different system calls being called from programs compiled with pthread library and without. Even if no pthread functions are called explicitly by the program.
Here is a simple program that uses the backtrace() function and does not use any pthread function.
#include <stdio.h>
#include <stdlib.h>
#include <execinfo.h>
int main(void)
{
void* buffer[100];
int num_ret_addr;
num_ret_addr=backtrace(buffer, 100);
printf("returned number of addr %d\n", num_ret_addr);
return 0;
}
Lets compile it without linking to the pthread and inspect the program system calls with the strace utility. No mutex related system call appears in the output.
$ gcc -o backtrace_no_thread backtrace.c
$ strace -o backtrace_no_thread.out backtrace_no_thread
No lets compile the same code linking it to the pthread library, run the strace and look at its output.
$ gcc -o backtrace_with_thread backtrace.c -lpthread
$ strace -o backtrace_with_thread.out backtrace_with_thread
This time the output contains mutex related system calls (their names may depend on the platform). Here is a fragment of the strace output file obtained on an X86 Linux machine.
futex(0x3240553f80, FUTEX_WAKE_PRIVATE, 2147483647) = 0
futex(0x324480d350, FUTEX_WAKE_PRIVATE, 2147483647) = 0
I'm trying to write some simple test code as a demonstration of hooking the system call table.
"sys_call_table" is no longer exported in 2.6, so I'm just grabbing the address from the System.map file, and I can see it is correct (Looking through the memory at the address I found, I can see the pointers to the system calls).
However, when I try to modify this table, the kernel gives an "Oops" with "unable to handle kernel paging request at virtual address c061e4f4" and the machine reboots.
This is CentOS 5.4 running 2.6.18-164.10.1.el5. Is there some sort of protection or do I just have a bug? I know it comes with SELinux, and I've tried putting it in to permissive mode, but it doesn't make a difference
Here's my code:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/unistd.h>
void **sys_call_table;
asmlinkage int (*original_call) (const char*, int, int);
asmlinkage int our_sys_open(const char* file, int flags, int mode)
{
printk("A file was opened\n");
return original_call(file, flags, mode);
}
int init_module()
{
// sys_call_table address in System.map
sys_call_table = (void*)0xc061e4e0;
original_call = sys_call_table[__NR_open];
// Hook: Crashes here
sys_call_table[__NR_open] = our_sys_open;
}
void cleanup_module()
{
// Restore the original call
sys_call_table[__NR_open] = original_call;
}
I finally found the answer myself.
http://www.linuxforums.org/forum/linux-kernel/133982-cannot-modify-sys_call_table.html
The kernel was changed at some point so that the system call table is read only.
cypherpunk:
Even if it is late but the Solution
may interest others too: In the
entry.S file you will find: Code:
.section .rodata,"a"
#include "syscall_table_32.S"
sys_call_table -> ReadOnly You have to
compile the Kernel new if you want to
"hack" around with sys_call_table...
The link also has an example of changing the memory to be writable.
nasekomoe:
Hi everybody. Thanks for replies. I
solved the problem long ago by
modifying access to memory pages. I
have implemented two functions that do
it for my upper level code:
#include <asm/cacheflush.h>
#ifdef KERN_2_6_24
#include <asm/semaphore.h>
int set_page_rw(long unsigned int _addr)
{
struct page *pg;
pgprot_t prot;
pg = virt_to_page(_addr);
prot.pgprot = VM_READ | VM_WRITE;
return change_page_attr(pg, 1, prot);
}
int set_page_ro(long unsigned int _addr)
{
struct page *pg;
pgprot_t prot;
pg = virt_to_page(_addr);
prot.pgprot = VM_READ;
return change_page_attr(pg, 1, prot);
}
#else
#include <linux/semaphore.h>
int set_page_rw(long unsigned int _addr)
{
return set_memory_rw(_addr, 1);
}
int set_page_ro(long unsigned int _addr)
{
return set_memory_ro(_addr, 1);
}
#endif // KERN_2_6_24
Here's a modified version of the original code that works for me.
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/unistd.h>
#include <asm/semaphore.h>
#include <asm/cacheflush.h>
void **sys_call_table;
asmlinkage int (*original_call) (const char*, int, int);
asmlinkage int our_sys_open(const char* file, int flags, int mode)
{
printk("A file was opened\n");
return original_call(file, flags, mode);
}
int set_page_rw(long unsigned int _addr)
{
struct page *pg;
pgprot_t prot;
pg = virt_to_page(_addr);
prot.pgprot = VM_READ | VM_WRITE;
return change_page_attr(pg, 1, prot);
}
int init_module()
{
// sys_call_table address in System.map
sys_call_table = (void*)0xc061e4e0;
original_call = sys_call_table[__NR_open];
set_page_rw(sys_call_table);
sys_call_table[__NR_open] = our_sys_open;
}
void cleanup_module()
{
// Restore the original call
sys_call_table[__NR_open] = original_call;
}
Thanks Stephen, your research here was helpful to me. I had a few problems, though, as I was trying this on a 2.6.32 kernel, and getting WARNING: at arch/x86/mm/pageattr.c:877 change_page_attr_set_clr+0x343/0x530() (Not tainted) followed by a kernel OOPS about not being able to write to the memory address.
The comment above the mentioned line states:
// People should not be passing in unaligned addresses
The following modified code works:
int set_page_rw(long unsigned int _addr)
{
return set_memory_rw(PAGE_ALIGN(_addr) - PAGE_SIZE, 1);
}
int set_page_ro(long unsigned int _addr)
{
return set_memory_ro(PAGE_ALIGN(_addr) - PAGE_SIZE, 1);
}
Note that this still doesn't actually set the page as read/write in some situations. The static_protections() function, which is called inside of set_memory_rw(), removes the _PAGE_RW flag if:
It's in the BIOS area
The address is inside .rodata
CONFIG_DEBUG_RODATA is set and the kernel is set to read-only
I found this out after debugging why I still got "unable to handle kernel paging request" when trying to modify the address of kernel functions. I was eventually able to solve that problem by finding the page table entry for the address myself and manually setting it to writable. Thankfully, the lookup_address() function is exported in version 2.6.26+. Here is the code I wrote to do that:
void set_addr_rw(unsigned long addr) {
unsigned int level;
pte_t *pte = lookup_address(addr, &level);
if (pte->pte &~ _PAGE_RW) pte->pte |= _PAGE_RW;
}
void set_addr_ro(unsigned long addr) {
unsigned int level;
pte_t *pte = lookup_address(addr, &level);
pte->pte = pte->pte &~_PAGE_RW;
}
Finally, while Mark's answer is technically correct, it'll case problem when ran inside Xen. If you want to disable write-protect, use the read/write cr0 functions. I macro them like this:
#define GPF_DISABLE write_cr0(read_cr0() & (~ 0x10000))
#define GPF_ENABLE write_cr0(read_cr0() | 0x10000)
Hope this helps anyone else who stumbles upon this question.
Note that the following will also work instead of using change_page_attr and cannot be depreciated:
static void disable_page_protection(void) {
unsigned long value;
asm volatile("mov %%cr0,%0" : "=r" (value));
if (value & 0x00010000) {
value &= ~0x00010000;
asm volatile("mov %0,%%cr0": : "r" (value));
}
}
static void enable_page_protection(void) {
unsigned long value;
asm volatile("mov %%cr0,%0" : "=r" (value));
if (!(value & 0x00010000)) {
value |= 0x00010000;
asm volatile("mov %0,%%cr0": : "r" (value));
}
}
If you are dealing with kernel 3.4 and later (it can also work with earlier kernels, I didn't test it) I would recommend a smarter way to acquire the system callы table location.
For example
#include <linux/module.h>
#include <linux/kallsyms.h>
static unsigned long **p_sys_call_table;
/* Aquire system calls table address */
p_sys_call_table = (void *) kallsyms_lookup_name("sys_call_table");
That's it. No addresses, it works fine with every kernel I've tested.
The same way you can use a not exported Kernel function from your module:
static int (*ref_access_remote_vm)(struct mm_struct *mm, unsigned long addr,
void *buf, int len, int write);
ref_access_remote_vm = (void *)kallsyms_lookup_name("access_remote_vm");
Enjoy!
As others have hinted, the whole story is a bit different now on modern kernels. I'll be covering x86-64 here, for syscall hijacking on modern arm64 refer to this other answer of mine. Also NOTE: this is plain and simple syscall hijacking. Non-invasive hooking can be done in a much nicer way using kprobes.
Since Linux v4.17, x86 (both 64 and 32 bit) now uses syscall wrappers that take a struct pt_regs * as the only argument (see commit 1, commit 2). You can see arch/x86/include/asm/syscall.h for the definitions.
Additionally, as others have described already in different answers, the simplest way to modify sys_call_table is to temporarily disable CR0 WP (Write-Protect) bit, which could be done using read_cr0() and write_cr0(). However, since Linux v5.3, [native_]write_cr0 will check sensitive bits that should never change (like WP) and refuse to change them (commit). In order to work around this, we need to write CR0 manually using inline assembly.
Here is a working kernel module (tested on Linux 5.10 and 5.18) that does syscall hijacking on modern Linux x86-64 considering the above caveats and assuming that you already know the address of sys_call_table (if you also want to find that in the module, see Proper way of getting the address of non-exported kernel symbols in a Linux kernel module):
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/**
* Test syscall table hijacking on x86-64. This module will replace the `read`
* syscall with a simple wrapper which logs every invocation of `read` using
* printk().
*
* Tested on Linux x86-64 v5.10, v5.18.
*
* Usage:
*
* sudo cat /proc/kallsyms | grep sys_call_table # grab address
* sudo insmod syscall_hijack.ko sys_call_table_addr=0x<address_here>
*/
#include <linux/init.h> // module_{init,exit}()
#include <linux/module.h> // THIS_MODULE, MODULE_VERSION, ...
#include <linux/kernel.h> // printk(), pr_*()
#include <asm/special_insns.h> // {read,write}_cr0()
#include <asm/processor-flags.h> // X86_CR0_WP
#include <asm/unistd.h> // __NR_*
#ifdef pr_fmt
#undef pr_fmt
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
typedef long (*sys_call_ptr_t)(const struct pt_regs *);
static sys_call_ptr_t *real_sys_call_table;
static sys_call_ptr_t original_read;
static unsigned long sys_call_table_addr;
module_param(sys_call_table_addr, ulong, 0);
MODULE_PARM_DESC(sys_call_table_addr, "Address of sys_call_table");
// Since Linux v5.3 [native_]write_cr0 won't change "sensitive" CR0 bits, need
// to re-implement this ourselves.
static void write_cr0_unsafe(unsigned long val)
{
asm volatile("mov %0,%%cr0": "+r" (val) : : "memory");
}
static long myread(const struct pt_regs *regs)
{
pr_info("read(%ld, 0x%lx, %lx)\n", regs->di, regs->si, regs->dx);
return original_read(regs);
}
static int __init modinit(void)
{
unsigned long old_cr0;
real_sys_call_table = (typeof(real_sys_call_table))sys_call_table_addr;
pr_info("init\n");
// Temporarily disable CR0 WP to be able to write to read-only pages
old_cr0 = read_cr0();
write_cr0_unsafe(old_cr0 & ~(X86_CR0_WP));
// Overwrite syscall and save original to be restored later
original_read = real_sys_call_table[__NR_read];
real_sys_call_table[__NR_read] = myread;
// Restore CR0 WP
write_cr0_unsafe(old_cr0);
pr_info("init done\n");
return 0;
}
static void __exit modexit(void)
{
unsigned long old_cr0;
pr_info("exit\n");
old_cr0 = read_cr0();
write_cr0_unsafe(old_cr0 & ~(X86_CR0_WP));
// Restore original syscall
real_sys_call_table[__NR_read] = original_read;
write_cr0_unsafe(old_cr0);
pr_info("goodbye\n");
}
module_init(modinit);
module_exit(modexit);
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Test syscall table hijacking on x86-64.");
MODULE_AUTHOR("Marco Bonelli");
MODULE_LICENSE("Dual MIT/GPL");
I'm writing a kernel module under Scientific Linux 6.3 x86_64 and I'm looking to use kprobes. In this module, I need access to the first argument of a function on return, so jprobes are out.
I found this very helpful post: Getting function arguments using kprobes
However, when I try accessing regs->rdi inside my probe, the compiler complains with
error: ‘struct pt_regs’ has no member named ‘rdi’
During my module initialization, I run this check with no problems:
#ifndef CONFIG_X86_64
printk(KERN_ALERT "Error: this module only supports x86_64!\n");
return -EINVAL;
#endif
Is there anything else I should be looking at?
uname -r returns 2.6.32-279.14.1.el6.x86_64.debug
Here is a MWE:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include <linux/blkdev.h>
static int kprobe_test(struct kprobe *p, struct pt_regs *regs) {
printk(KERN_INFO "rdi: %p\n", regs->rdi);
return 0;
}
static struct kprobe myprobe = {
.pre_handler = NULL,
.post_handler = kprobe_test,
.fault_handler = NULL,
.addr = (kprobe_opcode_t *) generic_make_request,
};
int init_module(void) {
register_kprobe(&myprobe);
return 0;
}
void cleanup_module(void) {
unregister_kprobe(&myprobe);
}
Which results in:
...
/home/user/kmod/kprobe_64_mwe/kprobe_mwe.c:7: error: ‘struct pt_regs’ has no member named ‘rdi’
...
The definition of pt_reg changes when __KERNEL__ is defined. Try using di instead.