Why does this LLVM IR code lead to a segfault? - malloc

I'm making a programming language and I'm trying to implement arrays.
This is the IR code that my compiler generated and I'm having trouble figuring out what might be wrong with it.
I allocate 8 bytes with malloc and I store the pointer as i32.
Then I convert it to an i32* and attempt to write 65 (ASCII 'A') to it.
After that I try to read back the value and write it to stdout.
; Compiler-generated module: thin wrappers around getchar/putchar plus a main
; that allocates 8 bytes, writes 65 through the returned address, reads it
; back and prints it.
; NOTE(review): global names are spelled with '#' here; LLVM IR spells globals
; with '@' — this looks like an extraction artifact, confirm against the
; original post.
declare i32 #putchar(i32)
; readByte: returns whatever getchar() returns.
define i32 #readByte(){
%out = call i32 #getchar()
ret i32 %out
}
declare i32 #getchar()
; writeByte: forwards %value to putchar and discards putchar's result.
define void #writeByte(i32 %value){
call i32 #putchar(i32 %value)
ret void
}
; NOTE(review): malloc is declared as taking and returning i32. If the target
; has 64-bit pointers, the returned address would be cut down to 32 bits —
; a probable cause of the reported segfault. TODO confirm target pointer width.
declare i32 #malloc(i32)
define i32 #main() {
; Return-value slot, initialised to 0 (add 0, 0 just materialises the constant).
%returnValue = alloca i32
%id910082450.val = add i32 0, 0
store i32 %id910082450.val, i32* %returnValue
; %arr is an i32 stack slot that holds malloc's result as an integer.
%arr = alloca i32
%id399214524.val = add i32 0, 8
%id594130153.val = call i32 #malloc(i32 %id399214524.val)
store i32 %id594130153.val, i32* %arr
; Write 65 through the address rebuilt from the stored i32.
%id137404173.val = add i32 0, 65
%id174826494.tmp = load i32, i32* %arr
; inttoptr from i32: on a 64-bit target the rebuilt pointer only carries the
; low 32 bits of the original address (see the note on malloc's declaration).
%id174826494.tmp2 = inttoptr i32 %id174826494.tmp to i32*
store i32 %id137404173.val, i32* %id174826494.tmp2
; Read the value back through the same rebuilt address and print it.
%id723212344.tmp = load i32, i32* %arr
%id723212344.tmp2 = inttoptr i32 %id723212344.tmp to i32*
%id723212344.val = load i32, i32* %id723212344.tmp2
call void #writeByte(i32 %id723212344.val)
%returnValue.tmp = load i32, i32* %returnValue
ret i32 %returnValue.tmp
}
The error I get is
2265 Segmentation fault ./out.bin

Related

Rust compiler generating intrinsic llvm.add call instruction while clang generates normal add?

While working with llvm ir I noticed that when compiling a simple addition in c, clang will generate a normal llvm add instruction. However when I compile the same code written in rust, rustc generates a call to
%38 = call { i32, i1 } #llvm.ssub.with.overflow.i32(i32 %37, i32 5), !dbg !597
%39 = extractvalue { i32, i1 } %38, 0, !dbg !597
%40 = extractvalue { i32, i1 } %38, 1, !dbg !597
%41 = call i1 #llvm.expect.i1(i1 %40, i1 false), !dbg !597
br i1 %41, label %panic1, label %bb9, !dbg !597
followed by two extractvalue instructions and some according error handling if an overflow has occurred.
Why does it do that? As far as I understand, the normal add instruction also provides overflow handling, through the nsw keyword:
If the nuw and/or nsw keywords are present, the result value of the add is a poison value if unsigned and/or signed overflow, respectively, occurs.
as I understand, when the IR is further lowered to assembly, it will result in the same code?
TL;DR:
as I understand, when the IR is further lowered to assembly, it will result in the same code?
No, it will not. rustc (in debug mode) ~= clang + undefined behaviour sanitiser UBSAN.
Explanation
In debug mode rustc generates code to capture and panic on integer overflows. e.g.
/// Returns `num + i32::MAX`.
/// Any `num` greater than zero makes the sum exceed `i32::MAX`, so this
/// function exists to demonstrate how the compiler treats signed overflow.
pub fn bad_add(num: i32) -> i32 {
num + i32::MAX
}
Results in;
define i32 #_ZN7example7bad_add17ha9c5f96e25ec3c52E(i32 %num) unnamed_addr #0 !dbg !5 {
start:
%0 = call { i32, i1 } #llvm.sadd.with.overflow.i32(i32 %num, i32 2147483647), !dbg !10
%_3.0 = extractvalue { i32, i1 } %0, 0, !dbg !10
%_3.1 = extractvalue { i32, i1 } %0, 1, !dbg !10
%1 = call i1 #llvm.expect.i1(i1 %_3.1, i1 false), !dbg !10
br i1 %1, label %panic, label %bb1, !dbg !10
bb1: ; preds = %start
ret i32 %_3.0, !dbg !11
panic: ; preds = %start
call void #_ZN4core9panicking5panic17hab046c3856b52f65E([0 x i8]* align 1 bitcast ([28 x i8]* #str.0 to [0 x i8]*), i64 28, %"core::panic::location::Location"* align 8 bitcast (<{ i8*, [16 x i8] }>* #alloc7 to %"core::panic::location::Location"*)) #4, !dbg !10
unreachable, !dbg !10
}
However in release mode e.g. adding -C opt-level=3 we get
define i32 #_ZN7example7bad_add17ha9c5f96e25ec3c52E(i32 %num) unnamed_addr #0 !dbg !5 {
%0 = add i32 %num, 2147483647, !dbg !10
ret i32 %0, !dbg !11
}
Note that the checks and calls to panic are now removed.
With C/clang we won't get exactly the same result, e.g.
#include <limits.h>
// Type your code here, or load an example.
/* Returns INT_MAX + num. Any num greater than zero overflows int; the
 * function is the C counterpart of the Rust bad_add example. */
int bad_add(int num) {
return INT_MAX + num;
}
Will result in;
define dso_local i32 #bad_add(i32 %0) #0 {
%2 = alloca i32, align 4
store i32 %0, i32* %2, align 4
%3 = load i32, i32* %2, align 4
%4 = add nsw i32 2147483647, %3
ret i32 %4
}
To generate similar code in C you can enable UBSAN, e.g. by adding -fsanitize=undefined (or, more specifically, just the signed-integer checker, -fsanitize=signed-integer-overflow) to your command line. This is usually enabled when running fuzz tests.
Enabling UBSAN with clang we get very similar (though not identical) output to rustc in debug mode;
define dso_local i32 #bad_add(i32 %0) #0 {
%2 = alloca i32, align 4
store i32 %0, i32* %2, align 4
%3 = load i32, i32* %2, align 4
%4 = call { i32, i1 } #llvm.sadd.with.overflow.i32(i32 2147483647, i32 %3), !nosanitize !2
%5 = extractvalue { i32, i1 } %4, 0, !nosanitize !2
%6 = extractvalue { i32, i1 } %4, 1, !nosanitize !2
%7 = xor i1 %6, true, !nosanitize !2
br i1 %7, label %10, label %8, !prof !3, !nosanitize !2
8: ; preds = %1
%9 = zext i32 %3 to i64, !nosanitize !2
call void #__ubsan_handle_add_overflow(i8* bitcast ({ { [10 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }* }* #1 to i8*), i64 2147483647, i64 %9) #3, !nosanitize !2
br label %10, !nosanitize !2
10: ; preds = %8, %1
ret i32 %5
}
Note that we now get the same LLVM call to llvm.sadd.with.overflow for the C function with UBSAN enabled. Also, you'll notice that __ubsan_handle_add_overflow essentially prints the problem with a backtrace and then exits. This is effectively the same behaviour as Rust's panic.

Why does reference comparison work on a constant, but not on a variable with the same value?

In the below code:
/// Returns `true` when `x` has the same address as the `&5` expression below.
/// Both sides are cast to raw pointers, so the comparison is on addresses,
/// not on the integers they point to.
fn is_five(x: &i32) -> bool {
x as *const i32 == &5 as *const i32
}
/// Exercises `is_five` with a local variable and with two literals.
fn main() {
// A local initialised to 5; `&x` is the address of this local.
let x = 5;
// Address of the local: the question reports is_five(&x) as false.
assert!(!is_five(&x));
// Address of a literal 5: the question reports is_five(&5) as true.
assert!(is_five(&5));
// Address of a literal 6.
assert!(!is_five(&6));
println!("Success!");
}
Why is is_five(&x) false, while is_five(&5) is true?
Code demo in Rust playground
Writing &5 to take the address of a constant may seem odd, but in this case the compiler decides to place the value in storage (in the constant section) so that its address can be taken.
If this happens several times in the code, there is no need to have separate storages, all of them holding the same value.
If you try the following code in godbolt, you will see that the constant 5 is placed once in memory and the linker refers to it from several places.
In is_five(): lea rax, [rip + .L__unnamed_1]
In test2(): lea rdi, [rip + .L__unnamed_1]
Thus &5 refers to the same address, but &x, which is a local variable, refers to another storage for x initialised with the same value (5).
In test1(): lea rdi, [rsp + 4]
/// Returns `true` when `x` has the same address as the `&5` expression below.
/// The casts to `*const i32` make this an address comparison rather than a
/// comparison of the referenced values.
pub fn is_five(x: &i32) -> bool {
x as *const i32 == &5 as *const i32
}
/// Calls `is_five` with the address of a local variable that holds 5.
pub fn test1() -> bool {
let x = 5;
is_five(&x)
}
/// Calls `is_five` with the address of a literal 5.
pub fn test2() -> bool {
is_five(&5)
}
If distinction between reference and pointer comparisons is not clear, this documentation can help.

cat file to terminal and color a specific word [duplicate]

This question already has answers here:
Can you colorize specific lines that are grepped from a file?
(5 answers)
Closed 4 years ago.
I've read several posts that handle syntax highlighting, cat and grep (like this one and this one too) but haven't yet found what I'm looking for. I want to print an entire text file to terminal, and have a specific (repeating) word colored in red. Lines that do not have this word should be printed out too. Here's something that comes close, but only
prints lines that contain the specific word. This post doesn't show it, but the load word is colored red like it should.
$ cat input.ll | grep "load"
%tmp = load %class.A*, %class.A** %p, align 8
%vtable = load i32 (%class.A*, i32, i32, i32)**, i32 (%class.A*, i32, i32, i32)*** %tmp1, align 8
%tmp2 = load i32 (%class.A*, i32, i32, i32)*, i32 (%class.A*, i32, i32, i32)** %vfn, align 8
%tmp3 = load i32, i32* #x, align 4
And here is the original file:
$ cat input.ll
%tmp = load %class.A*, %class.A** %p, align 8
%tmp1 = bitcast %class.A* %tmp to i32 (%class.A*, i32, i32, i32)***
%vtable = load i32 (%class.A*, i32, i32, i32)**, i32 (%class.A*, i32, i32, i32)*** %tmp1, align 8
%vfn = getelementptr inbounds i32 (%class.A*, i32, i32, i32)*, i32 (%class.A*, i32, i32, i32)** %vtable, i64 2
%tmp2 = load i32 (%class.A*, i32, i32, i32)*, i32 (%class.A*, i32, i32, i32)** %vfn, align 8
%tmp3 = load i32, i32* #x, align 4
%call = call i32 %tmp2(%class.A* %tmp, i32 3, i32 %tmp3, i32 7)
I suggest with GNU grep:
grep --color -E 'load|$' file

Why does Rust store i64s captured by a closure as i64*s in the LLVM IR closure environment?

In this simple example
/// Calls `f` once with `x` and returns its result.
/// `#[inline(never)]` asks the compiler not to inline this function —
/// presumably so `apply` stays visible as a separate function in the emitted IR.
#[inline(never)]
fn apply<F, A, B>(f: F, x: A) -> B
where F: FnOnce(A) -> B {
f(x)
}
/// Builds a closure over two locals and passes it to `apply`.
fn main() {
let y: i64 = 1;
let z: i64 = 2;
// No `move` keyword: the closure uses `y` and `z` from the enclosing scope
// without taking ownership of them.
let f = |x: i64| x + y + z;
print!("{}", apply(f, 42));
}
the closure passed to apply is passed as a LLVM IR {i64*, i64*}*:
%closure = type { i64*, i64* }
define internal fastcc i64 #apply(%closure* noalias nocapture readonly dereferenceable(16)) unnamed_addr #0 personality i32 (i32, i32, i64, %"8.unwind::libunwind::_Unwind_Exception"*, %"8.unwind::libunwind::_Unwind_Context"*)* #rust_eh_personality {
entry-block:
%1 = getelementptr inbounds %closure, %closure* %0, i64 0, i32 1
%2 = getelementptr inbounds %closure, %closure* %0, i64 0, i32 0
%3 = load i64*, i64** %2, align 8
%4 = load i64*, i64** %1, align 8
%.idx.val.val.i = load i64, i64* %3, align 8, !noalias !1
%.idx1.val.val.i = load i64, i64* %4, align 8, !noalias !1
%5 = add i64 %.idx.val.val.i, 42
%6 = add i64 %5, %.idx1.val.val.i
ret i64 %6
}
(apply actually has a more complicated name in the generated LLVM code.)
This causes two loads to get to each of the captured variables. Why isn't %closure just {i64, i64} (which would make the argument to apply {i64, i64}*)?
Closures capture by reference by default. You can change that behavior to capture by value by adding the move keyword before the parameter list:
let f = move |x: i64| x + y + z;
This generates much leaner code:
define internal fastcc i64 #apply(i64 %.0.0.val, i64 %.0.1.val) unnamed_addr #0 personality i32 (i32, i32, i64, %"8.unwind::libunwind::_Unwind_Exception"*, %"8.unwind::libunwind::_Unwind_Context"*)* #rust_eh_personality {
entry-block:
%0 = add i64 %.0.0.val, 42
%1 = add i64 %0, %.0.1.val
ret i64 %1
}
Adding the move keyword means that any value that the closure uses will be moved into the closure's environment. In the case of integers, which are Copy, it doesn't make much difference, but in the case of other types like String, it means that you can't use the String anymore in the outer scope after creating the closure. It's an all-or-nothing deal, but you can manually take references to individual variables outside a move closure and have the closure use these references instead of the original values to get manual capture-by-reference behavior.
Can you observe the value vs ref difference somehow in this code?
If you take the address of the captured variable, you can observe the difference. Notice how the first and second output lines are the same, and the third is different.

What does the "box" keyword do?

In Rust, we can use the Box<T> type to allocate things on the heap. This type is used to safely abstract pointers to heap memory. Box<T> is provided by the Rust standard library.
I was curious about how Box<T> allocation is implemented, so I found its source code. Here is the code for Box<T>::new (as of Rust 1.0):
// Excerpt of the standard library's `Box<T>` constructor (as of Rust 1.0).
impl<T> Box<T> {
/// Allocates memory on the heap and then moves `x` into it.
/// [...]
#[stable(feature = "rust1", since = "1.0.0")]
#[inline(always)]
pub fn new(x: T) -> Box<T> {
// The whole body is a single `box` expression; the allocation itself is
// produced by the compiler for this keyword, not by library code here.
box x
}
}
The only line in the implementation returns the value box x. This box keyword is not explained anywhere in the official documentation; in fact, it is only mentioned briefly on the std::boxed documentation page.
NOTE: This reply is a bit old. Since it talks about internals and unstable features, things have changed a little bit. The basic mechanism remains the same though, so the answer is still capable of explaining the underlying mechanisms of box.
What does box x usually use to allocate and free memory?
The answer is the functions marked with lang items exchange_malloc for allocation and exchange_free for freeing. You can see the implementation of those in the default standard library at heap.rs#L112 and heap.rs#L125.
In the end the box x syntax depends on the following lang items:
owned_box on a Box struct to encapsulate the allocated pointer. This struct does not need a Drop implementation, it is implemented automatically by the compiler.
exchange_malloc to allocate the memory.
exchange_free to free the previously allocated memory.
This can be effectively seen in the lang items chapter of the unstable rust book using this no_std example:
#![feature(lang_items, box_syntax, start, no_std, libc)]
#![no_std]
extern crate libc;
extern {
fn abort() -> !;
}
// `owned_box` lang item: tags this tuple struct, which wraps a raw `*mut T`,
// as the box type for this `no_std` example. No `Drop` impl is written for it.
#[lang = "owned_box"]
pub struct Box<T>(*mut T);
/// `exchange_malloc` lang item for this example.
/// Requests `size` bytes from `libc::malloc` and returns the pointer as
/// `*mut u8`; `_align` is ignored. Calls `abort()` if malloc returns null.
#[lang = "exchange_malloc"]
unsafe fn allocate(size: usize, _align: usize) -> *mut u8 {
let p = libc::malloc(size as libc::size_t) as *mut u8;
// malloc failed
if p as usize == 0 {
abort();
}
p
}
/// `exchange_free` lang item for this example.
/// Hands `ptr` to `libc::free`; `_size` and `_align` are ignored.
#[lang = "exchange_free"]
unsafe fn deallocate(ptr: *mut u8, _size: usize, _align: usize) {
libc::free(ptr as *mut libc::c_void)
}
/// Program entry point (`#[start]`): creates one boxed integer and returns 0.
/// `argc` and `argv` are not used.
#[start]
fn main(argc: isize, argv: *const *const u8) -> isize {
// `box 1` is the only allocation in the example; `x` is never read.
let x = box 1;
0
}
// Stub definitions for the remaining lang items this `no_std` example
// declares: the first two have empty bodies, and `panic_fmt` loops forever.
#[lang = "stack_exhausted"] extern fn stack_exhausted() {}
#[lang = "eh_personality"] extern fn eh_personality() {}
#[lang = "panic_fmt"] fn panic_fmt() -> ! { loop {} }
Notice how Drop was not implemented for the Box struct? Well let's see the LLVM IR generated for main:
define internal i64 #_ZN4main20hbd13b522fdb5b7d4ebaE(i64, i8**) unnamed_addr #1 {
entry-block:
%argc = alloca i64
%argv = alloca i8**
%x = alloca i32*
store i64 %0, i64* %argc, align 8
store i8** %1, i8*** %argv, align 8
%2 = call i8* #_ZN8allocate20hf9df30890c435d76naaE(i64 4, i64 4)
%3 = bitcast i8* %2 to i32*
store i32 1, i32* %3, align 4
store i32* %3, i32** %x, align 8
call void #"_ZN14Box$LT$i32$GT$9drop.103617h8817b938807fc41eE"(i32** %x)
ret i64 0
}
The allocate (_ZN8allocate20hf9df30890c435d76naaE) was called as expected to build the Box, meanwhile... Look! A Drop method for the Box (_ZN14Box$LT$i32$GT$9drop.103617h8817b938807fc41eE)! Let's see the IR for this method:
define internal void #"_ZN14Box$LT$i32$GT$9drop.103617h8817b938807fc41eE"(i32**) unnamed_addr #0 {
entry-block:
%1 = load i32** %0
%2 = ptrtoint i32* %1 to i64
%3 = icmp ne i64 %2, 2097865012304223517
br i1 %3, label %cond, label %next
next: ; preds = %cond, %entry- block
ret void
cond: ; preds = %entry-block
%4 = bitcast i32* %1 to i8*
call void #_ZN10deallocate20he2bff5e01707ad50VaaE(i8* %4, i64 4, i64 4)
br label %next
}
There it is, deallocate (_ZN10deallocate20he2bff5e01707ad50VaaE) being called in the compiler-generated Drop!
Notice that even in the standard library the Drop trait is not implemented by user code. Indeed, Box is a bit of a magical struct.
Before box was marked as unstable, it was used as a shorthand for calling Box::new. However, it's always been intended to be able to allocate arbitrary types, such as Rc, or to use arbitrary allocators. Neither of these have been finalized, so it wasn't marked as stable for the 1.0 release. This is done to prevent supporting a bad decision for all of Rust 1.x.
For further reference, you can read the RFC that changed the "placement new" syntax and also feature gated it.
box does exactly what Box::new() does - it creates an owned box.
I believe that you can't find an implementation of the box keyword because it is currently hardcoded to work with owned boxes, and the Box type is a lang item:
// Declaration of `Box<T>` as quoted from the standard library: a tuple struct
// around `Unique<T>`, tagged with the `owned_box` lang item.
#[lang = "owned_box"]
#[stable(feature = "rust1", since = "1.0.0")]
#[fundamental]
pub struct Box<T>(Unique<T>);
Because it is a lang item, the compiler has special logic to handle its instantiation, which it can link with the box keyword.
I believe that the compiler delegates box allocation to functions in alloc::heap module.
As for what the box keyword does and is supposed to do in general, Shepmaster's answer describes it perfectly.

Resources