Rust stackoverflow - find large variables on the stack - rust

I'm currently experiencing a very peculiar stack overflow.
fn generate_all_possible_attacks_for(
all_relevant_moves: &U64PerSquare,
number_of_all_relevant_moves: &U64PerSquare,
magic_numbers: &U64PerSquare,
calculate_attacks_for: &dyn Fn(&dyn BoardPos, u64) -> u64,
) -> Box<[U64PerSquare; 4096]> {
let mut all_attacks = Box::new([U64PerSquare::default(); 4096]);
//...
}
I can add a breakpoint on the function itself, but anything just after it (such as a breakpoint on the first line inside the function, or an added print) is never reached, and the program experiences a stack overflow.
Exception thrown at 0x00007FF69C048067 in chess_ai.exe: 0xC00000FD: Stack overflow (parameters: 0x0000000000000001, 0x000000E296606000).
thread 'main' has overflowed its stack
I'm suspecting a couple of too large variables are placed on the stack, since the stack trace itself is fairly short:
chess_ai.exe!__chkstk() Line 109 (d:\a01\_work\6\s\src\vctools\crt\vcstartup\src\misc\amd64\chkstk.asm:109)
chess_ai.exe!chess_logic::magic_bit_board::generate_all_possible_attacks_for(unsigned __int64[64] * all_relevant_moves, unsigned __int64[64] * number_of_all_relevant_moves, unsigned __int64[64] * magic_numbers, ref$<dyn$<core::ops::function::Fn<tuple$<ref$<dyn$<chess_logic::square::BoardPos>>,u64>,assoc$<Output,u64>>>>) Line 302 (c:\Users\elias\Documents\Projects\chess_ai\chess_logic\src\magic_bit_board.rs:302)
chess_ai.exe!chess_logic::magic_bit_board::generate_all_possible_rook_attacks() Line 294 (c:\Users\elias\Documents\Projects\chess_ai\chess_logic\src\magic_bit_board.rs:294)
chess_ai.exe!core::ops::function::FnOnce::call_once<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)(),tuple$<>>(unsigned __int64[4096][64] *(*)()) Line 227 (c:\rustc\fe5b13d681f25ee6474be29d748c65adcd91f69e\library\core\src\ops\function.rs:227)
chess_ai.exe!once_cell::sync::impl$11::force::closure$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>(once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>) Line 1212 (c:\Users\elias\.cargo\registry\src\github.com-1ecc6299db9ec823\once_cell-1.12.0\src\lib.rs:1212)
chess_ai.exe!once_cell::sync::impl$6::get_or_init::closure$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>>(once_cell::sync::impl$6::get_or_init::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>>) Line 1023 (c:\Users\elias\.cargo\registry\src\github.com-1ecc6299db9ec823\once_cell-1.12.0\src\lib.rs:1023)
chess_ai.exe!once_cell::imp::impl$4::initialize::closure$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$6::get_or_init::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>>,enum$<once_cell::sync::impl$6::get_or_init::Void>>(once_cell::imp::impl$4::initialize::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$6::get_or_init::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>>,enum$<once_cell::sync::impl$6::get_or_init::Void>> *) Line 85 (c:\Users\elias\.cargo\registry\src\github.com-1ecc6299db9ec823\once_cell-1.12.0\src\imp_std.rs:85)
chess_ai.exe!core::ops::function::impls::impl$3::call_mut<tuple$<>,dyn$<core::ops::function::FnMut<tuple$<>,assoc$<Output,bool>>>>(ref_mut$<dyn$<core::ops::function::FnMut<tuple$<>,assoc$<Output,bool>>>> * self) Line 269 (c:\rustc\fe5b13d681f25ee6474be29d748c65adcd91f69e\library\core\src\ops\function.rs:269)
chess_ai.exe!once_cell::imp::initialize_or_wait(core::sync::atomic::AtomicUsize * queue, enum$<core::option::Option<ref_mut$<dyn$<core::ops::function::FnMut<tuple$<>,assoc$<Output,bool>>>>>, 1, 18446744073709551615, Some>) Line 213 (c:\Users\elias\.cargo\registry\src\github.com-1ecc6299db9ec823\once_cell-1.12.0\src\imp_std.rs:213)
chess_ai.exe!once_cell::imp::OnceCell<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>>::initialize<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$6::get_or_init::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>>,enum$<once_cell::sync::impl$6::get_or_init::Void>>(once_cell::sync::impl$6::get_or_init::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>> self) Line 81 (c:\Users\elias\.cargo\registry\src\github.com-1ecc6299db9ec823\once_cell-1.12.0\src\imp_std.rs:81)
chess_ai.exe!once_cell::sync::OnceCell<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>>::get_or_try_init<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$6::get_or_init::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>>,enum$<once_cell::sync::impl$6::get_or_init::Void>>(once_cell::sync::impl$6::get_or_init::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>> self) Line 1063 (c:\Users\elias\.cargo\registry\src\github.com-1ecc6299db9ec823\once_cell-1.12.0\src\lib.rs:1063)
chess_ai.exe!once_cell::sync::OnceCell<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>>::get_or_init<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>>(once_cell::sync::impl$11::force::closure_env$0<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()> self) Line 1023 (c:\Users\elias\.cargo\registry\src\github.com-1ecc6299db9ec823\once_cell-1.12.0\src\lib.rs:1023)
chess_ai.exe!once_cell::sync::Lazy<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>::force<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>() Line 1211 (c:\Users\elias\.cargo\registry\src\github.com-1ecc6299db9ec823\once_cell-1.12.0\src\lib.rs:1211)
chess_ai.exe!once_cell::sync::impl$12::deref<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()>(once_cell::sync::Lazy<alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global>,alloc::boxed::Box<array$<array$<u64,64>,4096>,alloc::alloc::Global> (*)()> * self) Line 1221 (c:\Users\elias\.cargo\registry\src\github.com-1ecc6299db9ec823\once_cell-1.12.0\src\lib.rs:1221)
chess_ai.exe!chess_logic::magic_bit_board::get_rook_attacks_for(ref$<dyn$<chess_logic::square::BoardPos>> blockers, unsigned __int64) Line 48 (c:\Users\elias\Documents\Projects\chess_ai\chess_logic\src\magic_bit_board.rs:48)
chess_ai.exe!chess_ai::main() Line 19 (c:\Users\elias\Documents\Projects\chess_ai\src\main.rs:19)
chess_ai.exe!core::ops::function::FnOnce::call_once<void (*)(),tuple$<>>(void(*)()) Line 227 (c:\rustc\fe5b13d681f25ee6474be29d748c65adcd91f69e\library\core\src\ops\function.rs:227)
chess_ai.exe!std::sys_common::backtrace::__rust_begin_short_backtrace<void (*)(),tuple$<>>(void(*)() f) Line 125 (c:\rustc\fe5b13d681f25ee6474be29d748c65adcd91f69e\library\std\src\sys_common\backtrace.rs:125)
chess_ai.exe!std::rt::lang_start::closure$0<tuple$<>>(std::rt::lang_start::closure_env$0<tuple$<>> *) Line 145 (c:\rustc\fe5b13d681f25ee6474be29d748c65adcd91f69e\library\std\src\rt.rs:145)
[Inline Frame] chess_ai.exe!core::ops::function::impls::impl$2::call_once() Line 259 (c:\rustc\fe5b13d681f25ee6474be29d748c65adcd91f69e\library\core\src\ops\function.rs:259)
Is there any way to figure out what takes up space in the stack?
When trying to debug with gdb (which apparently completely kicks the bucket) (I'm also really inexperienced with gdb):
GNU gdb (GDB) 8.1
Copyright (C) 2018 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law. Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-w64-mingw32".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from .\target\debug\chess_ai.exe...(no debugging symbols found)...done.
(gdb) run
Starting program: C:\Users\elias\Documents\Projects\chess_ai\target\debug\chess_ai.exe
[New Thread 29132.0x4d98]
[New Thread 29132.0x954]
[New Thread 29132.0x793c]
[New Thread 29132.0x603c]
Thread 1 received signal SIGSEGV, Segmentation fault.
0x00007ff69c048067 in ?? ()
(gdb) bt
#0 0x00007ff69c048067 in ?? ()
Backtrace stopped: previous frame identical to this frame (corrupt stack?)
The program actually works perfectly fine on Linux (the crash occurs on windows). I was told that the stack is considerably larger on Linux. So if there is any gdb trick to find large variables, I could do that on Linux.

A boxed array is basically a vector with a fixed size, but with one caveat: when you create it using Box::new([...]), the array is created on the stack, then moved to the heap by Box::new(), whereas a vector's element data is created directly on the heap. If you find yourself creating huge boxed arrays like this one then you might be better off using vec![U64PerSquare::default(); 4096] instead, which allocates directly on the heap. The problem is most likely that you have multiple nested levels of function calls that create these boxed arrays -- that or 4096 * size_of::<U64PerSquare>() is larger than the stack already.
If you really do want the fixed-size type, there is a three-step process that gets you there without a stack allocation:
Create the Vec<T>.
Convert it to a Box<[T]>, which is an infallible operation.
Convert that to a Box<[T; N]>, which fails if the length of the boxed slice is not exactly equal to N.
In your case, that would look something like:
/// Allocates the 4096-entry board directly on the heap, never creating
/// the large array on the stack (unlike `Box::new([...; 4096])`).
fn new_board() -> Box<[U64PerSquare; 4096]> {
// vec! builds the elements on the heap; the boxed-slice -> boxed-array
// conversion only checks that the length equals 4096, so no copy occurs.
vec![Default::default(); 4096]
.into_boxed_slice()
.try_into()
.unwrap()
}
You can generalize this pattern with generics:
/// Generic heap-allocating constructor for a boxed fixed-size array:
/// builds the elements in a `Vec` (on the heap), then converts to
/// `Box<[T; N]>` without ever materializing `[T; N]` on the stack.
fn new_boxed_array<T: Clone + Default, const N: usize>() -> Box<[T; N]> {
vec![Default::default(); N]
.into_boxed_slice()
.try_into()
// Mapping the error to a string prevents the requirement T: Debug,
// and This Should Never Fail(TM) anyway (the Vec length is exactly N).
.map_err(|_| "conversion from slice to array failed")
.unwrap()
}

Related

Compile-time packet construction in Rust

I have a C++ program where I've used template metaprogramming to generate small binary-format packets to be sent "over the wire", which gives better safety and clarity than a more naive approach of allocating a fixed-size buffer and copying various data items into it using calculated-by-hand offsets.
// Builds a small fixed-layout binary packet and writes it to fd.
// Sizes and offsets are computed at compile time by membuf(); the
// buffer lives on the stack (no heap allocation involved).
int foo(int fd, long long data1, float f1)
{
auto m = membuf()
.append<char>(0xAA).append<char>(0xBB) // header bytes
.append(data1) // 8 bytes from data1
.append(f1) // 4 bytes from f1
.append(555); // in the pipe (arbitrary extra data)
return write(fd, m.data(), m.size());
}
This would send a packet consisting of the two bytes 0xAA and 0xBB, (for example) 8 bytes from data1, 4 bytes from f1, and 4 bytes forming 555. (Actual sizes of int and so on would depend on compiler/architecture details of course, but I could also use eg uint64_t type for precise control).
(Note: The full implementation of membuf isn't really relevant to the question, but you can see it here if interested: https://godbolt.org/z/sr0Cuu)
The important characteristics in this case are that:
no heap allocation is involved, and
the size of the data packet and the offsets of each of the values are calculated at compile time
it's all standard C++, no extensions, no experimental features (in fact it's C++11)
As it happens, this is compiled into a very efficient sequence of instructions which simply allocates the buffer on the stack and writes each value to the correct place within it:
foo(int, long long, float):
subq $40, %rsp
movl $-17494, %eax
movl $18, %edx
movq %rsi, 2(%rsp)
movq %rsp, %rsi
movw %ax, (%rsp)
movl $555, 14(%rsp)
movd %xmm0, 10(%rsp)
call write
addq $40, %rsp
ret
What I am looking for is a Rust solution to achieve the same thing. I don't mind necessarily if the Rust compiler can't currently produce code that is quite as efficient as above, but it is important that the above requirements are met: no heap allocation, no dynamic calculation of packet size or data offsets, no use of experimental/"unstable" language features.
I've been reading the Rust book and trying to understand if and how I could do this in Rust, but so far I've gotten nowhere:
Generic types don't seem to help, as they are more like "templates" in the original sense of the word than in the C++ sense. They also don't seem to allow parameterising by anything but types.
Macros seem to be the metaprogramming tool of choice in Rust, but unless I'm not understanding correctly they operate on token streams and, unless there's a way I'm missing, they can't do the kind of thing that the membuf example does.
Essentially: I want a generic type, parameterised by buffer size, which can take a value and return a larger, fixed-size buffer, with the data appended at the end. But maybe that specification is too C++-centric and there's another tack that can be taken in Rust - I just need to figure out what it is!
Slightly enlarging my comment with actual code: The restruct crate can do what you ask for; it does, however, require nightly as of now, as packing and unpacking are const functions, which are not stable yet.
Given your example, after adding restruct and restruct_derive to the dependencies:
#![feature(const_int_conversion)]
#![feature(const_fn)]
#![feature(const_slice_len)]
#![feature(const_transmute)]
/// A packer/unpacker for two unsigned bytes, a group of eight unsigned bytes, a group of
/// four unsigned bytes and four padding bytes; all in little endian.
#[derive(restruct_derive::Struct)]
#[fmt="< 2B 8s 4s 4x"]
struct Foo;
// Packs a tuple into a fixed-size [u8; Foo::SIZE] buffer and unpacks it
// again; SIZE/pack/unpack are generated by the restruct derive on Foo.
fn main() {
let data = (0xAA, 0xBB, [1,2,3,4,5,6,7,8], [4,3,2,1]);
println!("The buffer has a size of {}", Foo::SIZE);
// Foo::SIZE is a constant, so `buf` is a fixed-size stack array.
let buf: [u8; Foo::SIZE] = Foo::pack(data);
println!("Packed as {:?}", buf);
let unpacked: <Foo as restruct::Struct>::Unpacked = Foo::unpack(buf);
println!("Unpacked as {:?}", unpacked);
}

Why do Rust programs use so much more memory than the C, Haskell and OCaml versions?

I looked at how much RAM was used by Rust programs (RES column from top command) and I wonder why they use so much memory.
Here is an example:
use std::io;
// Minimal interactive program used to compare baseline memory usage
// across languages (see the measurements below).
fn main() {
println!("What's your name?");
let mut input = String::new();
io::stdin().read_line(&mut input).unwrap();
println!("Hello {}!", input);
}
I saw that 6 MB of memory was used before I input something.
Here is how I compiled and executed the program:
cargo build --release
./target/release/main
The equivalent C program:
#include <stdio.h>

/* Equivalent C program for the memory-usage comparison: prompt for a
 * name and print a greeting. */
int main(void) {
    printf("What's your name?\n");
    char input[100] = {0};
    /* Field width 99 bounds the read to 99 chars + NUL, preventing a
     * buffer overflow of input[100] that the unbounded "%s" allowed. */
    scanf("%99s", input);
    printf("Hello %s!\n", input);
    return 0;
}
only uses 0.6 MB. In this case, the Rust program uses 10 times more memory. In other cases, I saw that the Rust program uses 5 times more memory.
I also tested with other languages to compare.
The OCaml version:
(* OCaml version of the memory-comparison program: prompt and greet. *)
let () =
print_endline "What's your name?";
let line = read_line () in
print_string "Hello ";
print_endline line
uses 1 MB.
The Haskell version:
-- Haskell version of the memory-comparison program: prompt and greet.
main = do
putStrLn "What's your name?"
name <- getLine
putStrLn ("Hello " ++ name ++ "!")
uses 3 MB.
The Python version:
# Python version of the memory-comparison program: prompt and greet.
print("What's your name?")
name = input()
print("Hello", name, "!")
uses 7 MB, almost the same as the Rust version!
Update
I'm running Linux (ArchLinux) with Rust 1.3 (I also tried the nightly with similar results).
Update 2
Here is more data from the htop command:
VIRT RES SHR MEM% Command
15572 2936 804 0.1 ocaml
21728 2732 2528 0.1 haskell
22540 7480 4308 0.2 python
4056 668 600 0.0 c
24180 6164 1928 0.2 rust
Update 3
I did more tests with massif to see the memory usage.
For every program, I ran massif twice, as following:
valgrind --tool=massif --time-unit=B ./program
valgrind --tool=massif --pages-as-heap=yes --time-unit=B ./program
Here are the results with all the programs (as shown by ms_print):
C versions:
https://framabin.org/?dd243f8ec99155bc#Af5cPrcHnz3DsWiOStfwgW8Qq6BTVhogz/46L+sMuSs=
https://framabin.org/?261b9366c3749469#1ztDBkgVly9CanrrWWrJdh3yBFL5PEIW3OI5OLnze/Q=
Rust versions:
https://framabin.org/?0f1bac1c750e97bf#AXwlFYYPHeazq9LfsTOpRBaUTTkb1NfN9ExPorDJud0=
https://framabin.org/?c24b21b01af36782#OLFWdwLjVG2t7eoLqLFhe0Pp8Q8pA2S/oq4jdRRWPzI=
OCaml versions:
https://framabin.org/?060f05bea318109c#/OJQ8reHCU3CzzJ5NCOCLOYJQFnA1VgxqAIVjgQWX9I=
https://framabin.org/?8ff1ffb6d03cb37a#GN8bq3Wrm6tNWaINIhMAr4ieltLtOPjuZ4Ynof9bV4w=
Haskell versions:
https://framabin.org/?b204bd978b8c1fd8#DyQH862AM8NEPTKlzEcZgoapPaZLdlF9W3dRn47K5yU=
https://framabin.org/?ac1aa89fcaeb782c#TQ+uAiqerjHuuEEIhehVitjm63nc3wu5wfivAeBH5uI=
Python versions:
https://framabin.org/?197e8b90df5373ec#aOi0+tEj32Na5jW66Kl97q2lsjSZ2x7Cwl/pOt0lYIM=
https://framabin.org/?397efa22484e3992#1ylOrmjKaA9Hg7gw7H7rKGM0MyxuvKwPNN1J/jLEMrk=
Summary (ram usage):
|------------|----------|----------|----------|----------|----------|
| | C | Haskell | OCaml | Rust | Python |
|------------|----------|----------|----------|----------|----------|
| First run | 1 B | 63.12 KB | 5.993 MB | 816 B | 1.321 MB |
|------------|----------|----------|----------|----------|----------|
| Second run | 6.031 MB | 24.20 MB | 17.14 MB | 25.60 MB | 27.43 MB |
|------------|----------|----------|----------|----------|----------|
The first run is without the --pages-as-heap=yes parameter.
I also ran massif with the --stacks=yes option for C and Rust.
C version:
https://framabin.org/?b3009d198ccfdee1#HxR6LPPAzt15K+wIFdaqlfSJjBrJvhV2ZHWdElg3ezc=
(3.141 KB)
Rust version:
https://framabin.org/?b446d8d76c279007#tHnGiOnRstTA2krhz6cgfvTjI+FclcZS3rqyZvquWdQ=
(8.602 KB)
What does explain such a huge difference between heap block allocation and page allocation in Rust?
Because the standard library is statically linked.
You can overcome this by compiling with the -C prefer-dynamic option.
As to the reason behind having the standard library statically linked: it increases executable portability (ie: no need for the standard library to be installed in target system).
Since this question is on top results from google, I would like to give an update for anybody looking at this in 2022. I ran the exact same program and measured rust RSS from htop. It shows 924KB. That is 0.92MB. Apparently rust has improved a lot in these years.
This article has a very good discussion of the topic. Some of the largest and most common culprits are cargo's default to debug builds (not relevant in your case) and statically including libraries by default.

Can malloc_trim() release memory from the middle of the heap?

I am confused about the behaviour of malloc_trim as implemented in the glibc.
man malloc_trim
[...]
malloc_trim - release free memory from the top of the heap
[...]
This function cannot release free memory located at places other than the top of the heap.
When I now look up the source of malloc_trim() (in malloc/malloc.c) I see that it calls mtrim() which is utilizing madvise(x, MADV_DONTNEED) to release memory back to the operating system.
So I wonder if the man-page is wrong or if I misinterpret the source in malloc/malloc.c.
Can malloc_trim() release memory from the middle of the heap?
There are two usages of madvise with MADV_DONTNEED in glibc now: http://code.metager.de/source/search?q=MADV_DONTNEED&path=%2Fgnu%2Fglibc%2Fmalloc%2F&project=gnu
H A D arena.c 643 __madvise ((char *) h + new_size, diff, MADV_DONTNEED);
H A D malloc.c 4535 __madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
There was https://sourceware.org/git/?p=glibc.git;a=commit;f=malloc/malloc.c;h=68631c8eb92ff38d9da1ae34f6aa048539b199cc commit by Ulrich Drepper on 16 Dec 2007 (part of glibc 2.9 and newer):
malloc/malloc.c (public_mTRIm): Iterate over all arenas and call
mTRIm for all of them.
(mTRIm): Additionally iterate over all free blocks and use madvise
to free memory for all those blocks which contain at least one
memory page.
mTRIm (now mtrim) implementation was changed. Unused parts of chunks, aligned on page size and having size more than page may be marked as MADV_DONTNEED:
/* See whether the chunk contains at least one unused page. */
char *paligned_mem = (char *) (((uintptr_t) p
+ sizeof (struct malloc_chunk)
+ psm1) & ~psm1);
assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
assert ((char *) p + size > paligned_mem);
/* This is the size we could potentially free. */
size -= paligned_mem - (char *) p;
if (size > psm1)
madvise (paligned_mem, size & ~psm1, MADV_DONTNEED);
Man page of malloc_trim is there: https://github.com/mkerrisk/man-pages/blob/master/man3/malloc_trim.3 and it was committed by kerrisk in 2012: https://github.com/mkerrisk/man-pages/commit/a15b0e60b297e29c825b7417582a33e6ca26bf65
As I can grep the glibc's git, there are no man pages in the glibc, and no commit to malloc_trim manpage to document this patch. The best and the only documentation of glibc malloc is its source code: https://sourceware.org/git/?p=glibc.git;a=blob;f=malloc/malloc.c
Additional functions:
malloc_trim(size_t pad);
609 /*
610 malloc_trim(size_t pad);
611
612 If possible, gives memory back to the system (via negative
613 arguments to sbrk) if there is unused memory at the `high' end of
614 the malloc pool. You can call this after freeing large blocks of
615 memory to potentially reduce the system-level memory requirements
616 of a program. However, it cannot guarantee to reduce memory. Under
617 some allocation patterns, some large free blocks of memory will be
618 locked between two used chunks, so they cannot be given back to
619 the system.
620
621 The `pad' argument to malloc_trim represents the amount of free
622 trailing space to leave untrimmed. If this argument is zero,
623 only the minimum amount of memory to maintain internal data
624 structures will be left (one page or less). Non-zero arguments
625 can be supplied to maintain enough trailing space to service
626 future expected allocations without having to re-obtain memory
627 from the system.
628
629 Malloc_trim returns 1 if it actually released any memory, else 0.
630 On systems that do not support "negative sbrks", it will always
631 return 0.
632 */
633 int __malloc_trim(size_t);
634
Freeing from the middle of the heap is not documented as text in malloc/malloc.c (the malloc_trim description in the comment was not updated in 2007), nor is it documented in the man-pages project. The man page from 2012 may be the first man page of the function, and it was not written by the authors of glibc. The glibc info page only mentions M_TRIM_THRESHOLD of 128 KB:
https://www.gnu.org/software/libc/manual/html_node/Malloc-Tunable-Parameters.html#Malloc-Tunable-Parameters and does not list the malloc_trim function https://www.gnu.org/software/libc/manual/html_node/Summary-of-Malloc.html#Summary-of-Malloc (it also does not document memusage/memusagestat/libmemusage.so).
You may ask Drepper and other glibc developers again as you already did in https://sourceware.org/ml/libc-help/2015-02/msg00022.html "malloc_trim() behaviour", but there is still no reply from them. (Only wrong answers from other users like https://sourceware.org/ml/libc-help/2015-05/msg00007.html https://sourceware.org/ml/libc-help/2015-05/msg00008.html)
Or you may test the malloc_trim with this simple C program (test_malloc_trim.c) and strace/ltrace:
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <malloc.h>
/* Demonstrates that malloc_trim() can release memory from the middle of
 * the heap: m2 is freed while its neighbours m1/m3/m4 stay allocated,
 * so the freed 40000-byte hole is NOT at the top of the heap. */
int main()
{
int *m1,*m2,*m3,*m4;
printf("%s\n","Test started");
m1=(int*)malloc(20000);
m2=(int*)malloc(40000);
m3=(int*)malloc(80000);
m4=(int*)malloc(10000);
printf("1:%p 2:%p 3:%p 4:%p\n", m1, m2, m3, m4);
free(m2); /* leaves a hole between m1 and m3 */
malloc_trim(0); // 20000, 2000000
sleep(1); /* pause so the madvise call is easy to spot under strace */
free(m1);
free(m3);
free(m4);
// malloc_stats(); malloc_info(0, stdout);
return 0;
}
gcc test_malloc_trim.c -o test_malloc_trim, strace ./test_malloc_trim
write(1, "Test started\n", 13Test started
) = 13
brk(0) = 0xcca000
brk(0xcef000) = 0xcef000
write(1, "1:0xcca010 2:0xccee40 3:0xcd8a90"..., 441:0xcca010 2:0xccee40 3:0xcd8a90 4:0xcec320
) = 44
madvise(0xccf000, 36864, MADV_DONTNEED) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({1, 0}, 0x7ffffafbfff0) = 0
brk(0xceb000) = 0xceb000
So, there is madvise with MADV_DONTNEED for 9 pages after malloc_trim(0) call, when there was hole of 40008 bytes in the middle of the heap.
... utilizing madvise(x, MADV_DONTNEED) to release memory back to the
operating system.
madvise(x, MADV_DONTNEED) does not release memory. man madvise:
MADV_DONTNEED
Do not expect access in the near future. (For the time being,
the application is finished with the given range, so the kernel
can free resources associated with it.) Subsequent accesses of
pages in this range will succeed, but will result either in
reloading of the memory contents from the underlying mapped file
(see mmap(2)) or zero-fill-on-demand pages for mappings without
an underlying file.
So, the usage of madvise(x, MADV_DONTNEED) does not contradict man malloc_trim's statement:
This function cannot release free memory located at places other than the top of the heap.

SIGSEGV when using pthreads in Stop-and-Wait Protocol implementation

I'm a college student and as part of a Networks Assignment I need to do an implementation of the Stop-and-Wait Protocol. The problem statement requires using 2 threads. I am a novice to threading but after going through the man pages for the pthreads API, I wrote the basic code. However, I get a segmentation fault after the thread is created successfully (on execution of the first line of the function passed to pthread_create() as an argument).
typedef struct packet_generator_args
{
int max_pkts;
int pkt_len;
int pkt_gen_rate;
} pktgen_args;
/* generates and buffers packets at a mean rate given by the
pkt_gen_rate field of its argument; runs in a separate thread */
void *generate_packets(void *arg)
{
pktgen_args *opts = (pktgen_args *)arg; // error occurs here
buffer = (char **)calloc((size_t)opts->max_pkts, sizeof(char *));
if (buffer == NULL)
handle_error("Calloc Error");
//front = back = buffer;
........
return 0;
}
The main thread reads packets from this buffer and runs the stop-and-wait algorithm.
pktgen_args thread_args;
thread_args.pkt_len = DEF_PKT_LEN;
thread_args.pkt_gen_rate = DEF_PKT_GEN_RATE;
thread_args.max_pkts = DEF_MAX_PKTS;
/* initialize sockets and other data structures */
.....
pthread_t packet_generator;
pktgen_args *thread_args1 = (pktgen_args *)malloc(sizeof(pktgen_args));
memcpy((void *)thread_args1, (void *)&thread_args, sizeof(pktgen_args));
retval = pthread_create(&packet_generator, NULL, &generate_packets, (void *)thread_args1);
if (retval != 0)
handle_error_th(retval, "Thread Creation Error");
.....
/* send a fixed number of packets to the receiver, waiting for an ack for
each. If the ack is not received before the timeout occurs, resend the pkt */
.....
I have tried debugging using gdb but am unable to understand why a segmentation fault is occurring at the first line of my generate_packets() function. Hopefully, one of you can help. If anyone needs additional context, the entire code can be obtained at http://pastebin.com/Z3QtEJpQ. I am in a real jam here, having spent hours on this. Any help will be appreciated.
You initialize your buffer as NULL:
char **buffer = NULL;
and then in main() without further do, you try to address it:
while (!buffer[pkts_ackd]); /* wait as long as the next pkt has not
Basically my semi-educated guess is that your thread hasn't generated any packets yet and you crash on trying to access an element in NULL.
[162][04:34:17] vlazarenko#alluminium (~/tests) > cc -ggdb -o pthr pthr.c 2> /dev/null
[163][04:34:29] vlazarenko#alluminium (~/tests) > gdb pthr
GNU gdb 6.3.50-20050815 (Apple version gdb-1824) (Thu Nov 15 10:42:43 UTC 2012)
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB. Type "show warranty" for details.
This GDB was configured as "x86_64-apple-darwin"...Reading symbols for shared libraries .. done
(gdb) run
Starting program: /Users/vlazarenko/tests/pthr
Reading symbols for shared libraries +............................. done
Program received signal EXC_BAD_ACCESS, Could not access memory.
Reason: KERN_INVALID_ADDRESS at address: 0x0000000000000000
0x000000010000150d in main (argc=1, argv=0x7fff5fbffb10) at pthr.c:205
205 while (!buffer[pkts_ackd]); /* wait as long as the next pkt has not
(gdb)

SIGSEGV segmentation fault at strftime_l lib64/libc.so.6

I'm porting Pro*C code from UNIX to Linux. The code compiles and the executables are created successfully, but at run time it raises a segmentation fault. I debugged the code step by step, and below is the output of the GDB session.
Breakpoint 4 at 0x3b19690f50
(gdb) n
525 strftime (buf, MAX_STRING_LEN, "%d/%b/%Y:%H:%M:%S", dummy_time);
(gdb) n
Breakpoint 4, 0x0000003b19690f50 in strftime () from /lib64/libc.so.6
(gdb) n
Single stepping until exit from function strftime,
which has no line number information.
0x0000003b19690f70 in strftime_l () from /lib64/libc.so.6
(gdb) n
Single stepping until exit from function strftime_l,
which has no line number information.
Program received signal SIGSEGV, Segmentation fault.
0x0000003b19690f8b in strftime_l () from /lib64/libc.so.6
Actually in code the function strftime() is called. But I have no idea why it is reaching strftime_l() in /lib64/libc.so.6.
This issue does not occur on UNIX. Please help with this. The code is:
/* NOTE(review): localtime() and gmtime() each return a pointer to a
 * statically allocated struct tm that later date/time calls may
 * overwrite, so dummy_time's contents are unreliable by the time
 * strftime() reads them -- this is the bug discussed in the answer
 * below; copy the struct (memcpy) before reuse. */
static void speed_hack_libs(void)
{
time_t dummy_time_t = time(NULL);
struct tm *dummy_time = localtime (&dummy_time_t);
struct tm *other_dummy_time = gmtime (&dummy_time_t); /* result never used */
char buf[MAX_STRING_LEN];
strftime (buf, MAX_STRING_LEN, "%d/%b/%Y:%H:%M:%S", dummy_time);
}
struct tm *dummy_time = localtime (&dummy_time_t);
struct tm *other_dummy_time = gmtime (&dummy_time_t);
This is not gonna work. From the man page:
The localtime() function converts the calendar time timep to broken-down time representation, expressed relative to the user's specified time-zone. ... The return value points to a statically allocated struct which might be overwritten by
subsequent calls to any of the date and time functions.
The gmtime() function converts the calendar time timep to broken-down time representation, expressed in Coordinated Universal Time (UTC). It
may return NULL when the year does not fit into an integer. The return value points to a statically allocated struct which might be overwritten by subsequent calls to any of the date and time functions.
So, *dummy_time will probably be overwritten by the time you use it, and contain unpredictable garbage. You should copy the data to your buffer like this:
struct tm dummy_time ;
memcpy(&dummy_time, localtime (&dummy_time_t), sizeof(struct tm));
Although I'm not sure how could this cause a SIGSEGV (might be something with getting the month names etc. - check if the problem persists with LC_ALL=C), you must fix this before you can move on. Also, check (in the debugger) the contents of *dummy_time.
It is calling strftime_l because you compiled 64 bit - that is the 64 bit library entry point for strftime. You have two pointers in strftime - a string and a struct tm pointer. One of them is pointing to invalid memory. jpalacek gave you where to look first.
Did you add the time.h header file? I think you have missed it.

Resources