Get C FILE pointer from bytes::Bytes in Rust - rust

I would like to read a GRIB file downloaded from server using ecCodes library in Rust. However, my current solution results in segmentation fault. The extracted example, replicating the problem, is below.
I download the file using reqwest crate and get the response as Bytes1 using bytes(). To read the file with ecCodes I need to create a codes_handle using codes_grib_handle_new_from_file()2, which as argument requires *FILE usually get from fopen(). However, I would like to skip IO operations. So I figured I could use libc::fmemopen() to get *FILE from Bytes. But when I pass the *mut FILE from fmemopen() to codes_grib_handle_new_from_file() segmentation fault occurs.
I suspect the issue is when I get from Bytes a *mut c_void required by fmemopen(). I figured I can do this like that:
//get a *mut c_void pointer fom Bytes
//file has &Bytes type
let mut buf = BytesMut::from(file.as_ref());
let ptr = buf.as_mut_ptr();
let ptr = ptr as *mut c_void;
Because *mut is required, I create BytesMut from which I can then get mut pointer. I think those conversion are problematic, because in debugger info ptr contains a diffrent memory adress than ptr field of file.
Using *FILE got from libc::fopen() for the same file does not result in segfault. So the problem is somwhere around fmemopen().
The ecCodes library is correctly built (passes all tests and works in C) and linked (the calls in callstack are correct).
The full extracted example:
#![allow(unused)]
#![allow(non_camel_case_types)]
use bytes::{Bytes, BytesMut};
use libc::{c_char, c_void, fmemopen, size_t, FILE};
use reqwest;
use tokio;
// generated by bindgen
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct codes_handle {
_unused: [u8; 0],
}
// generated by bindgen
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct codes_context {
_unused: [u8; 0],
}
#[tokio::main]
async fn main() {
// download the grib file from server
// then get response as bytes
let url = "https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod/gfs.20210612/00/atmos/gfs.t00z.pgrb2.1p00.f000";
let file = reqwest::get(url).await.unwrap().bytes().await.unwrap();
// get Bytes from *FILE with fmemopen
// file must outlive the pointer so it is borrowed here
let file_handle = open_with_fmemopen(&file);
let grib_handle = open_with_codes(file_handle);
}
pub fn open_with_fmemopen(file: &Bytes) -> *mut FILE {
// size of buffer and mode to be read with
let size = file.len() as size_t;
let mode = "r".as_ptr() as *const c_char;
// get a *mut c_void pointer fom Bytes
let mut buf = BytesMut::from(file.as_ref());
let ptr = buf.as_mut_ptr();
let ptr = ptr as *mut c_void;
// get *FILE with fmemopen
let obj;
unsafe {
obj = fmemopen(ptr, size, mode);
}
obj
}
pub fn open_with_codes(file_handle: *mut FILE) -> *mut codes_handle {
// default context for ecCodes
let context: *mut codes_context = std::ptr::null_mut();
// variable to hold error code
let mut error: i32 = 0;
// get codes_handle from *FILE
let grib_handle;
unsafe {
// segmentation fault occurs here
grib_handle = codes_grib_handle_new_from_file(context, file_handle, &mut error as *mut i32);
}
grib_handle
}
// binding to ecCodes C library
#[link(name = "eccodes")]
extern "C" {
pub fn codes_grib_handle_new_from_file(
c: *mut codes_context,
f: *mut FILE,
error: *mut i32,
) -> *mut codes_handle;
}
And because the example might require considerable effort to set up I also attach the call stack from GDB of the seg fault:
__memmove_avx_unaligned_erms 0x00007f738b415fa6
fmemopen_read 0x00007f738b31c9b4
_IO_new_file_underflow 0x00007f738b31fd51
__GI___underflow 0x00007f738b32142e
__GI___underflow 0x00007f738b32142e
__GI__IO_default_xsgetn 0x00007f738b32142e
__GI__IO_fread 0x00007f738b312493
stdio_read 0x00007f738bb8db37
_read_any 0x00007f738bb8cf1b
read_any 0x00007f738bb8cfa3
_wmo_read_any_from_file_malloc 0x00007f738bb8e6f7
wmo_read_grib_from_file_malloc 0x00007f738bb8e7d7
grib_handle_new_from_file_no_multi 0x00007f738bb872a2
grib_new_from_file 0x00007f738bb8678f
grib_handle_new_from_file 0x00007f738bb85998
codes_grib_handle_new_from_file 0x00007f738bb8532b
example::open_with_codes main.rs:68
example::main::{{closure}} main.rs:34
core::future::from_generator::{{impl}}::poll<generator-0> mod.rs:80
tokio::park::thread::{{impl}}::block_on::{{closure}}<core::future::from_generator::GenFuture<generator-0>> thread.rs:263
tokio::coop::with_budget::{{closure}}<core::task::poll::Poll<()>,closure-0> coop.rs:106
std::thread::local::LocalKey<core::cell::Cell<tokio::coop::Budget>>::try_with<core::cell::Cell<tokio::coop::Budget>,closure-0,core::task::poll::Poll<()>> local.rs:272
std::thread::local::LocalKey<core::cell::Cell<tokio::coop::Budget>>::with<core::cell::Cell<tokio::coop::Budget>,closure-0,core::task::poll::Poll<()>> local.rs:248
tokio::coop::with_budget<core::task::poll::Poll<()>,closure-0> coop.rs:99
tokio::coop::budget<core::task::poll::Poll<()>,closure-0> coop.rs:76
tokio::park::thread::CachedParkThread::block_on<core::future::from_generator::GenFuture<generator-0>> thread.rs:263
tokio::runtime::enter::Enter::block_on<core::future::from_generator::GenFuture<generator-0>> enter.rs:151
tokio::runtime::thread_pool::ThreadPool::block_on<core::future::from_generator::GenFuture<generator-0>> mod.rs:71
tokio::runtime::Runtime::block_on<core::future::from_generator::GenFuture<generator-0>> mod.rs:452
example::main main.rs:34
core::ops::function::FnOnce::call_once<fn(),()> function.rs:227
std::sys_common::backtrace::__rust_begin_short_backtrace<fn(),()> backtrace.rs:125
std::rt::lang_start::{{closure}}<()> rt.rs:66
core::ops::function::impls::{{impl}}::call_once<(),Fn<()>> function.rs:259
std::panicking::try::do_call<&Fn<()>,i32> panicking.rs:379
std::panicking::try<i32,&Fn<()>> panicking.rs:343
std::panic::catch_unwind<&Fn<()>,i32> panic.rs:431
std::rt::lang_start_internal rt.rs:51
std::rt::lang_start<()> rt.rs:65
main 0x0000560f1d93c76c
__libc_start_main 0x00007f738b2bb565
_start 0x0000560f1d935f0e
1 From bytes crate, not std::io
2 grib_handle returned by the function is just an alias of codes_handle

1- Try changing
let mode = "r".as_ptr() as *const c_char;
to
let mode = "r\0".as_ptr() as *const c_char;
Rust's &str is not null-terminated, while you're passing it to C where string literals are expected to be null-terminated.
2- Try the following implementation for open_with_fmemopen:
pub fn open_with_fmemopen(file: &Bytes) -> *mut FILE {
unsafe {
let obj = fmemopen(file.as_ref() as *const _ as _, file.len(), "r\0".as_ptr() as _);
obj
}
}

Related

Make Rust code more DRY, calling C bindings is always almost identical

Out of a C header file I successfully created Rust bindings with bindgen.
It is used to read many different data out of a byte array.
Each property has its own handle that has to be created in the first step and then initialize in the second step.
Because of the always same steps the code became long and unclear.
let mut handle_a: SDHandleA = std::ptr::null_mut();
unsafe {
SDCreateHandleA(&mut handle_a);
SDGetPropertyHandleA(base_handle, SD_PROPERTY_NAME_HANDLE_A, handle_a);
}
let mut handle_b: SDHandleB = std::ptr::null_mut();
unsafe {
SDCreateHandleB(&mut handle_b);
SDGetPropertyHandleB(base_handle, SD_PROPERTY_NAME_HANDLE_B, handle_b);
}
let mut handle_c: SDHandleC = std::ptr::null_mut();
unsafe {
SDCreateHandleC(&mut handle_c);
SDGetPropertyHandleC(base_handle, SD_PROPERTY_NAME_HANDLE_C, handle_c);
}
let mut handle_d: SDHandleD = std::ptr::null_mut();
unsafe {
SDCreateHandleD(&mut handle_d);
SDGetPropertyHandleD(base_handle, SD_PROPERTY_NAME_HANDLE_D, handle_d);
}
All SDHandle.. are simply type aliases of *mut ::std::os::raw::c_void.
pub type SDHandle = *mut ::std::os::raw::c_void;
pub type SDHandleA = SDHandle;
pub type SDHandleB = SDHandle;
pub type SDHandleC = SDHandle;
pub type SDHandleD = SDHandle;
As Rust is capable of higher order functions, I've tried a creation-method:
fn create_handle<F, G, H>(
target_type: F,
create_step: G,
init_step: H,
base_handle: c_void,
property: *mut i8
) -> F
where
F: SDHandleA,
G: Fn(F),
H: Fn(c_void, *mut i8, F) {
..
..
}
And I called it with:
let mut handle_a: SDHandleA = create_handle(SDHandleA, SDCreateHandleA, SDGetPropertyHandleA, base_handle, SD_PROPERTY_NAME_HANDLE_A);
But Rust complains:
error[E0404]: expected trait, found type alias `SDHandleA`
--> src/lib.rs:379:12
|
123 | F: SDHandleA,
| ^^^^^^^^^ type aliases cannot be used as traits
That's right. SDHandleA is not a trait.
Is there any way to achieve this?
I would do something like this:
unsafe fn create_handle(
creator: unsafe extern "C" fn(*mut SDHandle),
initializer: unsafe extern "C" fn(c_void, *mut c_char, SDHandle),
base_handle: c_void,
property: *mut c_char
) -> SDHandle {
let mut handle: SDHandle = std::ptr::null_mut();
unsafe {
creator(&mut handle);
initializer(base_handle, property, handle);
}
handle
}
Note that type aliases type X = Y do NOT count as separate types. They are just another name for the same type. So SDHandleA, etc are the same type as SDHandle. This means you do not need a generic for the handle output.
If you want to have separate types, you should investigate the newtype pattern.

If an ffi function modifies a pointer, should the owning struct be referenced mutable?

I am currently experimenting with the FFI functionality of Rust and implemented a simble HTTP request using libcurl as an exercise. Consider the following self-contained example:
use std::ffi::c_void;
#[repr(C)]
struct CURL {
_private: [u8; 0],
}
// Global CURL codes
const CURL_GLOBAL_DEFAULT: i64 = 3;
const CURLOPT_WRITEDATA: i64 = 10001;
const CURLOPT_URL: i64 = 10002;
const CURLOPT_WRITEFUNCTION: i64 = 20011;
// Curl types
type CURLcode = i64;
type CURLoption = i64;
// Curl function bindings
#[link(name = "curl")]
extern "C" {
fn curl_easy_init() -> *mut CURL;
fn curl_easy_setopt(handle: *mut CURL, option: CURLoption, value: *mut c_void) -> CURLcode;
fn curl_easy_perform(handle: *mut CURL) -> CURLcode;
fn curl_global_init(flags: i64) -> CURLcode;
}
// Curl callback for data retrieving
extern "C" fn callback_writefunction(
data: *mut u8,
size: usize,
nmemb: usize,
user_data: *mut c_void,
) -> usize {
let slice = unsafe { std::slice::from_raw_parts(data, size * nmemb) };
let mut vec = unsafe { Box::from_raw(user_data as *mut Vec<u8>) };
vec.extend_from_slice(slice);
Box::into_raw(vec);
nmemb * size
}
type Result<T> = std::result::Result<T, CURLcode>;
// Our own curl handle
pub struct Curl {
handle: *mut CURL,
data_ptr: *mut Vec<u8>,
}
impl Curl {
pub fn new() -> std::result::Result<Curl, CURLcode> {
let ret = unsafe { curl_global_init(CURL_GLOBAL_DEFAULT) };
if ret != 0 {
return Err(ret);
}
let handle = unsafe { curl_easy_init() };
if handle.is_null() {
return Err(2); // CURLE_FAILED_INIT according to libcurl-errors(3)
}
// Set data callback
let ret = unsafe {
curl_easy_setopt(
handle,
CURLOPT_WRITEFUNCTION,
callback_writefunction as *mut c_void,
)
};
if ret != 0 {
return Err(2);
}
// Set data pointer
let data_buf = Box::new(Vec::new());
let data_ptr = Box::into_raw(data_buf);
let ret = unsafe {
curl_easy_setopt(handle, CURLOPT_WRITEDATA, data_ptr as *mut std::ffi::c_void)
};
match ret {
0 => Ok(Curl { handle, data_ptr }),
_ => Err(2),
}
}
pub fn set_url(&self, url: &str) -> Result<()> {
let url_cstr = std::ffi::CString::new(url.as_bytes()).unwrap();
let ret = unsafe {
curl_easy_setopt(
self.handle,
CURLOPT_URL,
url_cstr.as_ptr() as *mut std::ffi::c_void,
)
};
match ret {
0 => Ok(()),
x => Err(x),
}
}
pub fn perform(&self) -> Result<String> {
let ret = unsafe { curl_easy_perform(self.handle) };
if ret == 0 {
let b = unsafe { Box::from_raw(self.data_ptr) };
let data = (*b).clone();
Box::into_raw(b);
Ok(String::from_utf8(data).unwrap())
} else {
Err(ret)
}
}
}
fn main() -> Result<()> {
let my_curl = Curl::new().unwrap();
my_curl.set_url("https://www.example.com")?;
my_curl.perform().and_then(|data| Ok(println!("{}", data)))
// No cleanup code in this example for the sake of brevity.
}
While this works, I found it surprising that my_curl does not need to be declared mut, since none of the methods use &mut self, even though they pass a mut* pointer to the FFI function
s.
Should I change the declaration of perform to use &mut self instead of &self (for safety), since the internal buffer gets modified? Rust does not enforce this, but of course Rust does not know that the buffer gets modified by libcurl.
This small example runs fine, but I am unsure if I would be facing any kind of issues in larger programs, when the compiler might optimize for non-mutable access on the Curl struct, even though the instance of the struct is getting modified - or at least the data the pointer is pointing to.
Contrary to popular belief, there is absolutely no borrowchecker-induced restriction in Rust on passing *const/*mut pointers. There doesn't need to be, because dereferencing pointers is inherently unsafe, and can only be done in such blocks, with the programmer verifying all necessary invariants manually. In your case, you need to tell the compiler that is a mutable reference, as you already suspected.
The interested reader should definitely give the ffi section of the nomicon a read, to find out about some interesting ways to shoot yourself in the foot with it.

How do I convert *mut *mut c_void to &str without Box::from_raw?

I've been playing around with writing Redis Modules in Rust. This is my first attempt at using Rust FFI and bindings. How do I call this method and end up with a data value in Rust without destroying the Redis pointer?
extern "C" {
pub static mut RedisModule_GetTimerInfo: ::std::option::Option<
unsafe extern "C" fn(
ctx: *mut RedisModuleCtx,
id: RedisModuleTimerID,
remaining: *mut u64,
data: *mut *mut ::std::os::raw::c_void,
) -> ::std::os::raw::c_int,
>;
}
See the RedisModule_GetTimerInfo API Docs for more details.
I ended up getting this to work, but it throws an error if I call it with the same id twice:
let mut ms = 0 as u64;
let val = "";
let ptr = Box::into_raw(Box::new(&mut val)) as *mut *mut c_void;
let ok = unsafe { RedisModule_GetTimerInfo.unwrap()(ctx, id, &mut ms, ptr) };
let mut data: Option<String> = None;
if ok == 0 {
let val = unsafe { Box::from_raw(*ptr as *mut &str) };
// trim nul bytes
data = Some(val.trim_matches(char::from(0)).to_string());
}
This didn't work because of how Box::from_raw owns the raw pointer and the pointer is destroyed when the box is dropped.
I tried countless ways to make this work without using Box::into_raw & Box::from_raw and all of times they either end up crashing Redis or end up as a pointer that I don't know how to convert to &str.
Update: I originally had an example of using RedisModule_StopTimer which was a mistake. Corrected to use the method I was asking about.
I'm one of the maintainers of the redismodule-rs crate, which provides a high-level Rust API for writing Redis modules.
Prompted by your question, I looked into adding these timer APIs to the crate in a safe manner, and will push the code to the repo once I'm done with it.
The following code shows how to retrieve the data safely:
// Create local variables to hold the returned values
let mut remaining: u64 = 0;
let mut data: *mut c_void = std::ptr::null_mut();
// Call the API and retrieve the values into the local variables
let status = unsafe {
RedisModule_GetTimerInfo.unwrap()(ctx, timer_id, &mut remaining, &mut data)
};
if status == REDISMODULE_OK {
// Cast the *mut c_void supplied by the Redis API to
// a raw pointer of our custom type:
let data = data as *mut T; // T is the type of the data, e.g. String
// Dereference the raw pointer (we know this is safe,
// since Redis should return our original pointer which
// we know to be good), and turn in into a safe reference:
let data = unsafe { &*data };
println!("Remaining: {}, data: {:?}", remaining, data);
}
Using one of the links #Shepmaster added, I was finally able to figure this out. I swear I tried some variation of this but didn't think to try double boxing...
Here's what I did:
let val = Box::new(Box::new("") as Box<&str>);
let ptr = Box::into_raw(val);
let ok = unsafe { RedisModule_GetTimerInfo.unwrap()(ctx, id, &mut ms, ptr as *mut *mut c_void) };
let mut data: Option<String> = None;
if ok == 0 {
let val = unsafe {**ptr as &str};
data = Some(val.trim_matches(char::from(0)).to_string());
}
Thanks all for your help!

How to use ioctl + nix macros to get a variable size buffer

This is related to How to use nix's ioctl? but it is not the same question.
I want to retrieve a variable size buffer. There is another ioctl that tells me that I need to read X bytes. The C header tells me the following too:
#define HID_MAX_DESCRIPTOR_SIZE 4096
#define HIDIOCGRDESC _IOR('H', 0x02, struct hidraw_report_descriptor)
struct hidraw_report_descriptor {
__u32 size;
__u8 value[HID_MAX_DESCRIPTOR_SIZE];
};
I define the macro in the following way:
ioctl_read_buf!(hid_read_descr, b'H', 0x02, u8);
And later call:
let mut desc_raw = [0u8; 4 + 4096];
let err = unsafe { hid_read_descr(file.as_raw_fd(), &mut desc_raw); };
When doing this, desc_raw is full of zeros. I would have expected the first 4 bytes to contain size based on the struct definition.
The alternative, does not seem to work either
ioctl_read!(hid_read_descr2, b'H', 0x02, [u8; 4+4096]);
// ...
let mut desc_raw = [0xFFu8; 4 + 4096];
let err = unsafe { hid_read_descr2(file.as_raw_fd(), &mut desc_raw); };
In both cases, I have tried initializing desc_raw with 0xFF and after the call, it seems untouched.
Am I using the ioctl_read_buf macro incorrectly?
Now that Digikata has thoughtfully provided enough code to drive the program...
Am I using the ioctl_read_buf macro incorrectly?
I'd say that using it at all is incorrect here. You don't want to read an array of data, you want to read a single instance of a specific type. That's what ioctl_read! is for.
We define a repr(C) struct that mimics the C definition. This ensures that important details like alignment, padding, field ordering, etc., all match one-to-one with the code we are calling.
We can then construct an uninitialized instance of this struct and pass it to the newly-defined function.
use libc; // 0.2.66
use nix::ioctl_read; // 0.16.1
use std::{
fs::OpenOptions,
mem::MaybeUninit,
os::unix::{fs::OpenOptionsExt, io::AsRawFd},
};
const HID_MAX_DESCRIPTOR_SIZE: usize = 4096;
#[repr(C)]
pub struct hidraw_report_descriptor {
size: u32,
value: [u8; HID_MAX_DESCRIPTOR_SIZE],
}
ioctl_read!(hid_read_sz, b'H', 0x01, libc::c_int);
ioctl_read!(hid_read_descr, b'H', 0x02, hidraw_report_descriptor);
fn main() -> Result<(), Box<dyn std::error::Error>> {
let file = OpenOptions::new()
.read(true)
.write(true)
.custom_flags(libc::O_NONBLOCK)
.open("/dev/hidraw0")?;
unsafe {
let fd = file.as_raw_fd();
let mut size = 0;
hid_read_sz(fd, &mut size)?;
println!("{}", size);
let mut desc_raw = MaybeUninit::<hidraw_report_descriptor>::uninit();
(*desc_raw.as_mut_ptr()).size = size as u32;
hid_read_descr(file.as_raw_fd(), desc_raw.as_mut_ptr())?;
let desc_raw = desc_raw.assume_init();
let data = &desc_raw.value[..desc_raw.size as usize];
println!("{:02x?}", data);
}
Ok(())
}
I think you've got a couple of issues here. Some on the Rust side, and some with using the HIDIOCGRDESC ioctl incorrectly. If you look in a Linux kernel distribution at the hidraw.txt and hid-example.c code, the use of the struct is as follows:
struct hidraw_report_descriptor rpt_desc;
memset(&rpt_desc, 0x0, sizeof(rpt_desc));
/* Get Report Descriptor */
rpt_desc.size = desc_size;
res = ioctl(fd, HIDIOCGRDESC, &rpt_desc);
desc_size comes from a previous HIDIOCGRDESCSIZE ioctl call. Unless I fill in the correct size parameter, the ioctl returns an error (ENOTTY or EINVAL).
There are also issues with passing the O_NONBLOCK flag to open a HID device without using libc::open. I ended up with this:
#[macro_use]
extern crate nix;
extern crate libc;
ioctl_read!(hid_read_sz, b'H', 0x01, i32);
ioctl_read_buf!(hid_read_descr, b'H', 0x02, u8);
fn main() {
// see /usr/include/linux/hidraw.h
// and hid-example.c
extern crate ffi;
use std::ffi::CString;
let fname = CString::new("/dev/hidraw0").unwrap();
let fd = unsafe { libc::open(fname.as_ptr(), libc::O_NONBLOCK | libc::O_RDWR) };
let mut sz = 0i32;
let err = unsafe { hid_read_sz(fd, &mut sz) };
println!("{:?} size is {:?}", err, sz);
let mut desc_raw = [0x0u8; 4 + 4096];
// sz on my system ended up as 52 - this handjams in the value
// w/ a little endian swizzle into the C struct .size field, but
// really we should properly define the struct
desc_raw[0] = sz as u8;
let err = unsafe { hid_read_descr(fd, &mut desc_raw) };
println!("{:?}", err);
for (i, &b) in desc_raw.iter().enumerate() {
if b != 0 {
println!("{:4} {:?}", i, b);
}
}
}
In the end, you shouldn't be sizing the struct to a variable size, the ioctl header indicates there is a fixed max expected. The variability is all on the system ioctl to deal with, it just needs the expected size hint from another ioctl call.

How to read a struct from a file in Rust?

Is there a way I can read a structure directly from a file in Rust? My code is:
use std::fs::File;
struct Configuration {
item1: u8,
item2: u16,
item3: i32,
item4: [char; 8],
}
fn main() {
let file = File::open("config_file").unwrap();
let mut config: Configuration;
// How to read struct from file?
}
How would I read my configuration directly into config from the file? Is this even possible?
Here you go:
use std::io::Read;
use std::mem;
use std::slice;
#[repr(C, packed)]
#[derive(Debug, Copy, Clone)]
struct Configuration {
item1: u8,
item2: u16,
item3: i32,
item4: [char; 8],
}
const CONFIG_DATA: &[u8] = &[
0xfd, // u8
0xb4, 0x50, // u16
0x45, 0xcd, 0x3c, 0x15, // i32
0x71, 0x3c, 0x87, 0xff, // char
0xe8, 0x5d, 0x20, 0xe7, // char
0x5f, 0x38, 0x05, 0x4a, // char
0xc4, 0x58, 0x8f, 0xdc, // char
0x67, 0x1d, 0xb4, 0x64, // char
0xf2, 0xc5, 0x2c, 0x15, // char
0xd8, 0x9a, 0xae, 0x23, // char
0x7d, 0xce, 0x4b, 0xeb, // char
];
fn main() {
let mut buffer = CONFIG_DATA;
let mut config: Configuration = unsafe { mem::zeroed() };
let config_size = mem::size_of::<Configuration>();
unsafe {
let config_slice = slice::from_raw_parts_mut(&mut config as *mut _ as *mut u8, config_size);
// `read_exact()` comes from `Read` impl for `&[u8]`
buffer.read_exact(config_slice).unwrap();
}
println!("Read structure: {:#?}", config);
}
Try it here (Updated for Rust 1.38)
You need to be careful, however, as unsafe code is, well, unsafe. After the slice::from_raw_parts_mut() invocation, there exist two mutable handles to the same data at the same time, which is a violation of Rust aliasing rules. Therefore you would want to keep the mutable slice created out of a structure for the shortest possible time. I also assume that you know about endianness issues - the code above is by no means portable, and will return different results if compiled and run on different kinds of machines (ARM vs x86, for example).
If you can choose the format and you want a compact binary one, consider using bincode. Otherwise, if you need e.g. to parse some pre-defined binary structure, byteorder crate is the way to go.
As Vladimir Matveev mentions, using the byteorder crate is often the best solution. This way, you account for endianness issues, don't have to deal with any unsafe code, or worry about alignment or padding:
use byteorder::{LittleEndian, ReadBytesExt}; // 1.2.7
use std::{
fs::File,
io::{self, Read},
};
struct Configuration {
item1: u8,
item2: u16,
item3: i32,
}
impl Configuration {
fn from_reader(mut rdr: impl Read) -> io::Result<Self> {
let item1 = rdr.read_u8()?;
let item2 = rdr.read_u16::<LittleEndian>()?;
let item3 = rdr.read_i32::<LittleEndian>()?;
Ok(Configuration {
item1,
item2,
item3,
})
}
}
fn main() {
let file = File::open("/dev/random").unwrap();
let config = Configuration::from_reader(file);
// How to read struct from file?
}
I've ignored the [char; 8] for a few reasons:
Rust's char is a 32-bit type and it's unclear if your file has actual Unicode code points or C-style 8-bit values.
You can't easily parse an array with byteorder, you have to parse N values and then build the array yourself.
The following code does not take into account any endianness or padding issues and is intended to be used with POD types. struct Configuration should be safe in this case.
Here is a function that can read a struct (of a POD type) from a file:
use std::io::{self, Read};
use std::slice;
fn read_struct<T, R: Read>(mut read: R) -> io::Result<T> {
let num_bytes = ::std::mem::size_of::<T>();
unsafe {
let mut s = ::std::mem::uninitialized();
let buffer = slice::from_raw_parts_mut(&mut s as *mut T as *mut u8, num_bytes);
match read.read_exact(buffer) {
Ok(()) => Ok(s),
Err(e) => {
::std::mem::forget(s);
Err(e)
}
}
}
}
// use
// read_struct::<Configuration>(reader)
If you want to read a sequence of structs from a file, you can execute read_struct multiple times or read all the file at once:
use std::fs::{self, File};
use std::io::BufReader;
use std::path::Path;
fn read_structs<T, P: AsRef<Path>>(path: P) -> io::Result<Vec<T>> {
let path = path.as_ref();
let struct_size = ::std::mem::size_of::<T>();
let num_bytes = fs::metadata(path)?.len() as usize;
let num_structs = num_bytes / struct_size;
let mut reader = BufReader::new(File::open(path)?);
let mut r = Vec::<T>::with_capacity(num_structs);
unsafe {
let buffer = slice::from_raw_parts_mut(r.as_mut_ptr() as *mut u8, num_bytes);
reader.read_exact(buffer)?;
r.set_len(num_structs);
}
Ok(r)
}
// use
// read_structs::<StructName, _>("path/to/file"))

Resources