How to add partitions in Kafka Rust configuration - rust

I want to configure this file to add a number-of-partitions option; by default it creates only 1 partition, but I need 10 for my data.
I don't have much knowledge of the rdkafka library in Rust, as I am using this plugin file directly.
Can anyone guide me to a solution, or point me in the right direction?
Thanks
use rdkafka::error::{KafkaError};
use rdkafka::{ClientConfig};
use rdkafka::producer::{FutureProducer, FutureRecord};
use std::fmt::Error;
use std::os::raw::{c_char, c_int, c_void};
use std::sync::mpsc::TrySendError;
use suricata::conf::ConfNode;
use suricata::{SCLogError, SCLogNotice};
const DEFAULT_BUFFER_SIZE: &str = "65535";
const DEFAULT_CLIENT_ID: &str = "rdkafka";
#[derive(Debug, Clone)]
struct ProducerConfig {
brokers: String,
topic: String,
client_id: String,
buffer: usize,
}
impl ProducerConfig {
fn new(conf: &ConfNode) -> Result<Self,Error> {
let brokers = if let Some(val) = conf.get_child_value("brokers"){
val.to_string()
}else {
SCLogError!("brokers parameter required!");
panic!();
};
let topic = if let Some(val) = conf.get_child_value("topic"){
val.to_string()
}else {
SCLogError!("topic parameter required!");
panic!();
};
let client_id = conf.get_child_value("client-id").unwrap_or(DEFAULT_CLIENT_ID);
let buffer_size = match conf
.get_child_value("buffer-size")
.unwrap_or(DEFAULT_BUFFER_SIZE)
.parse::<usize>()
{
Ok(size) => size,
Err(_) => {
SCLogError!("invalid buffer-size!");
panic!();
},
};
let config = ProducerConfig {
brokers: brokers.into(),
topic: topic.into(),
client_id: client_id.into(),
buffer: buffer_size,
};
Ok(config)
}
}
struct KafkaProducer {
producer: FutureProducer,
config: ProducerConfig,
rx: std::sync::mpsc::Receiver<String>,
count: usize,
}
impl KafkaProducer {
fn new(
config: ProducerConfig,
rx: std::sync::mpsc::Receiver<String>,
) -> Result<Self,KafkaError> {
let producer: FutureProducer = ClientConfig::new()
.set("bootstrap.servers", &config.brokers)
.set("client.id",&config.client_id)
.set("message.timeout.ms", "5000")
.create()?;
Ok(Self {
config,
producer,
rx,
count: 0,
})
}
fn run(&mut self) {
// Get a peekable iterator from the incoming channel. This allows us to
// get the next message from the channel without removing it, we can
// then remove it once its been sent to the server without error.
//
// Not sure how this will work with pipe-lining tho, will probably have
// to do some buffering here, or just accept that any log records
// in-flight will be lost.
let mut iter = self.rx.iter().peekable();
loop {
if let Some(buf) = iter.peek() {
self.count += 1;
if let Err(err) = self.producer.send_result(
FutureRecord::to(&self.config.topic)
.key("")
.payload(&buf),
) {
SCLogError!("Failed to send event to Kafka: {:?}", err);
break;
} else {
// Successfully sent. Pop it off the channel.
let _ = iter.next();
}
} else {
break;
}
}
SCLogNotice!("Producer finished: count={}", self.count,);
}
}
struct Context {
tx: std::sync::mpsc::SyncSender<String>,
count: usize,
dropped: usize,
}
unsafe extern "C" fn output_open(conf: *const c_void, init_data: *mut *mut c_void) -> c_int {
// Load configuration.
let config = ProducerConfig::new(&ConfNode::wrap(conf)).unwrap();
let (tx, rx) = std::sync::mpsc::sync_channel(config.buffer);
let mut kafka_producer = match KafkaProducer::new(config, rx) {
Ok(producer) => {
SCLogNotice!(
"KafKa Producer initialize success with brokers:{:?} | topic: {:?} | client_id: {:?} | buffer-size: {:?}",
producer.config.brokers,
producer.config.topic,
producer.config.client_id,
producer.config.buffer
);
producer
}
Err(err) => {
SCLogError!("Failed to initialize Kafka Producer: {:?}", err);
panic!()
}
};
let context = Context {
tx,
count: 0,
dropped: 0,
};
std::thread::spawn(move || {kafka_producer.run()});
// kafka_producer.run();
*init_data = Box::into_raw(Box::new(context)) as *mut _;
0
}
unsafe extern "C" fn output_close(init_data: *const c_void) {
let context = Box::from_raw(init_data as *mut Context);
SCLogNotice!(
"Kafka produce finished: count={}, dropped={}",
context.count,
context.dropped
);
std::mem::drop(context);
}
unsafe extern "C" fn output_write(
buffer: *const c_char,
buffer_len: c_int,
init_data: *const c_void,
) -> c_int {
let context = &mut *(init_data as *mut Context);
let buf = if let Ok(buf) = ffi::str_from_c_parts(buffer, buffer_len) {
buf
} else {
return -1;
};
context.count += 1;
if let Err(err) = context.tx.try_send(buf.to_string()) {
context.dropped += 1;
match err {
TrySendError::Full(_) => {
SCLogError!("Eve record lost due to full buffer");
}
TrySendError::Disconnected(_) => {
SCLogError!("Eve record lost due to broken channel{}",err);
}
}
}
0
}
unsafe extern "C" fn init_plugin() {
let file_type =
ffi::SCPluginFileType::new("kafka", output_open, output_close, output_write);
ffi::SCPluginRegisterFileType(file_type);
}
#[no_mangle]
extern "C" fn SCPluginRegister() -> *const ffi::SCPlugin {
// Rust plugins need to initialize some Suricata internals so stuff like logging works.
suricata::plugin::init();
// Register our plugin.
ffi::SCPlugin::new("Kafka Eve Filetype", "GPL-2.0", "1z3r0", init_plugin)
}
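A pointer in the right direction: the partition count is a property of the topic, not of the producer, so there is no ClientConfig key for it in the code above. If the broker has auto.create.topics.enable on, producing to a missing topic creates it with the broker's num.partitions default (often 1), which is the most likely reason you are seeing a single partition. You can pre-create the topic (kafka-topics.sh --create --topic <name> --partitions 10), raise the broker's num.partitions default, or create the topic programmatically through rdkafka's admin API. A minimal sketch of the last option, separate from the plugin code above (the function name ensure_topic is illustrative):

use rdkafka::admin::{AdminClient, AdminOptions, NewTopic, TopicReplication};
use rdkafka::client::DefaultClientContext;
use rdkafka::error::KafkaError;
use rdkafka::ClientConfig;

// Create `topic` with 10 partitions (replication factor 1). Per-topic
// results (e.g. "topic already exists") come back inside the returned
// Vec and are ignored here for brevity.
async fn ensure_topic(brokers: &str, topic: &str) -> Result<(), KafkaError> {
    let admin: AdminClient<DefaultClientContext> = ClientConfig::new()
        .set("bootstrap.servers", brokers)
        .create()?;
    let new_topic = NewTopic::new(topic, 10, TopicReplication::Fixed(1));
    admin
        .create_topics(&[new_topic], &AdminOptions::new())
        .await?;
    Ok(())
}

Running something like this once before the producer starts (or pre-creating the topic with the CLI) means the FutureProducer above can stay exactly as it is.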

Related

Future created by async block is not `Send` because of *mut u8 [duplicate]

This question already has an answer here: How to send a pointer to another thread? (1 answer)
I was able to proceed with implementing my asynchronous UDP server. However, I have this error showing up twice because my variable data has type *mut u8, which is not Send:
error: future cannot be sent between threads safely
help: within `impl std::future::Future`, the trait `std::marker::Send` is not implemented for `*mut u8`
note: captured value is not `Send`
And the code (MRE):
use std::error::Error;
use std::time::Duration;
use std::env;
use tokio::net::UdpSocket;
use tokio::{sync::mpsc, task, time}; // 1.4.0
use std::alloc::{alloc, Layout};
use std::mem;
use std::mem::MaybeUninit;
use std::net::SocketAddr;
const UDP_HEADER: usize = 8;
const IP_HEADER: usize = 20;
const AG_HEADER: usize = 4;
const MAX_DATA_LENGTH: usize = (64 * 1024 - 1) - UDP_HEADER - IP_HEADER;
const MAX_CHUNK_SIZE: usize = MAX_DATA_LENGTH - AG_HEADER;
const MAX_DATAGRAM_SIZE: usize = 0x10000;
/// A wrapper for [ptr::copy_nonoverlapping] with different argument order (same as original memcpy)
unsafe fn memcpy(dst_ptr: *mut u8, src_ptr: *const u8, len: usize) {
std::ptr::copy_nonoverlapping(src_ptr, dst_ptr, len);
}
// Different from https://doc.rust-lang.org/std/primitive.u32.html#method.next_power_of_two
// Returns the [exponent] from the smallest power of two greater than or equal to n.
const fn next_power_of_two_exponent(n: u32) -> u32 {
return 32 - (n - 1).leading_zeros();
}
async fn run_server(socket: UdpSocket) {
let mut missing_indexes: Vec<u16> = Vec::new();
let mut peer_addr = MaybeUninit::<SocketAddr>::uninit();
let mut data = std::ptr::null_mut(); // ptr for the file bytes
let mut len: usize = 0; // total len of bytes that will be written
let mut layout = MaybeUninit::<Layout>::uninit();
let mut buf = [0u8; MAX_DATA_LENGTH];
let mut start = false;
let (debounce_tx, mut debounce_rx) = mpsc::channel::<(usize, SocketAddr)>(3300);
let (network_tx, mut network_rx) = mpsc::channel::<(usize, SocketAddr)>(3300);
loop {
// Listen for events
let debouncer = task::spawn(async move {
let duration = Duration::from_millis(3300);
loop {
match time::timeout(duration, debounce_rx.recv()).await {
Ok(Some((size, peer))) => {
eprintln!("Network activity");
}
Ok(None) => {
if start == true {
eprintln!("Debounce finished");
break;
}
}
Err(_) => {
eprintln!("{:?} since network activity", duration);
}
}
}
});
// Listen for network activity
let server = task::spawn({
// async{
let debounce_tx = debounce_tx.clone();
async move {
while let Some((size, peer)) = network_rx.recv().await {
// Received a new packet
debounce_tx.send((size, peer)).await.expect("Unable to talk to debounce");
eprintln!("Received a packet {} from: {}", size, peer);
let packet_index: u16 = (buf[0] as u16) << 8 | buf[1] as u16;
if start == false { // first bytes of a new file: initialization // TODO: ADD A MUTEX to prevent many initializations
start = true;
let chunks_cnt: u32 = (buf[2] as u32) << 8 | buf[3] as u32;
let n: usize = MAX_DATAGRAM_SIZE << next_power_of_two_exponent(chunks_cnt);
unsafe {
layout.as_mut_ptr().write(Layout::from_size_align_unchecked(n, mem::align_of::<u8>()));
// /!\ data has type `*mut u8` which is not `Send`
data = alloc(layout.assume_init());
peer_addr.as_mut_ptr().write(peer);
}
let a: Vec<u16> = vec![0; chunks_cnt as usize]; //(0..chunks_cnt).map(|x| x as u16).collect(); // create a sorted vector with all the required indexes
missing_indexes = a;
}
missing_indexes[packet_index as usize] = 1;
unsafe {
let dst_ptr = data.offset((packet_index as usize * MAX_CHUNK_SIZE) as isize);
memcpy(dst_ptr, &buf[AG_HEADER], size - AG_HEADER);
};
println!("receiving packet {} from: {}", packet_index, peer);
}
}
});
// Prevent deadlocks
drop(debounce_tx);
match socket.recv_from(&mut buf).await {
Ok((size, src)) => {
network_tx.send((size, src)).await.expect("Unable to talk to network");
}
Err(e) => {
eprintln!("couldn't recieve a datagram: {}", e);
}
}
}
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let addr = env::args().nth(1).unwrap_or_else(|| "127.0.0.1:8080".to_string());
let socket = UdpSocket::bind(&addr).await?;
println!("Listening on: {}", socket.local_addr()?);
run_server(socket).await;
Ok(())
}
Since I was converting from synchronous to asynchronous code, I know that, potentially, multiple threads could be writing to data, and that is probably why I encounter this error. But I don't know which syntax I could use to "clone" the mut ptr and make it unique for each thread (and the same for the buffer).
As suggested by user4815162342, I think the best approach would be
to make the pointer Send by wrapping it in a struct and declaring unsafe impl Send for NewStruct {}.
Any help strongly appreciated!
PS: Full code can be found on my github repository
Short version
Thanks to the comment of user4815162342, I decided to wrap the mut ptr in a struct implementing Send and Sync, which allowed me to solve this part (there are still other issues, but they are beyond the scope of this question):
pub struct FileBuffer {
data: *mut u8
}
unsafe impl Send for FileBuffer {}
unsafe impl Sync for FileBuffer {}
//let mut data = std::ptr::null_mut(); // ptr for the file bytes
let mut fileBuffer: FileBuffer = FileBuffer { data: std::ptr::null_mut() };
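Note that unsafe impl Send for FileBuffer {} is a promise to the compiler, not something it can check: it is only sound if the surrounding code guarantees the pointer is never used from two threads at once. An alternative that avoids the unsafe impl entirely is to replace the raw pointer with an owned buffer shared behind Arc<Mutex<...>>, which is Send and Sync by construction. A minimal standalone sketch of that shape (not drop-in code for the server above):

use std::sync::{Arc, Mutex};

// An owned, growable buffer is Send + Sync by construction,
// so no `unsafe impl` is needed to move it into a spawned task.
struct FileBuffer {
    data: Vec<u8>,
}

#[tokio::main]
async fn main() {
    let file_buffer = Arc::new(Mutex::new(FileBuffer { data: Vec::new() }));

    let task = tokio::spawn({
        let file_buffer = Arc::clone(&file_buffer);
        async move {
            let mut guard = file_buffer.lock().unwrap();
            guard.data.resize(1024, 0); // takes the place of alloc()
            guard.data[0] = 42;         // takes the place of the unsafe memcpy()
        }
    });

    task.await.unwrap();
    assert_eq!(file_buffer.lock().unwrap().data[0], 42);
}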
Long version
use std::error::Error;
use std::time::Duration;
use std::env;
use tokio::net::UdpSocket;
use tokio::{sync::mpsc, task, time}; // 1.4.0
use std::alloc::{alloc, Layout};
use std::mem;
use std::mem::MaybeUninit;
use std::net::SocketAddr;
const UDP_HEADER: usize = 8;
const IP_HEADER: usize = 20;
const AG_HEADER: usize = 4;
const MAX_DATA_LENGTH: usize = (64 * 1024 - 1) - UDP_HEADER - IP_HEADER;
const MAX_CHUNK_SIZE: usize = MAX_DATA_LENGTH - AG_HEADER;
const MAX_DATAGRAM_SIZE: usize = 0x10000;
/// A wrapper for [ptr::copy_nonoverlapping] with different argument order (same as original memcpy)
unsafe fn memcpy(dst_ptr: *mut u8, src_ptr: *const u8, len: usize) {
std::ptr::copy_nonoverlapping(src_ptr, dst_ptr, len);
}
// Different from https://doc.rust-lang.org/std/primitive.u32.html#method.next_power_of_two
// Returns the [exponent] from the smallest power of two greater than or equal to n.
const fn next_power_of_two_exponent(n: u32) -> u32 {
return 32 - (n - 1).leading_zeros();
}
pub struct FileBuffer {
data: *mut u8
}
unsafe impl Send for FileBuffer {}
unsafe impl Sync for FileBuffer {}
async fn run_server(socket: UdpSocket) {
let mut missing_indexes: Vec<u16> = Vec::new();
let mut peer_addr = MaybeUninit::<SocketAddr>::uninit();
//let mut data = std::ptr::null_mut(); // ptr for the file bytes
let mut fileBuffer: FileBuffer = FileBuffer { data: std::ptr::null_mut() };
let mut len: usize = 0; // total len of bytes that will be written
let mut layout = MaybeUninit::<Layout>::uninit();
let mut buf = [0u8; MAX_DATA_LENGTH];
let mut start = false;
let (debounce_tx, mut debounce_rx) = mpsc::channel::<(usize, SocketAddr)>(3300);
let (network_tx, mut network_rx) = mpsc::channel::<(usize, SocketAddr)>(3300);
loop {
// Listen for events
let debouncer = task::spawn(async move {
let duration = Duration::from_millis(3300);
loop {
match time::timeout(duration, debounce_rx.recv()).await {
Ok(Some((size, peer))) => {
eprintln!("Network activity");
}
Ok(None) => {
if start == true {
eprintln!("Debounce finished");
break;
}
}
Err(_) => {
eprintln!("{:?} since network activity", duration);
}
}
}
});
// Listen for network activity
let server = task::spawn({
// async{
let debounce_tx = debounce_tx.clone();
async move {
while let Some((size, peer)) = network_rx.recv().await {
// Received a new packet
debounce_tx.send((size, peer)).await.expect("Unable to talk to debounce");
eprintln!("Received a packet {} from: {}", size, peer);
let packet_index: u16 = (buf[0] as u16) << 8 | buf[1] as u16;
if start == false { // first bytes of a new file: initialization // TODO: ADD A MUTEX to prevent many initializations
start = true;
let chunks_cnt: u32 = (buf[2] as u32) << 8 | buf[3] as u32;
let n: usize = MAX_DATAGRAM_SIZE << next_power_of_two_exponent(chunks_cnt);
unsafe {
layout.as_mut_ptr().write(Layout::from_size_align_unchecked(n, mem::align_of::<u8>()));
// /!\ data has type `*mut u8` which is not `Send`
fileBuffer.data = alloc(layout.assume_init());
peer_addr.as_mut_ptr().write(peer);
}
let a: Vec<u16> = vec![0; chunks_cnt as usize]; //(0..chunks_cnt).map(|x| x as u16).collect(); // create a sorted vector with all the required indexes
missing_indexes = a;
}
missing_indexes[packet_index as usize] = 1;
unsafe {
let dst_ptr = fileBuffer.data.offset((packet_index as usize * MAX_CHUNK_SIZE) as isize);
memcpy(dst_ptr, &buf[AG_HEADER], size - AG_HEADER);
};
println!("receiving packet {} from: {}", packet_index, peer);
}
}
});
// Prevent deadlocks
drop(debounce_tx);
match socket.recv_from(&mut buf).await {
Ok((size, src)) => {
network_tx.send((size, src)).await.expect("Unable to talk to network");
}
Err(e) => {
eprintln!("couldn't recieve a datagram: {}", e);
}
}
}
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let addr = env::args().nth(1).unwrap_or_else(|| "127.0.0.1:8080".to_string());
let socket = UdpSocket::bind(&addr).await?;
println!("Listening on: {}", socket.local_addr()?);
run_server(socket).await;
Ok(())
}

rust bindgen unaligned tcache chunk error

I am working on bindings for the avahi library in Rust, and I am encountering a runtime error:
malloc(): unaligned tcache chunk detected
The code at fault:
pub fn register_service(
&mut self,
name: String,
svc_type: String,
port: u16,
txt: &[String],
) -> Result<(), AvahiError> {
let group = match self.group {
Some(group) => group,
None => {
let group = unsafe {
ffi::avahi_entry_group_new(
self.client_inner,
Some(group_callback),
std::ptr::null_mut() as *mut c_void,
)
};
if group.is_null() {
return Err(AvahiError::GroupCreateError);
}
self.group.replace(group);
return self.register_service(name, svc_type, port, txt);
}
};
// avahi_entry_group_is_empty or any other function that uses group causes this error
if unsafe { ffi::avahi_entry_group_is_empty(group) != 0 } {
let name = CString::new(name).unwrap();
let svc_type = CString::new(svc_type).unwrap();
let ret = unsafe {
ffi::avahi_entry_group_add_service(
group,
ffi::AVAHI_IF_UNSPEC,
ffi::AVAHI_PROTO_UNSPEC,
0,
name.as_ptr(),
svc_type.as_ptr(),
std::ptr::null_mut(),
std::ptr::null_mut(),
port,
std::ptr::null_mut() as *mut i8,
)
};
if ret < 0 {
let msg = unsafe { ffi::avahi_strerror(ret) };
return Err(AvahiError::CreateService(unsafe {
CString::from_raw(msg as *mut i8)
.to_str()
.unwrap()
.to_owned()
}));
}
if unsafe { ffi::avahi_entry_group_commit(group) == 0 } {
return Err(AvahiError::EntryGroupCommit);
}
}
Ok(())
}
I am using this example as a reference, and I got it to work in C, so I think the error must be coming from the bindings. I am not sure I understand what this error means either.
What am I doing wrong?
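One concrete problem visible in this snippet, offered as a direction rather than a confirmed diagnosis: avahi_strerror returns a pointer to a string owned by the library, but CString::from_raw takes ownership and frees it on drop. Freeing memory the allocator never handed out corrupts its bookkeeping, which is exactly the kind of state that later surfaces as malloc(): unaligned tcache chunk detected. Borrowing the string with CStr::from_ptr avoids that; and since avahi's convention is 0 for success and negative for errors, the avahi_entry_group_commit check also looks inverted. A sketch of both fixes:

use std::ffi::CStr;

// Borrow the library-owned error string; `CString::from_raw` would
// free memory that avahi still owns.
if ret < 0 {
    let msg = unsafe {
        CStr::from_ptr(ffi::avahi_strerror(ret))
            .to_string_lossy()
            .into_owned()
    };
    return Err(AvahiError::CreateService(msg));
}
// avahi returns 0 on success and a negative value on failure.
if unsafe { ffi::avahi_entry_group_commit(group) } < 0 {
    return Err(AvahiError::EntryGroupCommit);
}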

Why is my Future implementation blocked after it is polled once and NotReady?

I implemented the Future and made a request against it, but it blocks my curl call, and the log shows that poll was only invoked once.
Did I implement anything wrong?
use failure::{format_err, Error};
use futures::{future, Async};
use hyper::rt::Future;
use hyper::service::{service_fn, service_fn_ok};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use log::{debug, error, info};
use std::{
sync::{Arc, Mutex},
task::Waker,
thread,
};
pub struct TimerFuture {
shared_state: Arc<Mutex<SharedState>>,
}
struct SharedState {
completed: bool,
resp: String,
}
impl Future for TimerFuture {
type Item = Response<Body>;
type Error = hyper::Error;
fn poll(&mut self) -> futures::Poll<Response<Body>, hyper::Error> {
let mut shared_state = self.shared_state.lock().unwrap();
if shared_state.completed {
return Ok(Async::Ready(Response::new(Body::from(
shared_state.resp.clone(),
))));
} else {
return Ok(Async::NotReady);
}
}
}
impl TimerFuture {
pub fn new(instance: String) -> Self {
let shared_state = Arc::new(Mutex::new(SharedState {
completed: false,
resp: String::new(),
}));
let thread_shared_state = shared_state.clone();
thread::spawn(move || {
let res = match request_health(instance) {
Ok(status) => status.clone(),
Err(err) => {
error!("{:?}", err);
format!("{}", err)
}
};
let mut shared_state = thread_shared_state.lock().unwrap();
shared_state.completed = true;
shared_state.resp = res;
});
TimerFuture { shared_state }
}
}
fn request_health(instance_name: String) -> Result<String, Error> {
std::thread::sleep(std::time::Duration::from_secs(1));
Ok("health".to_string())
}
type BoxFut = Box<dyn Future<Item = Response<Body>, Error = hyper::Error> + Send>;
fn serve_health(req: Request<Body>) -> BoxFut {
let mut response = Response::new(Body::empty());
let path = req.uri().path().to_owned();
match (req.method(), path) {
(&Method::GET, path) => {
return Box::new(TimerFuture::new(path.clone()));
}
_ => *response.status_mut() = StatusCode::NOT_FOUND,
}
Box::new(future::ok(response))
}
fn main() {
let endpoint_addr = "0.0.0.0:8080";
match std::thread::spawn(move || {
let addr = endpoint_addr.parse().unwrap();
info!("Server is running on {}", addr);
hyper::rt::run(
Server::bind(&addr)
.serve(move || service_fn(serve_health))
.map_err(|e| eprintln!("server error: {}", e)),
);
})
.join()
{
Ok(e) => e,
Err(e) => println!("{:?}", e),
}
}
After compiling and running this code, a server is listening on port 8080. Calling it with curl blocks:
curl 127.0.0.1:8080/my-health-scope
Did I implement anything wrong?
Yes, you did not read and follow the documentation for the method you are implementing (emphasis mine):
When a future is not ready yet, the Async::NotReady value will be returned. In this situation the future will also register interest of the current task in the value being produced. This is done by calling task::park to retrieve a handle to the current Task. When the future is then ready to make progress (e.g. it should be polled again) the unpark method is called on the Task.
As a minimal, reproducible example, let's use this:
use futures::{future::Future, Async};
use std::{
mem,
sync::{Arc, Mutex},
thread,
time::Duration,
};
pub struct Timer {
data: Arc<Mutex<String>>,
}
impl Timer {
pub fn new(instance: String) -> Self {
let data = Arc::new(Mutex::new(String::new()));
thread::spawn({
let data = data.clone();
move || {
thread::sleep(Duration::from_secs(1));
*data.lock().unwrap() = instance;
}
});
Timer { data }
}
}
impl Future for Timer {
type Item = String;
type Error = ();
fn poll(&mut self) -> futures::Poll<Self::Item, Self::Error> {
let mut data = self.data.lock().unwrap();
eprintln!("poll was called");
if data.is_empty() {
Ok(Async::NotReady)
} else {
let data = mem::replace(&mut *data, String::new());
Ok(Async::Ready(data))
}
}
}
fn main() {
let v = Timer::new("Some text".into()).wait();
println!("{:?}", v);
}
It only prints out "poll was called" once.
You can call task::current (previously task::park) in the implementation of Future::poll, save the resulting value, then use the value with Task::notify (previously Task::unpark) whenever the future may be polled again:
use futures::{
future::Future,
task::{self, Task},
Async,
};
use std::{
mem,
sync::{Arc, Mutex},
thread,
time::Duration,
};
pub struct Timer {
data: Arc<Mutex<(String, Option<Task>)>>,
}
impl Timer {
pub fn new(instance: String) -> Self {
let data = Arc::new(Mutex::new((String::new(), None)));
let me = Timer { data };
thread::spawn({
let data = me.data.clone();
move || {
thread::sleep(Duration::from_secs(1));
let mut data = data.lock().unwrap();
data.0 = instance;
if let Some(task) = data.1.take() {
task.notify();
}
}
});
me
}
}
impl Future for Timer {
type Item = String;
type Error = ();
fn poll(&mut self) -> futures::Poll<Self::Item, Self::Error> {
let mut data = self.data.lock().unwrap();
eprintln!("poll was called");
if data.0.is_empty() {
let v = task::current();
data.1 = Some(v);
Ok(Async::NotReady)
} else {
let data = mem::replace(&mut data.0, String::new());
Ok(Async::Ready(data))
}
}
}
fn main() {
let v = Timer::new("Some text".into()).wait();
println!("{:?}", v);
}
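For reference, the same contract carries over to the modern std::future::Future trait: instead of task::current and Task::notify, poll stores the Waker from its Context before returning Pending, and the background thread calls wake. A sketch of the same timer under std::future, assuming the futures 0.3 crate purely for its block_on executor:

use std::{
    future::Future,
    pin::Pin,
    sync::{Arc, Mutex},
    task::{Context, Poll, Waker},
    thread,
    time::Duration,
};

struct Shared {
    data: Option<String>,
    waker: Option<Waker>,
}

pub struct Timer {
    shared: Arc<Mutex<Shared>>,
}

impl Timer {
    pub fn new(instance: String) -> Self {
        let shared = Arc::new(Mutex::new(Shared { data: None, waker: None }));
        thread::spawn({
            let shared = shared.clone();
            move || {
                thread::sleep(Duration::from_secs(1));
                let mut s = shared.lock().unwrap();
                s.data = Some(instance);
                // Equivalent of Task::notify: wake the task that last polled us.
                if let Some(waker) = s.waker.take() {
                    waker.wake();
                }
            }
        });
        Timer { shared }
    }
}

impl Future for Timer {
    type Output = String;

    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<String> {
        let mut s = self.shared.lock().unwrap();
        eprintln!("poll was called");
        match s.data.take() {
            Some(v) => Poll::Ready(v),
            None => {
                // Equivalent of task::current: save the waker before Pending.
                s.waker = Some(cx.waker().clone());
                Poll::Pending
            }
        }
    }
}

fn main() {
    let v = futures::executor::block_on(Timer::new("Some text".into()));
    println!("{:?}", v);
}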
See also:
Why does Future::select choose the future with a longer sleep period first?
Why is `Future::poll` not called repeatedly after returning `NotReady`?
What is the best approach to encapsulate blocking I/O in future-rs?

color_quant::NeuQuant compiled to WebAssembly outputs zero values

I am trying to load an image in the browser and use the NeuQuant algorithm to quantize my image buffer in Rust via WebAssembly. However, the NeuQuant output contains zero values, regardless of what PNG I try to feed it.
I expose two Rust methods to WASM:
alloc, for allocating a byte buffer
read_img, which will read and process the image buffer
I know that I get zero values because I imported a JavaScript method called log_nr for logging simple u8 numbers. The buffer seems to contain valid pixel values.
extern crate color_quant;
extern crate image;
use color_quant::NeuQuant;
use image::{DynamicImage, GenericImage, Pixel, Rgb};
use std::collections::BTreeMap;
use std::mem;
use std::os::raw::c_void;
static NQ_SAMPLE_FACTION: i32 = 10;
static NQ_PALETTE_SIZE: usize = 256;
extern "C" {
fn log(s: &str, len: usize);
fn log_nr(nr: u8);
}
fn get_pixels(img: DynamicImage) -> Vec<u8> {
let mut pixels = Vec::new();
for (_, _, px) in img.pixels() {
let rgba = px.to_rgba();
for channel in px.channels() {
pixels.push(*channel);
}
}
pixels
}
#[no_mangle]
pub extern "C" fn alloc(size: usize) -> *mut c_void {
let mut buf = Vec::with_capacity(size);
let ptr = buf.as_mut_ptr();
mem::forget(buf);
return ptr as *mut c_void;
}
fn process_img(img: DynamicImage) {
let pixels: Vec<u8> = get_pixels(img);
let quantized = NeuQuant::new(NQ_SAMPLE_FACTION, NQ_PALETTE_SIZE, &pixels);
let q = quantized.color_map_rgb();
for c in &q {
unsafe {
log_nr(*c);
}
}
}
#[no_mangle]
pub extern "C" fn read_img(buff_ptr: *mut u8, buff_len: usize) {
let mut img: Vec<u8> = unsafe { Vec::from_raw_parts(buff_ptr, buff_len, buff_len) };
return match image::load_from_memory(&img) {
Ok(img) => {
process_img(img);
}
Err(err) => {
let err_msg: String = err.to_string().to_owned();
let mut ns: String = "[load_from_memory] ".to_owned();
ns.push_str(&err_msg);
unsafe {
log(&ns, ns.len());
}
}
};
}
fn main() {
println!("Hello from rust 2");
}
The JavaScript code is the following:
run('sample.png');
function run(img) {
return compile().then(m => {
return loadImgIntoMem(img, m.instance.exports.memory, m.instance.exports.alloc).then(r => {
return m.instance.exports.read_img(r.imgPtr, r.len);
});
})
}
function compile(wasmFile = 'distil_wasm.gc.wasm') {
return fetch(wasmFile)
.then(r => r.arrayBuffer())
.then(r => {
let module = new WebAssembly.Module(r);
let importObject = {}
for (let imp of WebAssembly.Module.imports(module)) {
if (typeof importObject[imp.module] === "undefined")
importObject[imp.module] = {};
switch (imp.kind) {
case "function": importObject[imp.module][imp.name] = () => {}; break;
case "table": importObject[imp.module][imp.name] = new WebAssembly.Table({ initial: 256, maximum: 256, element: "anyfunc" }); break;
case "memory": importObject[imp.module][imp.name] = new WebAssembly.Memory({ initial: 256 }); break;
case "global": importObject[imp.module][imp.name] = 0; break;
}
}
importObject.env = Object.assign({}, importObject.env, {
log: (ptr, len) => console.log(ptrToStr(ptr, len)),
log_nr: (nr) => console.log(nr),
});
return WebAssembly.instantiate(r, importObject);
});
}
function loadImgIntoMemEmscripten(img) {
return new Promise(resolve => {
fetch(img)
.then(r => r.arrayBuffer())
.then(buff => {
const imgPtr = Module._malloc(buff.byteLength);
const imgHeap = new Uint8Array(Module.HEAPU8.buffer, imgPtr, buff.byteLength);
imgHeap.set(new Uint8Array(buff));
resolve({ imgPtr });
});
});
}
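Two directions that may help narrow this down (suggestions, not a confirmed diagnosis): first, get_pixels converts each pixel with to_rgba() but then pushes the channels of the original px, leaving rgba unused — NeuQuant expects packed 4-byte RGBA input, so it is worth confirming what actually lands in that buffer. Second, running the identical pipeline natively takes the WASM/JS boundary out of the equation entirely. A minimal native sanity check, assuming a local sample.png:

extern crate color_quant;
extern crate image;

use color_quant::NeuQuant;
use image::{GenericImage, Pixel};

fn main() {
    let img = image::open("sample.png").expect("failed to open sample.png");
    // NeuQuant expects packed 4-byte RGBA pixels.
    let mut pixels: Vec<u8> = Vec::new();
    for (_, _, px) in img.pixels() {
        let rgba = px.to_rgba();
        pixels.extend_from_slice(rgba.channels());
    }
    let quantized = NeuQuant::new(10, 256, &pixels);
    let palette = quantized.color_map_rgb();
    // Non-zero output here means the algorithm is fine and the problem
    // lives at the WASM/JS boundary (memory sharing, allocation, imports).
    println!("{:?}", &palette[..12]);
}

If the native run produces a sane palette, the next thing to check is whether the WebAssembly.Memory the JS writes into is the same memory the module actually uses — note that the import loop above instantiates a fresh Memory for any memory import.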

What might cause a difficult-to-reproduce truncation of a Hyper HTTP response?

I am experiencing a bug where my Hyper HTTP response is being truncated to a specific size (7829 bytes). Making the same request with cURL works fine.
The request queries a JSON endpoint for data. The response struct is then shuffled around a lot, because a relatively complex rate-limiting procedure is used to make a number of these requests at once. However, if only one request is made, the response is still truncated.
Before implementing rate-limiting and doing some heavy refactoring, the program made these responses properly.
I made the minimal example below, but it fails to reproduce the problem. At this point I'm not sure where to look. The codebase is moderately complicated and iteratively expanding the reproduction example is difficult, especially when I don't know what might possibly cause this.
What are some ways that Hyper's Response body might get truncated? The response body is acquired as in the handle function below.
#![feature(use_nested_groups)]
extern crate futures;
extern crate hyper;
extern crate hyper_tls;
extern crate tokio_core;
use futures::{Future, Stream};
use hyper::{Body, Chunk, Client, Method, Request, Response};
use hyper_tls::HttpsConnector;
use tokio_core::reactor::Core;
use std::env;
fn main() {
let mut core = Core::new().unwrap();
let client = Client::configure()
.connector(HttpsConnector::new(4, &core.handle()).unwrap())
.build(&core.handle());
fn handle(response: Response<Body>) -> Box<Future<Item = usize, Error = hyper::Error>> {
Box::new(
response
.body()
.concat2()
.map(move |body: Chunk| -> usize { body.len() }),
)
}
let args: Vec<String> = env::args().collect();
let uri = &args[1];
let req = Request::new(Method::Get, uri.parse().unwrap());
let response_body_length = {
let future = Box::new(client.request(req).map(handle).flatten());
core.run(future).unwrap()
};
println!("response body length: {}", response_body_length);
}
Offending code:
extern crate serde;
extern crate serde_json;
use futures::{future, stream, Future, Stream};
use hyper;
use hyper::{client, Body, Chunk, Client, Headers, Method, Request, Response, header::Accept,
header::Date as DateHeader, header::RetryAfter};
use hyper_tls::HttpsConnector;
use tokio_core::reactor::Core;
use models::Bucket;
use std::thread;
use std::time::{Duration, UNIX_EPOCH};
use std::str;
header! { (XRateLimitRemaining, "x-ratelimit-remaining") => [String] }
#[derive(Debug)]
struct Uri(pub String);
const MAX_REQ_SIZE: u32 = 500;
fn make_uri(symbol: &str, page_ix: u32) -> Uri {
Uri(format!(
"https://www.bitmex.com/api/v1/trade/bucketed?\
symbol={symbol}&\
columns={columns}&\
partial=false&\
reverse=true&\
binSize={bin_size}&\
count={count}&\
start={start}",
symbol = symbol,
columns = "close,timestamp",
bin_size = "5m",
count = MAX_REQ_SIZE,
start = 0 + MAX_REQ_SIZE * page_ix
))
}
#[derive(Debug)]
struct RateLimitInfo {
remaining_reqs: u32,
retry_after: Option<Duration>,
}
impl RateLimitInfo {
fn default() -> RateLimitInfo {
RateLimitInfo {
remaining_reqs: 1,
retry_after: None,
}
}
fn from<T>(resp: &Response<T>) -> RateLimitInfo {
let headers = resp.headers();
let remaining_reqs = headers
.get::<XRateLimitRemaining>()
.unwrap_or_else(|| panic!("x-ratelimit-remaining not on request."))
.parse()
.unwrap();
let retry_after = match headers.get::<RetryAfter>() {
Some(RetryAfter::Delay(duration)) => Some(*duration),
_ => None,
};
RateLimitInfo {
remaining_reqs,
retry_after,
}
}
}
fn resp_dated_later<'a>(a: &'a Response<Body>, b: &'a Response<Body>) -> &'a Response<Body> {
let get_date = |resp: &Response<Body>| {
let headers: &Headers = resp.headers();
**headers.get::<DateHeader>().unwrap()
};
if get_date(&a) > get_date(&b) {
a
} else {
b
}
}
#[derive(Debug)]
struct Query {
uri: Uri,
response: Option<Response<Body>>,
}
impl Query {
fn from_uri(uri: Uri) -> Query {
Query {
uri: uri,
response: None,
}
}
}
fn query_good(q: &Query) -> bool {
match &q.response {
Some(response) => response.status().is_success(),
_ => false,
}
}
type HttpsClient = hyper::Client<HttpsConnector<client::HttpConnector>>;
type FutureQuery = Box<Future<Item = Query, Error = hyper::Error>>;
fn to_future(x: Query) -> FutureQuery {
Box::new(future::ok(x))
}
fn exec_if_needed(client: &HttpsClient, query: Query) -> FutureQuery {
fn exec(client: &HttpsClient, q: Query) -> FutureQuery {
println!("exec: {:?}", q);
let uri = q.uri;
let req = {
let mut req = Request::new(Method::Get, uri.0.parse().unwrap());
req.headers_mut().set(Accept::json());
req
};
Box::new(
client
.request(req)
.inspect(|resp| println!("HTTP {}", resp.status()))
.map(|resp| Query {
uri: uri,
response: Some(resp),
}),
)
}
if query_good(&query) {
to_future(query)
} else {
exec(client, query)
}
}
type BoxedFuture<T> = Box<Future<Item = T, Error = hyper::Error>>;
fn do_batch(client: &HttpsClient, queries: Vec<Query>) -> BoxedFuture<Vec<Query>> {
println!("do_batch() {} queries", queries.len());
let exec_if_needed = |q| exec_if_needed(client, q);
let futures = queries.into_iter().map(exec_if_needed);
println!("do_batch() futures {:?}", futures);
Box::new(
stream::futures_ordered(futures).collect(), //future::join_all(futures)
)
}
fn take<T>(right: &mut Vec<T>, suggested_n: usize) -> Vec<T> {
let n: usize = if right.len() < suggested_n {
right.len()
} else {
suggested_n
};
let left = right.drain(0..n);
return left.collect();
}
type BoxedResponses = Box<Vec<Response<Body>>>;
fn batched_throttle(uris: Vec<Uri>) -> BoxedResponses {
println!("batched_throttle({} uris)", uris.len());
let mut core = Core::new().unwrap();
let client = Client::configure()
.connector(HttpsConnector::new(4, &core.handle()).unwrap())
.build(&core.handle());
let mut rate_limit_info = RateLimitInfo::default();
let mut queries_right: Vec<Query> = uris.into_iter().map(Query::from_uri).collect();
loop {
let mut queries_left: Vec<Query> = Vec::with_capacity(queries_right.len());
println!("batched_throttle: starting inner loop");
loop {
// throttle program during testing
thread::sleep(Duration::from_millis(800));
println!("batched_throttle: {:?}", rate_limit_info);
if let Some(retry_after) = rate_limit_info.retry_after {
println!("batched_throttle: retrying after {:?}", retry_after);
thread::sleep(retry_after)
}
if queries_right.is_empty() {
break;
}
let mut queries_mid = {
let ri_count = rate_limit_info.remaining_reqs;
let iter_req_count = if ri_count == 0 { 1 } else { ri_count };
println!("batched_throttle: iter_req_count {}", iter_req_count);
take(&mut queries_right, iter_req_count as usize)
};
println!(
"batched_throttle: \
queries_right.len() {}, \
queries_left.len() {}, \
queries_mid.len() {})",
queries_right.len(),
queries_left.len(),
queries_mid.len()
);
if queries_mid.iter().all(query_good) {
println!("batched_throttle: queries_mid.iter().all(query_good)");
continue;
}
queries_mid = { core.run(do_batch(&client, queries_mid)).unwrap() };
rate_limit_info = {
let create_very_old_response =
|| Response::new().with_header(DateHeader(UNIX_EPOCH.into()));
let very_old_response = create_very_old_response();
let last_resp = queries_mid
.iter()
.map(|q| match &q.response {
Some(r) => r,
_ => panic!("Impossible"),
})
.fold(&very_old_response, resp_dated_later);
RateLimitInfo::from(&last_resp)
};
&queries_left.append(&mut queries_mid);
}
queries_right = queries_left;
if queries_right.iter().all(query_good) {
break;
}
}
println!(
"batched_throttle: finishing. queries_right.len() {}",
queries_right.len()
);
Box::new(
queries_right
.into_iter()
.map(|q| q.response.unwrap())
.collect(),
)
}
fn bucket_count_to_req_count(bucket_count: u32) -> u32 {
let needed_req_count = (bucket_count as f32 / MAX_REQ_SIZE as f32).ceil() as u32;
return needed_req_count;
}
type BoxedBuckets = Box<Vec<Bucket>>;
fn response_to_buckets(response: Response<Body>) -> BoxedFuture<Vec<Bucket>> {
Box::new(response.body().concat2().map(|body: Chunk| -> Vec<Bucket> {
println!("body.len(): {}", body.len());
println!("JSON: {}", str::from_utf8(&body).unwrap());
serde_json::from_slice(&body).unwrap()
}))
}
pub fn get_n_last(symbol: &str, bucket_count: u32) -> BoxedBuckets {
let req_count = bucket_count_to_req_count(bucket_count);
let uris = (0..req_count)
.map(|page_ix| make_uri(symbol, page_ix))
.collect();
let responses = batched_throttle(uris);
let mut core = Core::new().unwrap();
let boxed_buckets = {
let futures = responses.into_iter().map(response_to_buckets);
let future = stream::futures_ordered(futures).collect();
let groups_of_buckets = core.run(future).unwrap();
Box::new(
groups_of_buckets
.into_iter()
.flat_map(|bs| bs.into_iter())
.rev()
.collect(),
)
};
return boxed_buckets;
}
You first create a Core, start lots of requests, and gather the Response "results".
After you have all the Responses, you start a new Core and try to read the data from them - but the server has probably closed the connections long ago due to write timeouts, and you only get partial data.
You shouldn't keep the server waiting; start reading the Responses as soon as possible.
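Concretely, that means draining each body on the same Core that issued the request, and passing buffered bytes around instead of live Response values. A sketch of the shape, using the same hyper 0.11-era API as the code above (not a drop-in patch — Query would then carry a Chunk, or the deserialized Vec<Bucket>, instead of a Response<Body>):

// Buffer the body while the connection is still alive, then hand the
// bytes (hyper::Chunk) around instead of the open Response.
fn exec_to_bytes(
    client: &HttpsClient,
    req: Request,
) -> Box<Future<Item = Chunk, Error = hyper::Error>> {
    Box::new(
        client
            .request(req)
            .and_then(|resp| resp.body().concat2()),
    )
}

With that change the second Core in get_n_last becomes unnecessary: response_to_buckets can run against the already-buffered Chunks, and nothing is left waiting on a connection the server may have timed out.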
