How can I obtain AMQP message header values using lapin? - rust

How can I obtain the values in the message headers of an AMQP message via crate lapin (RabbitMQ client)?
I am trying to obtain the values of message headers from the lapin::message::Delivery struct.
I am using Delivery.properties.headers(), which returns &Option<amq_protocol_types::FieldTable>.
How do I read the values in the FieldTable?
Are there any examples that show how to do so?
let mut consumer = channel
    .basic_consume(
        "hello",
        "my_consumer",
        BasicConsumeOptions::default(),
        FieldTable::default(),
    )
    .await?;
while let Some(delivery) = consumer.next().await {
    let (_, delivery2) = delivery.expect("error in consumer");
    message_cnt += 1;
    // delivery2.data is of type Vec<u8>
    let payload_str: String = match String::from_utf8(delivery2.data.to_owned()) {
        Ok(v) => v,
        Err(e) => panic!("Invalid UTF-8 sequence: {}", e),
    };
    // lapin::BasicProperties contains the properties and the headers of the message.
    let log_message: String = format!(
        "message_cnt is:{}, delivery_tag is:{}, exchange is:{}, routing_key is:{}, redelivered is:{}, properties is:'{:?}', received data is:'{:?}'",
        &message_cnt,
        &delivery2.delivery_tag,
        &delivery2.exchange,
        &delivery2.routing_key,
        &delivery2.redelivered,
        &delivery2.properties,
        &payload_str,
    );
    let amqp_msg_headers_option: &Option<amq_protocol_types::FieldTable> = delivery2.properties.headers();
    let amqp_msg_headers: &amq_protocol_types::FieldTable = match amqp_msg_headers_option {
        None => {
            let bt = backtrace::Backtrace::new();
            let log_message = format!(">>>>>At receive_message_from_amqp(), message received has no headers, backtrace is '{:?}'", &bt);
            slog::error!(my_slog_logger, "{}", log_message);
            let custom_error = std::io::Error::new(std::io::ErrorKind::Other, &log_message.to_string()[..]);
            return std::result::Result::Err(Box::new(custom_error));
        }
        Some(amqp_msg_headers) => amqp_msg_headers,
    };
    if amqp_msg_headers.contains_key("worker_id") {
        //let worker_id2:String=amqp_msg_headers.get("worker_id").into();
        let amqp_msg_headers_btm: &std::collections::BTreeMap<amq_protocol_types::ShortString, lapin::types::AMQPValue> = amqp_msg_headers.inner();
        // The map is keyed by ShortString (not AMQPValue), so look up with a ShortString key.
        let worker_id2_option = amqp_msg_headers_btm.get(&lapin::types::ShortString::from("worker_id"));
    }
    delivery2
        .ack(BasicAckOptions::default())
        .await?;
}

Well, I found a solution of sorts. It entails the use of serde_json: I used serde_json to obtain a serde_json::Value from the amq_protocol_types::FieldTable taken from the properties field of lapin::message::Delivery, and then processed the serde_json::Value to extract each header key and value.
Below is the function I wrote for the above logic.
fn extract_amqp_msg_headers_values(
    my_slog_logger: &slog::Logger,
    amqp_msg_headers_basic_properties: &lapin::BasicProperties,
) -> std::result::Result<std::collections::HashMap<String, String>, Box<std::io::Error>> {
    let mut amqp_msg_headers_hm: std::collections::HashMap<String, String> = std::collections::HashMap::new();
    let amqp_msg_headers_option = amqp_msg_headers_basic_properties.headers();
    let amqp_msg_headers = match amqp_msg_headers_option {
        None => {
            let bt = backtrace::Backtrace::new();
            let log_message = format!(">>>>>At extract_amqp_msg_headers_values(), message received has no headers, backtrace is '{:?}'", &bt);
            slog::error!(my_slog_logger, "{}", log_message);
            let custom_error = std::io::Error::new(std::io::ErrorKind::Other, &log_message.to_string()[..]);
            return std::result::Result::Err(Box::new(custom_error));
        }
        Some(amqp_msg_headers) => amqp_msg_headers,
    };
    //let mut worker_id2:String="".to_owned();
    let amqp_msg_headers_btm: &std::collections::BTreeMap<amq_protocol_types::ShortString, lapin::types::AMQPValue> = amqp_msg_headers.inner();
    let amqp_msg_headers_serde_value_option = serde_json::to_value(&amqp_msg_headers_btm);
    let amqp_msg_headers_serde_value: serde_json::value::Value = match amqp_msg_headers_serde_value_option {
        Err(err) => {
            let bt = backtrace::Backtrace::new();
            let log_message = format!(">>>>>At extract_amqp_msg_headers_values(), pos 2b, some error has been encountered transforming amqp_msg_headers to json, amqp_msg_headers is:'{:?}', error is:'{:?}', backtrace is '{:?}'", &amqp_msg_headers, &err, &bt);
            slog::error!(my_slog_logger, "{}", log_message);
            let custom_error = std::io::Error::new(std::io::ErrorKind::Other, &log_message.to_string()[..]);
            return std::result::Result::Err(Box::new(custom_error));
        }
        Ok(serde_json_value) => serde_json_value,
    };
    let serde_json_map: &serde_json::Map<String, serde_json::Value> = amqp_msg_headers_serde_value.as_object().unwrap();
    let amqp_msg_headers_serde_value_key_vec2: Vec<String> = serde_json_map.keys().cloned().collect();
    for amqp_msg_headers_serde_value_key2 in amqp_msg_headers_serde_value_key_vec2 {
        // Each header serializes as a one-entry JSON object: the entry's key names
        // the AMQPValue variant and its value holds the actual header payload.
        let amqp_msg_headers_serde_value2: &serde_json::value::Value = &serde_json_map[&amqp_msg_headers_serde_value_key2];
        let serde_json_map3: &serde_json::Map<String, serde_json::Value> = amqp_msg_headers_serde_value2.as_object().unwrap();
        let some_header_key: String = remove_quotes(&amqp_msg_headers_serde_value_key2).to_owned();
        let amqp_msg_headers_serde_value_key_vec3: Vec<String> = serde_json_map3.keys().cloned().collect();
        for amqp_msg_headers_serde_value_key3 in amqp_msg_headers_serde_value_key_vec3 {
            let some_header_value: String = remove_quotes(&serde_json_map3[&amqp_msg_headers_serde_value_key3].to_string()).to_owned();
            amqp_msg_headers_hm.insert(some_header_key.to_owned(), some_header_value.to_owned());
        }
    }
    //slog::info!(my_slog_logger," amqp_msg_headers_hm is:'{:?}'",&amqp_msg_headers_hm);
    Ok(amqp_msg_headers_hm)
}
/// This is a general-purpose function to strip double quotes (and stray
/// spaces or commas) surrounding the supplied &str value.
pub fn remove_quotes(some_str: &str) -> &str {
    // The trimmed string is a slice into the original string, hence no new
    // allocation is performed.
    let chars_to_trim: &[char] = &['"', ' ', ','];
    some_str.trim_matches(chars_to_trim)
}
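In hindsight, the serde_json round trip may be avoidable: the FieldTable's inner BTreeMap can be walked directly, matching on the AMQPValue variants. A minimal sketch along those lines (untested, and only handling the string-typed variants; other variants would need their own match arms):
use std::collections::HashMap;
use lapin::types::{AMQPValue, FieldTable};

fn headers_to_map(headers: &FieldTable) -> HashMap<String, String> {
    let mut out = HashMap::new();
    // inner() exposes the BTreeMap<ShortString, AMQPValue> directly.
    for (key, value) in headers.inner().iter() {
        let text = match value {
            // AMQP header strings usually arrive as LongString.
            AMQPValue::LongString(s) => String::from_utf8_lossy(s.as_bytes()).to_string(),
            AMQPValue::ShortString(s) => s.as_str().to_string(),
            // Fall back to the Debug form for non-string values.
            other => format!("{:?}", other),
        };
        out.insert(key.as_str().to_string(), text);
    }
    out
}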

Related

Rust with Datafusion - Trying to Write DataFrame to Json

Repo w/ WIP code: https://github.com/jmelm93/rust-datafusion-csv-processing
I started programming with Rust 2 days ago, and I've been trying to resolve this since about 3 hours into trying out Rust...
Any help would be appreciated.
My goal is to write a Dataframe from Datafusion to JSON (which will eventually be used to respond to HTTP requests in an API with the JSON string).
The DataFrame turns into a datafusion::arrow::record_batch::RecordBatch when you collect the data, and this data type is what I'm having trouble converting.
I've tried:
Using json::writer::record_batches_to_json_rows from Arrow, but it won't let me due to "struct datafusion::arrow::record_batch::RecordBatch and struct arrow::record_batch::RecordBatch have similar names, but are actually distinct types". I haven't been able to successfully convert the types to avoid this.
Turning the RecordBatch into a vec and pulling out the headers and the values individually. I was able to get the headers out, but haven't had success with the values.
let mut header = Vec::new();
// let mut rows = Vec::new();
for record_batch in &record_batches {
    // get data
    println!("record_batch.columns: : {:?}", record_batch.columns());
    for col in record_batch.columns() {
        for row in 0..col.len() {
            // println!("Cow: {:?}", col);
            // println!("Row: {:?}", row);
            // let value = col.as_any().downcast_ref::<StringArray>().unwrap().value(row);
            // rows.push(value);
        }
    }
    // get headers
    for field in record_batch.schema().fields() {
        header.push(field.name().to_string());
    }
}
Anyone know how to accomplish this?
The full script is below:
// datafusion examples: https://github.com/apache/arrow-datafusion/tree/master/datafusion-examples/examples
// datafusion docs: https://arrow.apache.org/datafusion/
use datafusion::prelude::*;
use datafusion::arrow::datatypes::Schema;
use arrow::json;
// use serde::{ Deserialize };
use serde_json::to_string;
use std::sync::Arc;
use std::str;
use std::fs;
use std::ops::Deref;

type DFResult = Result<Arc<DataFrame>, datafusion::error::DataFusionError>;

struct FinalObject {
    schema: Schema,
    // columns: Vec<Column>,
    num_rows: usize,
    num_columns: usize,
}

// to allow debug logging for FinalObject
impl std::fmt::Debug for FinalObject {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // write!(f, "FinalObject {{ schema: {:?}, columns: {:?}, num_rows: {:?}, num_columns: {:?} }}",
        write!(f, "FinalObject {{ schema: {:?}, num_rows: {:?}, num_columns: {:?} }}",
            // self.schema, self.columns, self.num_columns, self.num_rows)
            self.schema, self.num_rows, self.num_columns)
    }
}

fn create_or_delete_csv_file(path: String, content: Option<String>, operation: &str) {
    match operation {
        "create" => {
            match content {
                Some(c) => fs::write(path, c.as_bytes()).expect("Problem with writing file!"),
                None => println!("The content is None, no file will be created"),
            }
        }
        "delete" => {
            // Delete the csv file
            fs::remove_file(path).expect("Problem with deleting file!");
        }
        _ => println!("Invalid operation"),
    }
}

async fn read_csv_file_with_inferred_schema(file_name_string: String) -> DFResult {
    // create string csv data
    let csv_data_string = "heading,value\nbasic,1\ncsv,2\nhere,3".to_string();
    // Create a temporary file
    create_or_delete_csv_file(file_name_string.clone(), Some(csv_data_string), "create");
    // Create a session context
    let ctx = SessionContext::new();
    // Register a lazy DataFrame using the context
    let df = ctx.read_csv(file_name_string.clone(), CsvReadOptions::default()).await.expect("An error occurred while reading the CSV string");
    // return the dataframe
    Ok(Arc::new(df))
}

#[tokio::main]
async fn main() {
    let file_name_string = "temp_file.csv".to_string();
    let arc_csv_df = read_csv_file_with_inferred_schema(file_name_string.clone()).await.expect("An error occurred while reading the CSV string (funct: read_csv_file_with_inferred_schema)");
    // have to use ".clone()" each time I want to use this ref
    let deref_df = arc_csv_df.deref();
    // print to console
    deref_df.clone().show().await.expect("An error occurred while showing the CSV DataFrame");
    // collect to vec
    let record_batches = deref_df.clone().collect().await.expect("An error occurred while collecting the CSV DataFrame");
    // println!("Data: {:?}", data);
    // record_batches == <Vec<RecordBatch>>. Convert to RecordBatch
    let record_batch = record_batches[0].clone();
    // let json_string = to_string(&record_batch).unwrap();
    // let mut writer = datafusion::json::writer::RecordBatchJsonWriter::new(vec![]);
    // writer.write(&record_batch).unwrap();
    // let json_rows = writer.finish();
    let json_rows = json::writer::record_batches_to_json_rows(&[record_batch]);
    println!("JSON: {:?}", json_rows);
    // get final values from recordbatch
    // https://docs.rs/arrow/latest/arrow/record_batch/struct.RecordBatch.html
    // https://users.rust-lang.org/t/how-to-use-recordbatch-in-arrow-when-using-datafusion/70057/2
    // https://github.com/apache/arrow-rs/blob/6.5.0/arrow/src/util/pretty.rs
    // let record_batches_vec = record_batches.to_vec();
    let mut header = Vec::new();
    // let mut rows = Vec::new();
    for record_batch in &record_batches {
        // get data
        println!("record_batch.columns: : {:?}", record_batch.columns());
        for col in record_batch.columns() {
            for row in 0..col.len() {
                // println!("Cow: {:?}", col);
                // println!("Row: {:?}", row);
                // let value = col.as_any().downcast_ref::<StringArray>().unwrap().value(row);
                // rows.push(value);
            }
        }
        // get headers
        for field in record_batch.schema().fields() {
            header.push(field.name().to_string());
        }
    }
    // println!("Header: {:?}", header);
    // Delete temp csv
    create_or_delete_csv_file(file_name_string.clone(), None, "delete");
}
I am not sure that Datafusion is the perfect place to convert CSV string into JSON string, however here is a working version of your code:
#[tokio::main]
async fn main() {
    let file_name_string = "temp_file.csv".to_string();
    let csv_data_string = "heading,value\nbasic,1\ncsv,2\nhere,3".to_string();
    // Create a temporary file
    create_or_delete_csv_file(file_name_string.clone(), Some(csv_data_string), "create");
    // Create a session context
    let ctx = SessionContext::new();
    // Register the csv file
    ctx.register_csv("t1", &file_name_string, CsvReadOptions::new().has_header(false))
        .await.unwrap();
    let df = ctx.sql("SELECT * FROM t1").await.unwrap();
    // collect to vec
    let record_batches = df.collect().await.unwrap();
    // get json rows
    let json_rows = datafusion::arrow::json::writer::record_batches_to_json_rows(&record_batches[..]).unwrap();
    println!("JSON: {:?}", json_rows);
    // Delete temp csv
    create_or_delete_csv_file(file_name_string.clone(), None, "delete");
}
If you encounter arrow and datafusion struct conflicts, use datafusion::arrow instead of just the arrow library.
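Since the stated goal is a JSON string for an HTTP response, note that json_rows is a Vec of serde_json maps, so it can be serialized into a single string with serde_json (a small sketch, appended at the end of the main above):
// json_rows serializes as one JSON array, suitable for an HTTP response body.
let json_string = serde_json::to_string(&json_rows).unwrap();
println!("JSON string: {}", json_string);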

Split value on array rust

I need to split the value of an array like
let v: Vec<&str> = token.split("Bearer ").collect();
and log the value of the token. The value is Bearer eyJGciOiJSU and I need to get only eyJGciOiJSU.
When I print v[1] to the log I get an "index out of range" error, any idea?
Hard to assess without a minimal example, but this works on the playground:
fn main() {
    let token = "Bearer eyJGciOiJSU";
    let v: Vec<_> = token.split("Bearer ").collect();
    println!("{}", v[1]); // prints "eyJGciOiJSU"

    let id = token.strip_prefix("Bearer ").expect(r#"Token ought to start with "Bearer""#);
    println!("{}", id); // prints "eyJGciOiJSU"
}
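A likely cause of the index-out-of-range panic (a guess, since there is no minimal example): the token at runtime doesn't actually start with "Bearer " (different casing, missing trailing space, or an empty header), in which case split returns a single element and v[1] doesn't exist. Handling the Option returned by strip_prefix avoids the panic:
// Sketch with a hypothetical header value; strip_prefix returns None
// instead of panicking when the prefix is absent.
fn extract_token(header: &str) -> Option<&str> {
    header.strip_prefix("Bearer ")
}

fn main() {
    match extract_token("Bearer eyJGciOiJSU") {
        Some(tok) => println!("token: {}", tok),
        None => eprintln!("header did not start with \"Bearer \""),
    }
}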

How can I use threads to run this code simultaneously in rust?

I have a Rust program that creates temporary email addresses using the mail.tm API, and I want to use threads to create emails simultaneously to increase the speed. However, what I have tried only results in printing "Getting email..." x amount of times and then exiting. I am unsure what to do about this. Any help or suggestions are appreciated.
use json;
use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};
use reqwest;
use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, CONTENT_TYPE};
use std::{collections::HashMap, io, iter, vec::Vec};
use std::thread;

fn gen_address() -> Vec<String> {
    let mut rng = thread_rng();
    let address: String = iter::repeat(())
        .map(|()| rng.sample(Alphanumeric))
        .map(char::from)
        .take(10)
        .collect();
    let password: String = iter::repeat(())
        .map(|()| rng.sample(Alphanumeric))
        .map(char::from)
        .take(5)
        .collect();
    let body = reqwest::blocking::get("https://api.mail.tm/domains")
        .unwrap()
        .text()
        .unwrap();
    let domains = json::parse(&body).expect("Failed to parse domain json.");
    let domain = domains["hydra:member"][0]["domain"].to_string();
    // join the local part and the domain with '@'
    let email = format!("{}@{}", &address, &domain);
    vec![email, password]
}

fn gen_email() -> Vec<String> {
    let client = reqwest::blocking::Client::new();
    let address_info = gen_address();
    let address = &address_info[0];
    let password = &address_info[1];
    let mut data = HashMap::new();
    data.insert("address", &address);
    data.insert("password", &password);
    let mut headers = HeaderMap::new();
    headers.insert(ACCEPT, HeaderValue::from_static("application/ld+json"));
    headers.insert(
        CONTENT_TYPE,
        HeaderValue::from_static("application/ld+json"),
    );
    let res = client
        .post("https://api.mail.tm/accounts")
        .headers(headers)
        .json(&data)
        .send()
        .unwrap();
    vec![
        res.status().to_string(),
        address.to_string(),
        password.to_string(),
    ]
}

fn main() {
    fn get_amount() -> i32 {
        let mut amount = String::new();
        loop {
            println!("How many emails do you want?");
            io::stdin()
                .read_line(&mut amount)
                .expect("Failed to read line.");
            let _amount: i32 = match amount.trim().parse() {
                Ok(num) => return num,
                Err(_) => {
                    println!("Please enter a number.");
                    continue;
                }
            };
        }
    }
    let amount = get_amount();
    let handle = thread::spawn(move || {
        for _gen in 0..amount {
            let handle = thread::spawn(|| {
                println!("Getting email...");
                let maildata = gen_email();
                println!(
                    "Status: {}, Address: {}, Password: {}",
                    maildata[0], maildata[1], maildata[2]);
            });
        }
    });
    handle.join().unwrap();
}
Rust Playground example
I see a number of sub-threads being spawned from an outer thread. I think you might want to keep those handles and join them: unless you join those sub-threads, the outer thread will exit early. I set up a Rust Playground to demonstrate ^^.
In the playground example, first run the code as-is and note its output - the function it runs is not_joining_subthreads(). Note that it terminates rather abruptly. Then modify the code to call joining_subthreads(). You should then see the subthreads printing their stdout messages.
let handle = thread::spawn(move || {
    let mut handles = vec![];
    for _gen in 0..amount {
        let handle = thread::spawn(|| {
            println!("Getting email...");
            let maildata = gen_email();
            println!(
                "Status: {}, Address: {}, Password: {}",
                maildata[0], maildata[1], maildata[2]);
        });
        handles.push(handle);
    }
    handles.into_iter().for_each(|h| h.join().unwrap());
});
handle.join().unwrap();
handle.join().unwrap();
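As a side note, the outer wrapper thread adds nothing here; the same logic can live directly in main (a sketch reusing gen_email and amount from the question):
// Spawn the workers straight from main and join them there;
// no outer wrapper thread is needed.
let handles: Vec<_> = (0..amount)
    .map(|_| {
        thread::spawn(|| {
            println!("Getting email...");
            let maildata = gen_email();
            println!(
                "Status: {}, Address: {}, Password: {}",
                maildata[0], maildata[1], maildata[2]
            );
        })
    })
    .collect();
for h in handles {
    h.join().unwrap();
}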

Parallel collection for unique string counting

I need a highly parallelized collection, something like Java's concurrent skip list. The general task: I am working on a server that counts unique words across all the messages it receives. I don't care what the words are, only the count. Once in a while I get a get_count message, reset the counter, and start all over.
But I am always bottlenecked on the post_words function. The same thing in Java runs in 5 s; in Rust it takes 80 s. I have tried the experimental skip-list set from crossbeam and got the same result. The other issue is string allocation. Any ideas?
// DashSet from https://docs.rs/dashmap/4.0.2/dashmap/struct.DashSet.html
type Words = DashSet<String>;

let set: Arc<Words> = Arc::new(DashSet::with_capacity(100000));
// for each new socket I create
let set = set.clone();

// Word processing
fn post_words(client: i32, data: Vec<u8>, db: &Words) -> Response {
    let mut decoder = GzDecoder::new(data.as_slice());
    let mut input = String::new();
    decoder.read_to_string(&mut input).unwrap();
    // The bottleneck
    for word in input.split_whitespace() {
        db.insert(String::from(word));
    }
    let mut response = Response::new();
    response.status = Response_Status::OK;
    return response
}
Since I don't need to know the words, I can keep only a hash; now I don't have to allocate a String for each word. This improved my solution from 80 s to 1.7 s:
type Words = DashSet<u64>;

async fn post_words(client: i32, data: Vec<u8>, db: &Words) -> Response {
    let mut decoder = GzDecoder::new(data.as_slice());
    let mut input = String::new();
    decoder.read_to_string(&mut input).unwrap();
    for word in input.split_whitespace() {
        let mut s = DefaultHasher::new();
        word.hash(&mut s);
        db.insert(s.finish());
    }
    let mut response = Response::new();
    response.status = Response_Status::OK;
    return response
}
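Worth keeping in mind: hashing each word down to a u64 makes the count approximate, since two distinct words can collide (unlikely at modest scales, but possible). If the exact words ever matter again, a middle ground is to allocate only for genuinely new words; DashSet<String> can be probed with a &str, so the lookup itself needs no allocation (a sketch):
// Only allocate a String when the word is new to the set; repeated
// words are filtered by the &str lookup.
for word in input.split_whitespace() {
    if !db.contains(word) {
        db.insert(word.to_owned());
    }
}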

rust - Send data from function argument (from a thread) into mpsc channel

I'm trying to handle TcpStream connections using threads in a Rust program (a TCP server). I want to keep track of current client connections in a HashMap, using an Arc<Mutex<HashMap>>.
When a thread finishes, to remove the connection from the HashMap, I thought about using a UUID passed to the function that handles the stream. This way it could be sent over an mpsc channel to the main thread, which would then remove the connection from the HashMap.
The code from the main thread:
let clients_streams = Arc::new(Mutex::new(HashMap::new()));
// Server thread. Create channel
let (server_sender, server_receiver) = mpsc::channel();
loop {
    for stream in server_socket.incoming() {
        let clients_streams = Arc::clone(&clients_streams);
        let server_sender = server_sender.clone();
        match stream {
            Ok(stream) => {
                let mut cs = clients_streams.lock().unwrap();
                let uuid = Uuid::new_v4().to_string();
                // try_clone() returns an io::Result; unwrap to store the TcpStream itself
                cs.insert(uuid.clone(), stream.try_clone().unwrap());
                thread::spawn(move || {
                    handle_client(stream, server_sender.clone(), uuid.clone());
                })
            }
            Err(err) => thread::spawn(move || {
                println!("Connection failed : {}", err);
            }),
        };
    }
}
The handle_client function:
fn handle_client<'a>(stream: TcpStream, sender: mpsc::Sender<&'a [u8]>, uuid: String) {
    let mut reader = BufReader::new(&stream);
    let mut writer = BufWriter::new(&stream);
    writer.write(b"Hello\n").unwrap();
    writer.flush().unwrap();
    // Read from client
    loop {
        let mut resp = Vec::new();
        let read_bytes = reader.read_until(b'\n', &mut resp);
        match read_bytes {
            Ok(read_bytes) => {
                if read_bytes == 0 {
                    let msg = format!("end of {}", uuid);
                    println!("connection closed by remote");
                    sender.send(msg.as_bytes()).unwrap();
                    break;
                };
            }
            Err(err) => match err.kind() {
                io::ErrorKind::Interrupted => continue,
                _ => break,
            },
        }
    }
}
This code doesn't compile; it shows the error below, and I don't understand how to resolve it. The variable msg doesn't live long enough because of the uuid inside it, but why? I had to add the lifetime because I'm sending the uuid inside the channel.
fn handle_client<'a>(stream: TcpStream, sender: mpsc::Sender<&'a [u8]>, uuid: String) {
-- lifetime `'a` defined here
sender.send(msg.as_bytes()).unwrap();
------------^^^------------
| |
| borrowed value does not live long enough
argument requires that `msg` is borrowed for `'a`
break;
};
- `msg` dropped here while still borrowed
Uuid does not affect msg, because the format! macro creates a clean new String. The problem is that you are trying to send a &[u8] (which is what as_bytes() returns). You need to find a way to remove this reference (maybe something like How to convert from &[u8] to Vec<u8>?). You can also share references with Rc.
PS: this is more a comment than an answer, but I can't post answers.
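Following that advice, one way out (a sketch, not the only option) is to make the channel carry owned bytes, so nothing borrowed from inside the thread has to outlive it:
use std::net::TcpStream;
use std::sync::mpsc;

// The channel now owns the bytes it transports, so handle_client
// needs no lifetime parameter at all.
fn handle_client(_stream: TcpStream, sender: mpsc::Sender<Vec<u8>>, uuid: String) {
    // ... same reading loop as in the question, elided here ...
    let msg = format!("end of {}", uuid);
    // into_bytes() moves the String's buffer into the channel;
    // msg no longer has to be borrowed for any lifetime.
    sender.send(msg.into_bytes()).unwrap();
}
On the receiving side, String::from_utf8(bytes) recovers the text; alternatively the channel could carry String directly.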
