How to create a TOML file from Rust? - rust

I have collected all my data into a vector and I need to create a TOML file with that data. I have managed to create and open a file:
let mut file = try!(File::create("servers.toml"));
My vector<(string,(string, u32))> contains the following data, which should look like this in TOML.
[server.A]
Ipaddr="192.168.4.1"
Port no=4476
[server.B]
......
I have a lot of data that needs to be written as TOML, and I know a TOML file is just text. What is the encoder used for, and how do I use it?

This uses the TOML crate for the structure and serialization. The main benefit is that values should be properly escaped.
use std::fs;
use toml::{map::Map, Value}; // 0.5.1
/// Builds a TOML value describing every server in `v`.
///
/// Each `(name, (ip_addr, port))` entry becomes a table stored under
/// `server.<name>` with the keys `Ipaddr` and `Port no`.
fn to_toml(v: Vec<(String, (String, u32))>) -> Value {
    let mut servers = Map::new();
    for (name, (ip_addr, port)) in v {
        let mut server = Map::new();
        server.insert("Ipaddr".into(), Value::String(ip_addr));
        // `i64::from` is a lossless widening; unlike an `as` cast it stops
        // compiling if the port type ever changes to one that could truncate.
        server.insert("Port no".into(), Value::Integer(i64::from(port)));
        servers.insert(name, Value::Table(server));
    }

    // Nest all servers under a single top-level `server` table.
    let mut map = Map::new();
    map.insert("server".into(), Value::Table(servers));
    Value::Table(map)
}
/// Encodes two sample servers as TOML, prints the document and writes it to
/// `servers.toml`.
fn main() {
    let sample_servers = vec![
        ("A".into(), ("192.168.4.1".into(), 4476)),
        ("B".into(), ("192.168.4.8".into(), 1234)),
    ];
    let document = to_toml(sample_servers);
    let encoded = toml::to_string(&document).expect("Could not encode TOML value");
    println!("{}", encoded);
    fs::write("servers.toml", encoded).expect("Could not write to file!");
}
You can also use this with Serde's automatic serialization and deserialization to avoid dealing with the low-level details:
use serde::Serialize; // 1.0.91
use std::{collections::BTreeMap, fs};
use toml; // 0.5.1
#[derive(Debug, Default, Serialize)]
struct Servers<'a> {
servers: BTreeMap<&'a str, Server<'a>>,
}
/// A single server entry; the serde renames map the Rust field names to the
/// keys used in the TOML output.
#[derive(Debug, Serialize)]
struct Server<'a> {
/// Written to the output under the key `Ipaddr`.
#[serde(rename = "Ipaddr")]
ip_addr: &'a str,
/// Written to the output under the key `Port no`.
#[serde(rename = "Port no")]
port_no: i64,
}
/// Fills a `Servers` value with two sample entries, serializes it to TOML,
/// prints the result and writes it to `servers.toml`.
fn main() {
    let mut file = Servers::default();
    let samples = [("A", "192.168.4.1", 4476), ("B", "192.168.4.8", 1234)];
    for (name, ip_addr, port_no) in samples {
        file.servers.insert(name, Server { ip_addr, port_no });
    }

    let encoded = toml::to_string(&file).expect("Could not encode TOML value");
    println!("{}", encoded);
    fs::write("servers.toml", encoded).expect("Could not write to file!");
}

Related

How can I get/store span duration with Rust tracing?

I want to capture the duration of execution of a span in rust tracing and send that as metric.
I have found that fmt() helps in printing that as mentioned here:How can I log span duration with Rust tracing?
I have also tried this example about creating layer and implementing on_new_span() and on_event(). I added on_close() as well to check what metadata do we get here. The code for that I wrote is:
use tracing::{info, info_span};
use tracing_subscriber::prelude::*;
mod custom_layer;
use custom_layer::CustomLayer;
/// Installs `CustomLayer` on a registry, then opens two spans (one entered
/// inside the other) and emits a single event while both guards are alive.
fn main() {
tracing_subscriber::registry()
.with(CustomLayer)
.init();
// `other_field` is declared empty here and receives its value from the
// `record` call below.
let outer_span = info_span!("Outer", level = 0, other_field = tracing::field::Empty);
// The guards are bound to named variables so they stay alive until the end
// of `main`.
let _outer_entered = outer_span.enter();
outer_span.record("other_field", &7);
let inner_span = info_span!("inner", level = 1);
let _inner_entered = inner_span.enter();
info!(a_bool = true, answer = 42, message = "first example");
}
custom_layer.rs:
use std::collections::BTreeMap;
use tracing_subscriber::Layer;
/// Layer that stores each span's fields as JSON in the span's extensions and
/// prints every event (plus a summary of each closed span) as pretty JSON.
pub struct CustomLayer;
impl<S> Layer<S> for CustomLayer
where
S: tracing::Subscriber,
S: for<'lookup> tracing_subscriber::registry::LookupSpan<'lookup>,
{
/// Collects the attributes of a newly created span into a map and stores it
/// in the span's extensions as `CustomFieldStorage`.
fn on_new_span(
&self,
attrs: &tracing::span::Attributes<'_>,
id: &tracing::span::Id,
ctx: tracing_subscriber::layer::Context<'_, S>,
) {
let span = ctx.span(id).unwrap();
let mut fields = BTreeMap::new();
let mut visitor = JsonVisitor(&mut fields);
attrs.record(&mut visitor);
let storage = CustomFieldStorage(fields);
let mut extensions = span.extensions_mut();
extensions.insert(storage);
}
/// Adds values recorded after span creation to the map stored by
/// `on_new_span`.
fn on_record(
&self,
id: &tracing::span::Id,
values: &tracing::span::Record<'_>,
ctx: tracing_subscriber::layer::Context<'_, S>,
) {
// Get the span whose data is being recorded
let span = ctx.span(id).unwrap();
// Get a mutable reference to the data we created in new_span
let mut extensions_mut = span.extensions_mut();
let custom_field_storage: &mut CustomFieldStorage =
extensions_mut.get_mut::<CustomFieldStorage>().unwrap();
let json_data: &mut BTreeMap<String, serde_json::Value> = &mut custom_field_storage.0;
// And add to using our old friend the visitor!
let mut visitor = JsonVisitor(json_data);
values.record(&mut visitor);
}
/// Prints the event as pretty JSON, together with the stored fields of
/// every span in its scope, listed from the root span inwards.
fn on_event(&self, event: &tracing::Event<'_>, ctx: tracing_subscriber::layer::Context<'_, S>) {
// All of the span context
// NOTE: the `unwrap` below panics if the event has no span scope.
let scope = ctx.event_scope(event).unwrap();
let mut spans = vec![];
for span in scope.from_root() {
let extensions = span.extensions();
let storage = extensions.get::<CustomFieldStorage>().unwrap();
let field_data: &BTreeMap<String, serde_json::Value> = &storage.0;
spans.push(serde_json::json!({
"target": span.metadata().target(),
"name": span.name(),
"level": format!("{:?}", span.metadata().level()),
"fields": field_data,
}));
}
// The fields of the event
let mut fields = BTreeMap::new();
let mut visitor = JsonVisitor(&mut fields);
event.record(&mut visitor);
// And create our output
let output = serde_json::json!({
"target": event.metadata().target(),
"name": event.metadata().name(),
"level": format!("{:?}", event.metadata().level()),
"fields": fields,
"spans": spans,
});
println!("{}", serde_json::to_string_pretty(&output).unwrap());
}
/// Prints a JSON summary of the span that is closing. `fields` here is the
/// `{:?}` rendering of the metadata's field set, not the values stored in
/// `CustomFieldStorage`.
fn on_close(
&self,
id: tracing::span::Id,
ctx: tracing_subscriber::layer::Context<'_, S>,
) {
// Get the span that is being closed
let span = ctx.span(&id).unwrap();
let output = serde_json::json!({
"target": span.metadata().target(),
"name": span.name(),
"level": format!("{:?}", span.metadata().level()),
"fields": format!("{:?}", span.metadata().fields()),
});
println!("On_close{}", serde_json::to_string_pretty(&output).unwrap());
}
}
/// Field visitor that writes every visited field into the borrowed map as a
/// JSON value, keyed by the field's name.
struct JsonVisitor<'a>(&'a mut BTreeMap<String, serde_json::Value>);

impl<'a> JsonVisitor<'a> {
    /// Stores `value` under the name of `field`, replacing any earlier entry.
    fn store(&mut self, field: &tracing::field::Field, value: serde_json::Value) {
        self.0.insert(field.name().to_string(), value);
    }
}

impl<'a> tracing::field::Visit for JsonVisitor<'a> {
    fn record_f64(&mut self, field: &tracing::field::Field, value: f64) {
        self.store(field, serde_json::json!(value));
    }

    fn record_i64(&mut self, field: &tracing::field::Field, value: i64) {
        self.store(field, serde_json::json!(value));
    }

    fn record_u64(&mut self, field: &tracing::field::Field, value: u64) {
        self.store(field, serde_json::json!(value));
    }

    fn record_bool(&mut self, field: &tracing::field::Field, value: bool) {
        self.store(field, serde_json::json!(value));
    }

    fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
        self.store(field, serde_json::json!(value));
    }

    /// Errors are stored as their `Display` text.
    fn record_error(
        &mut self,
        field: &tracing::field::Field,
        value: &(dyn std::error::Error + 'static),
    ) {
        self.store(field, serde_json::json!(value.to_string()));
    }

    /// Any other value is stored as its `Debug` text.
    fn record_debug(&mut self, field: &tracing::field::Field, value: &dyn std::fmt::Debug) {
        self.store(field, serde_json::json!(format!("{:?}", value)));
    }
}
/// Per-span field map (field name to JSON value) that `CustomLayer` keeps in
/// the span's extensions.
#[derive(Debug)]
struct CustomFieldStorage(BTreeMap<String, serde_json::Value>);
Cargo.toml
[package]
name = "tracing-custom-logging"
version = "0.1.0"
edition = "2021"
[dependencies]
serde_json = "1"
tracing = "0.1"
tracing-subscriber = "0.3.16"
snafu = "0.7.3"
thiserror = "1.0.31"
tracing-opentelemetry = "0.18.0"
Unfortunately I have not been able to get the data about duration of a span anywhere. Can you guys help me identify how/where can I get it from?
You cannot "get" the span duration from the tracing crate because it doesn't store it. It only stores the basic metadata and allows for hooking into framework events in a lightweight way. It is the job of the Subscriber to keep track of any additional data.
You could use the tracing-timing crate if you only need periodic histograms. Otherwise, you can't really use data from an existing layer which may already store timing data, because they often don't expose it. You'll have to keep track of it yourself.
Using the tracing-subscriber crate, you can create a Layer and store additional data using the Registry. Here's an example of how that can be done:
use std::time::Instant;
use tracing::span::{Attributes, Id};
use tracing::Subscriber;
use tracing_subscriber::layer::{Context, Layer};
use tracing_subscriber::registry::LookupSpan;
/// Extension data attached to a span so its duration can be computed when it
/// closes.
struct Timing {
/// Moment at which the layer saw the span being created.
started_at: Instant,
}
/// Layer that measures how long each span lives, from creation to close, and
/// prints the result in microseconds.
pub struct CustomLayer;

impl<S> Layer<S> for CustomLayer
where
    S: Subscriber,
    S: for<'lookup> LookupSpan<'lookup>,
{
    /// Stamps the new span with the current instant.
    fn on_new_span(&self, _attrs: &Attributes<'_>, id: &Id, ctx: Context<'_, S>) {
        let span = ctx.span(id).expect("a newly created span must be in the registry");
        span.extensions_mut().insert(Timing {
            started_at: Instant::now(),
        });
    }

    /// Prints the time elapsed since `on_new_span` ran for this span.
    fn on_close(&self, id: Id, ctx: Context<'_, S>) {
        let span = ctx.span(&id).expect("a closing span must still be in the registry");
        // `Instant` is `Copy`, so the extensions guard is released as soon as
        // this statement ends.
        let started_at = span
            .extensions()
            .get::<Timing>()
            .expect("`on_new_span` stores a `Timing` for every span")
            .started_at;
        println!(
            "span {} took {}",
            span.metadata().name(),
            started_at.elapsed().as_micros(),
        );
    }
}
This just prints out the results where they are calculated, but you can emit the results elsewhere, or store it in some shared resource as you see fit.
Some example usage:
use std::time::Duration;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
/// Instrumented example function that blocks the current thread for `n`
/// seconds.
#[tracing::instrument]
fn test(n: u64) {
    let pause = Duration::from_secs(n);
    std::thread::sleep(pause);
}
/// Installs `CustomLayer` on a default registry and runs `test` for one, two
/// and three seconds.
fn main() {
    let subscriber = tracing_subscriber::registry::Registry::default().with(CustomLayer);
    subscriber.init();

    for seconds in 1..=3 {
        test(seconds);
    }
}
span test took 1000081
span test took 2000106
span test took 3000127
You may also need to be aware of on_enter() and on_exit(), which are relevant when using async functions because their execution may be suspended and resumed later, and you can use those functions to be notified when that happens. Depending on what you're looking for, you may need to add filtering so that you only track the spans you're interested in (by name or target or whatever).

Deserializing multiple documents with `serde_yaml`

I am appending a stream of events to a YAML log file, where each event is represented by an individual document, like this:
---
type: event
id: 1
---
type: trigger
id: 2
At some point later I want to iterate on these events, parsing each via serde_yaml. To my understanding though, serde_yaml doesn't seem to support parsing multiple documents from a single reader, as none of the available methods mention it, and trying to parse multiple documents at once results in a MoreThanOneDocument error.
use std::io::{self, BufRead};
use serde_yaml;
use serde::{self, Deserialize};
/// One logged message. The variant is selected by the document's `type` key,
/// with variant names matched in snake_case.
#[derive(Deserialize, Debug)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum Message {
Event { id: i32 },
Trigger { id: i32},
}
/// Reproduces the problem from the question: the input holds two YAML
/// documents but is parsed as a single `Message`.
fn main() -> io::Result<()> {
let yaml = "---\ntype: event\nid: 1\n---\n\ntype: trigger\nid: 2";
// Per the question text, this call fails with a `MoreThanOneDocument` error,
// so the `unwrap` panics.
let v: Message = serde_yaml::from_reader(yaml.as_bytes()).unwrap();
println!("{:?}", v);
Ok(())
}
I'm totally new to Rust, so maybe I completely missed the point of serde and just did not understand how to do it.
How would you parse such YAML, please?
I cooked up something that looks like a working solution, but I think I'll try to post it among the answers instead, because I don't want to bias other answers too much towards my solution. I kindly encourage you to have a look at it as well however, any feedback is welcome.
The documentation of serde_yaml::Deserializer shows an example very similar to yours. It would work like this:
use serde::Deserialize;
/// One logged message. The variant is selected by the document's `type` key,
/// with variant names matched in snake_case.
#[derive(Deserialize, Debug)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum Message {
Event { id: i32 },
Trigger { id: i32 },
}
/// Iterates over every document in a multi-document YAML string and
/// deserializes each one into a `Message`.
fn main() {
    let yaml = "---\ntype: event\nid: 1\n---\ntype: trigger\nid: 2\n";
    let documents = serde_yaml::Deserializer::from_str(yaml);
    for document in documents {
        let message = Message::deserialize(document).unwrap();
        println!("{:?}", message);
    }
}
I really hope to find a native solution by using serde and serde_yaml only, but until then the way I got it working is as follows.
/// Extension that lets a buffered reader hand out one YAML document at a time.
trait BufReaderYamlExt {
    /// Reads lines until a `---` separator line or the end of the stream.
    ///
    /// Returns the accumulated text without the trailing separator, or `None`
    /// once nothing is left to read.
    fn read_next_yaml(&mut self) -> io::Result<Option<String>>;
}

impl<T: io::Read> BufReaderYamlExt for io::BufReader<T> {
    fn read_next_yaml(&mut self) -> io::Result<Option<String>> {
        const SEPARATOR: &str = "\n---\n";

        let mut buffer = String::with_capacity(200);
        loop {
            if self.read_line(&mut buffer)? == 0 {
                // End of stream: return whatever has been gathered so far.
                break;
            }
            // A separator only ends the document when some text precedes it,
            // so a leading `---` line stays part of the document.
            let body_len = buffer.len().saturating_sub(SEPARATOR.len());
            if body_len > 0 && buffer.ends_with(SEPARATOR) {
                buffer.truncate(body_len);
                break;
            }
        }

        if buffer.is_empty() {
            return Ok(None);
        }
        buffer.shrink_to_fit();
        Ok(Some(buffer))
    }
}
The trait extends the BufReader with an extra method that returns an optional owned String (or None at the end of the stream) containing just the portion with a single YAML document.
By iterating on it, one can then apply serde_yaml::from_str() to parse each document into a Message value.
/// Splits a two-document YAML string with `read_next_yaml` and parses each
/// piece into a `Message`.
fn main() -> io::Result<()> {
    let yaml = "---\ntype: event\nid: 1\n\n---\n\ntype: trigger\nid: 2\n";
    let mut reader = io::BufReader::new(yaml.as_bytes());
    loop {
        let document = match reader.read_next_yaml()? {
            Some(document) => document,
            None => break,
        };
        let message: Message = serde_yaml::from_str(&document).unwrap();
        println!("parsed: {:?}", message);
    }
    Ok(())
}
I've made available the full source on the rust playground as well.

How can I put an async function into a Vec in Rust?

I need to put some futures in a Vec for later joining. However if I try to collect it using an iterator, the compiler doesn't seem to be able to determine the type for the vector.
I'm trying to create a command line utility that accepts an arbitrary number of IP addresses, communicates with those remotes and collects the results for printing. The communication function works well, I've cut down the program to show the failure I need to understand.
use futures::future::join_all;
use itertools::Itertools;
use std::net::SocketAddr;
use std::str::from_utf8;
use std::fmt;
/// Builds one `MyStruct::get` future per socket address, awaits them all with
/// `join_all`, then prints the resulting lines sorted by a fixed byte range.
#[tokio::main(flavor = "current_thread")]
pub async fn main() -> Result<(), Box<dyn std::error::Error>> {
let socket: Vec<SocketAddr> = vec![
"192.168.20.33:502".parse().unwrap(),
"192.168.20.34:502".parse().unwrap(),];
let async_vec = vec![
MyStruct::get(socket[0]),
MyStruct::get(socket[1]),];
// The above 3 lines happen to work to build a Vec because there are
// 2 sockets. But I need to build a Vec to join_all from an arbitrary
// number of addresses. Why doesn't the line below work instead?
//let async_vec = socket.iter().map(|x| MyStruct::get(*x)).collect();
let rt = join_all(async_vec).await;
// Each result is rendered through `Display`; the `unwrap` panics if any
// result is an `Err`.
let results = rt.iter().map(|x| x.as_ref().unwrap().to_string()).join("\n");
let mut rvec: Vec<String> = results.split("\n").map(|x| x.to_string()).collect();
// Sorts by bytes 15..20 of each line; this slicing panics for a line shorter
// than 20 bytes.
rvec.sort_by(|a, b| a[15..20].cmp(&b[15..20]));
println!("{}", rvec.join("\n"));
Ok(())
}
/// Result produced by `MyStruct::get` for one socket address.
struct MyStruct {
// Twelve raw bytes; the `Display` impl tries to read them as UTF-8.
serial: [u8; 12],
// Free-form text included in every line of the `Display` output.
placeholder: String,
}
/// Renders three newline-separated lines of the form
/// `<serial>, line<N>, <placeholder>` for N = 1, 2, 3.
impl fmt::Display for MyStruct {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Fall back to a marker when the serial bytes are not valid UTF-8.
        let serial = from_utf8(&self.serial).unwrap_or("(invalid)");
        let rendered = (1..4)
            .map(|line_no| format!("{}, line{}, {}", serial, line_no, self.placeholder))
            .join("\n");
        write!(f, "{}", rendered)
    }
}
impl MyStruct {
    /// Builds a `MyStruct` from `sockaddr` without doing any I/O: the serial
    /// is the last character of the IP address repeated twelve times, and the
    /// placeholder is the address rendered as text.
    pub async fn get(sockaddr: SocketAddr) -> Result<MyStruct, Box<dyn std::error::Error>> {
        let ip_text = sockaddr.ip().to_string();
        let last_char = ip_text.chars().last().unwrap();
        Ok(MyStruct {
            serial: [last_char as u8; 12],
            placeholder: sockaddr.to_string(),
        })
    }
}
This line:
let async_vec = socket.iter().map(|x| MyStruct::get(*x)).collect();
doesn't work because the compiler can't know that you want to collect everything into a Vec. You might want to collect into some other container (e.g. a linked list or a set). Therefore you need to tell the compiler the kind of container you want with:
let async_vec = socket.iter().map(|x| MyStruct::get(*x)).collect::<Vec::<_>>();
or:
let async_vec: Vec::<_> = socket.iter().map(|x| MyStruct::get(*x)).collect();

How do I read CSV data without knowing the structure at compile time?

I'm pretty new to Rust and trying to implement some kind of database. Users should create tables by giving a table name, a vector of column names and a vector of column types (realized over an enum). Filling tables should be done by specifying csv files. However, this requires the structure of the table rows to be specified at compile time, like shown in the basic example:
/// Record layout fixed at compile time; `read_from_file` deserializes each
/// CSV record into this struct.
#[derive(Debug, Deserialize, Eq, PartialEq)]
struct Row {
key: u32,
name: String,
comment: String
}
use std::error::Error;
use csv::ReaderBuilder;
use serde::Deserialize;
use std::fs;
/// Reads the `|`-delimited, header-less CSV file at `path` and prints its
/// first record.
///
/// # Errors
///
/// Returns an error if the file cannot be read, if the first record cannot be
/// deserialized into a `Row`, or if the file contains no records.
fn read_from_file(path: &str) -> Result<(), Box<dyn Error>> {
    // Propagate I/O failures to the caller; this function already returns a
    // `Result`, so a missing file should not panic.
    let data = fs::read_to_string(path)?;
    let mut rdr = ReaderBuilder::new()
        .has_headers(false)
        .delimiter(b'|')
        .from_reader(data.as_bytes());
    let mut iter = rdr.deserialize();
    match iter.next() {
        Some(result) => {
            let record: Row = result?;
            println!("{:?}", record);
            Ok(())
        }
        None => Err(From::from("expected at least one record but got none")),
    }
}
Is there a possibility to use the generic table information instead of the "Row"-struct to cast the results from the deserialization? Is it possible to simply allocate memory according to the combined sizes of the column types and parse the records in? I would do something like this in C...
Is there a possibility to use the generic table information instead of the "Row"-struct to cast the results from the deserialization?
All generics are replaced with concrete types at compile time. If you do not know until runtime which types you will need, generics are not the right tool.
Is it possible to simply allocate memory according to the combined sizes of the column types and parse the records in? I would do something like this in C...
I suggest using Box<dyn Any> instead, so that you can store a value of any type and still find out at runtime what type it is.
The maintenance cost of this approach is pretty high: you have to handle each possible value type everywhere you use a cell's value. On the other hand, you do not need to parse the value each time; you only make some type checks at runtime.
I have used std::any::TypeId to identify the type, but it cannot be used in match expressions. You could consider using a custom enum as the type identifier instead.
use std::any::{Any, TypeId};
use std::io::Read;
use csv::Reader;
/// A table whose column types are only known at runtime.
#[derive(Default)]
struct Table {
name: String,
// Column name paired with the `TypeId` of the type its cells are parsed into.
headers: Vec<(String, TypeId)>,
// One inner `Vec` per record; each cell is a type-erased boxed value.
data: Vec<Vec<Box<dyn Any>>>,
}
impl Table {
    /// Appends a column called `header` whose cells are parsed as the type
    /// identified by `_type`.
    fn add_header(&mut self, header: String, _type: TypeId) {
        self.headers.push((header, _type));
    }

    /// Parses every record from `rdr` into a row of type-erased cells and
    /// appends it to the table.
    ///
    /// Fields are paired with the declared columns by position. `u32` and
    /// `String` are the only supported column types.
    ///
    /// # Errors
    ///
    /// Returns an error if a record cannot be read, if a `u32` cell fails to
    /// parse, or if a column was declared with an unsupported type. Rows
    /// appended before the failure stay in the table.
    fn populate_data<R: Read>(
        &mut self,
        rdr: &mut Reader<R>,
    ) -> Result<(), Box<dyn std::error::Error>> {
        for record in rdr.records() {
            let record = record?;
            let mut row: Vec<Box<dyn Any>> = Vec::with_capacity(self.headers.len());
            for ((header, type_id), value) in self.headers.iter().zip(record.iter()) {
                let cell: Box<dyn Any> = if *type_id == TypeId::of::<u32>() {
                    Box::new(value.parse::<u32>()?)
                } else if *type_id == TypeId::of::<String>() {
                    Box::new(value.to_owned())
                } else {
                    // Skipping the cell would shift every later cell one
                    // column to the left, so reject the column instead.
                    return Err(format!(
                        "column `{}` has an unsupported type: {:?}",
                        header, type_id
                    )
                    .into());
                };
                row.push(cell);
            }
            self.data.push(row);
        }
        Ok(())
    }
}
/// Prints the table name, then the column names, then one line per row; every
/// name and cell is followed by `, `.
impl std::fmt::Display for Table {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Table: {}", self.name)?;

        for (column, _) in &self.headers {
            write!(f, "{}, ", column)?;
        }
        writeln!(f)?;

        for row in &self.data {
            for cell in row {
                // Cells that are neither `u32` nor `String` print nothing.
                if let Some(number) = cell.downcast_ref::<u32>() {
                    write!(f, "{}, ", number)?;
                } else if let Some(text) = cell.downcast_ref::<String>() {
                    write!(f, "{}, ", text)?;
                }
            }
            writeln!(f)?;
        }
        Ok(())
    }
}
/// Declares a three-column table, fills it from an in-memory CSV string and
/// prints it.
fn main() {
let mut table: Table = Default::default();
table.name = "Foo".to_owned();
// Column order here must match the field order in the CSV data below,
// because fields are paired with columns by position.
table.add_header("key".to_owned(), TypeId::of::<u32>());
table.add_header("name".to_owned(), TypeId::of::<String>());
table.add_header("comment".to_owned(), TypeId::of::<String>());
let data = "\
key,name,comment
1,foo,foo comment
2,bar,bar comment
";
// NOTE(review): presumably the reader's defaults consume the first line as a
// header row, so it is never parsed as a `u32` — confirm against the csv docs.
let mut rdr = Reader::from_reader(data.as_bytes());
table.populate_data(&mut rdr).unwrap();
print!("{}", table);
}

How to perform centralized error handling for pipelined method call in Rust?

I want to perform json parsing and save the result to a Hash Set. I want to use iter(), like:
let my_json_list = vec!("{\"name\":\"Tim\", \"age\":18}", "{\"name\":\"Alex\", \"age\":25}")
let mut my_hash_set::HashSet<String> = HashSet::new();
my_hash_set.extend(
my_json_list.iter()
.map(|str| serde_json::from_str(str)) // return Result
.map(|json| json.pointer("/name") // return Option
.when_err_or_none(|err| skip();) // handle Err and None, not extend the map
.map(|key| key.as_str().unwrap().to_owned())
)
Is this possible?
let my_json_list = vec![
    "{\"name\":\"Tim\", \"age\":18}",
    "{\"name\":\"Alex\", \"age\":25}",
];
let mut my_hash_set: HashSet<String> = HashSet::new();
my_hash_set.extend(
    my_json_list
        .iter()
        // Skip inputs that are not valid JSON.
        .filter_map(|text| serde_json::from_str::<Value>(text).ok())
        // Skip values whose `name` is missing or not a string; `filter_map`
        // does the check and the extraction in one step, without `unwrap`.
        .filter_map(|json| json["name"].as_str().map(str::to_owned)),
);
Is this what you want?
I think in this case I would define a dedicated struct Person, and let serde do the heavy lifting:
use serde_json; // 1.0.48
use serde; // 1.0.104
use serde::Deserialize;
use std::collections::HashSet;
/// Shape of one JSON entry as deserialized by serde.
#[derive(Deserialize)]
struct Person {
name: String,
// Not read by the example, which only collects `name`.
age: usize,
}
/// Collects the `name` of every JSON entry that deserializes into a `Person`;
/// entries that fail to parse are skipped.
fn main() {
    let my_json_list = vec![
        "{\"name\":\"Tim\", \"age\":18}",
        "{\"name\":\"Alex\", \"age\":25}",
    ];

    let mut my_hash_set: HashSet<String> = HashSet::new();
    for raw in my_json_list.iter() {
        // Only the name is of interest; parse failures are ignored.
        if let Ok(person) = serde_json::from_str::<Person>(raw) {
            my_hash_set.insert(person.name);
        }
    }
}
https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=607cfe125b5690fb89b23ca904da31cc
If you prefer a more ad-hoc solution without a struct:
use serde_json; // 1.0.48
use std::collections::HashSet;
/// Collects the string-valued `name` of every entry that parses as JSON,
/// without a dedicated struct; anything else is skipped.
fn main() {
    let my_json_list = vec![
        "{\"name\":\"Tim\", \"age\":18}",
        "{\"name\":\"Alex\", \"age\":25}",
    ];

    let mut my_hash_set: HashSet<String> = HashSet::new();
    for raw in my_json_list.iter() {
        let parsed = match serde_json::from_str::<serde_json::Value>(raw) {
            Ok(value) => value,
            Err(_) => continue,
        };
        // Keep the name only when the key exists and holds a string.
        if let Some(name) = parsed.get("name").and_then(|name_val| name_val.as_str()) {
            my_hash_set.insert(name.to_string());
        }
    }
}
https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=607cfe125b5690fb89b23ca904da31cc

Resources