Caching sqlx Pool causes overrun in file handles

Caching sqlx Pool causes overrun in file handles - rust

I plan to build an application which uses SQLite databases as data files.
Because different files may be opened repeatedly, I want to cache the connections.
I'm very new to Rust; this is my first project...
My problem is: at some point I run out of file handles, and I cannot create new database files.
What I tried so far:
test1() only works if I implement Drop for MyPool. Drop closes the connection pool. By doing this, I can be sure the file handles are freed again.
test2(), is the async version which I would need for my project (it will be a Rocket app). Here I'm not successful at all.
If you run the code, you would have to delete all db.* files afterwards.
// Cargo.toml
// tokio = { version = "1", features = ["rt-multi-thread", "macros" ] }
// futures = "0.3"
// sqlx = { version = "0.5", features = [ "runtime-tokio-native-tls", "sqlite", "migrate" ] }
use sqlx::{migrate::MigrateDatabase, sqlite::SqlitePoolOptions, Pool, Sqlite};
use futures::executor::block_on;
use std::sync::{Arc, Mutex};
/// Newtype wrapper around an sqlx SQLite connection pool whose `Drop` closes the pool.
// NOTE(review): `Clone` is derived, so dropping any clone also runs `close()`; presumably
// clones of `Pool` share one underlying pool — confirm this is intended.
#[derive(Clone)]
struct MyPool(Pool<Sqlite>);
impl Drop for MyPool {
/// Logs the drop and blocks the current thread until the pool's `close()` future completes.
fn drop(&mut self) {
println!("**** drop");
// `drop` is synchronous, so the future returned by `close()` is driven to completion
// with `futures::executor::block_on`.
// NOTE(review): when this runs on a tokio worker thread the thread is blocked for the
// duration of the close — verify this is acceptable for the async use case.
block_on(
self.0.close()
);
}
}
/// Entry point: runs the sequential experiment (`test1`) on the tokio runtime.
#[tokio::main]
async fn main() {
test1().await;
// The concurrent experiment is left disabled here.
//test2().await;
}
/// Sequentially creates the SQLite files `./db.1.db` … `./db.999.db`, opens a pool
/// (at most 5 connections) on each of them and keeps the pools in a small cache.
///
/// Whenever the cache already holds 10 pools it is cleared before the new pool is
/// stored; clearing drops every cached `MyPool`, which runs its `Drop` implementation.
///
/// # Panics
/// Panics if a database file cannot be created or a pool cannot connect.
async fn test1() {
    let mut pool: Vec<MyPool> = Vec::new();
    for i in 1..1000 {
        let db_name = format!("./db.{}.db", i);
        // The panic message is built lazily: `expect(format!(..).as_str())` would allocate
        // the string on every iteration, even when the call succeeds.
        Sqlite::create_database(&db_name)
            .await
            .unwrap_or_else(|e| panic!("create {} failed: {:?}", i, e));
        let conn = SqlitePoolOptions::new()
            .max_connections(5)
            .connect(&db_name)
            .await
            .unwrap_or_else(|e| panic!("connect {} failed: {:?}", i, e));
        // Evict everything once the cache is full; the new pool is added afterwards.
        if pool.len() == 10 {
            println!("Clenup");
            pool.clear();
        }
        println!("{}", i);
        pool.push(MyPool(conn));
    }
}
/// Concurrent variant of the experiment: spawns 1000 tokio tasks, each of which creates
/// `./db.<i>.db`, opens a pool (at most 5 connections) on it and stores the pool in a
/// cache shared behind `Arc<Mutex<..>>`.
///
/// A task that finds 10 pools in the cache clears it first; the cached `MyPool` values
/// are therefore dropped while the mutex guard is held. The results of the joined
/// tasks are discarded.
///
/// # Panics
/// A task panics if its database cannot be created or connected, or if the mutex is
/// poisoned.
async fn test2() {
    let pool: Arc<Mutex<Vec<MyPool>>> = Arc::new(Mutex::new(Vec::new()));
    let tasks: Vec<_> = (0..1000)
        .map(|i| {
            let my_pool = pool.clone();
            tokio::spawn(async move {
                let db_name = format!("./db.{}.db", i);
                // Lazy panic messages: avoids formatting a string on every successful call.
                Sqlite::create_database(&db_name)
                    .await
                    .unwrap_or_else(|e| panic!("create {} failed: {:?}", i, e));
                let conn = SqlitePoolOptions::new()
                    .max_connections(5)
                    .connect(&db_name)
                    .await
                    .unwrap_or_else(|e| panic!("connect {} failed: {:?}", i, e));
                {
                    // The guard is taken only after all awaits, so it is never held
                    // across an `.await` point.
                    let mut locked_pool = my_pool.lock().expect("locked");
                    if locked_pool.len() == 10 {
                        println!("Clenup");
                        locked_pool.clear();
                    }
                    println!("{}", i);
                    locked_pool.push(MyPool(conn));
                }
            })
        })
        .collect();
    // Wait for all tasks to complete.
    futures::future::join_all(tasks).await;
}

Related

How do I simultaneously read messages from multiple Tokio channels in a single task?

I'd like to both read and process messages from two channels and construct another message and send this message via another channel.
Messages from the two channels are received at different frequencies (as per sleep).
Example: "foo1" and "bar1" are received, so we process them and form "foo1bar1". "foo2" is received ("bar2" will be received in 2sec), so we will process it as "foo2bar1". "foo3" is received, so "foo3bar1" is constructed. When "bar2" is received, then we get "foo4bar2" and so on.
In the current implementation, since the two tasks don't communicate with one another, I cannot do the "fooNbarM" construction.
use std::time::Duration;
use tokio;
use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
use tokio::time::sleep;
use futures::future::join_all;
async fn message_sender(msg: &'static str, foo_tx: UnboundedSender<Result<&str, Box<dyn std::error::Error + Send>>>) {
loop {
match foo_tx.send(Ok(msg)) {
Ok(()) => {
if msg == "foo" {
sleep(Duration::from_millis(1000)).await;
} else {
sleep(Duration::from_millis(3000)).await;
}
}
Err(_) => {
println!("failed to send foo");
break;
}
}
}
}
/// Starts a "foo" and a "bar" sender plus one independent receiver task per channel,
/// then waits for all four tasks.
///
/// Each receiver only prints what arrives on its own channel; the two receivers do not
/// share any state.
#[tokio::main]
async fn main() {
    // Item type carried by both channels; named once instead of being spelled out
    // four times.
    type Message = Result<&'static str, Box<dyn std::error::Error + Send>>;

    let (foo_tx, mut foo_rx): (UnboundedSender<Message>, UnboundedReceiver<Message>) =
        tokio::sync::mpsc::unbounded_channel();
    let (bar_tx, mut bar_rx): (UnboundedSender<Message>, UnboundedReceiver<Message>) =
        tokio::sync::mpsc::unbounded_channel();

    let foo_sender_handle = tokio::spawn(async move {
        message_sender("foo", foo_tx).await;
    });
    let foo_handle = tokio::spawn(async move {
        while let Some(v) = foo_rx.recv().await {
            println!("{:?}", v);
        }
    });
    let bar_sender_handle = tokio::spawn(async move {
        message_sender("bar", bar_tx).await;
    });
    let bar_handle = tokio::spawn(async move {
        while let Some(v) = bar_rx.recv().await {
            println!("{:?}", v);
        }
    });

    let handles = vec![foo_sender_handle, foo_handle, bar_sender_handle, bar_handle];
    // `join_all` accepts any `IntoIterator`; the join results are intentionally ignored.
    join_all(handles).await;
}
Cargo.toml
[package]
name = "play"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
tokio = { version = "1.16.1", features = ["full"] }
futures = "0.3.21"

Use tokio::select to wait for either channel to become ready:
use futures::future; // 0.3.19
use std::time::Duration;
use tokio::{
sync::mpsc::{self, UnboundedSender},
time,
}; // 1.16.1
/// Sends `"<msg><count>"` (count = 0, 1, 2, …) on `foo_tx`, pausing after each send.
///
/// The pause is 100 ms when `msg` is `"foo"` and 300 ms otherwise. The function returns
/// once a send fails instead of panicking the task.
async fn message_sender(msg: &'static str, foo_tx: UnboundedSender<String>) {
    // The delay depends only on `msg`; compute it once.
    let pause = Duration::from_millis(if msg == "foo" { 100 } else { 300 });
    for count in 0u64.. {
        let message = format!("{msg}{count}");
        if foo_tx.send(message).is_err() {
            // Nobody is listening any more, so there is nothing left to do.
            return;
        }
        time::sleep(pause).await;
    }
}
/// Runs a "foo" and a "bar" sender and a single receiver task that combines the most
/// recent value of each channel.
///
/// After every received message the receiver prints `<latest foo><latest bar>`, provided
/// at least one message has arrived on each channel. The receiver stops once both
/// channels are closed.
#[tokio::main]
async fn main() {
    let (foo_tx, mut foo_rx) = mpsc::unbounded_channel();
    let (bar_tx, mut bar_rx) = mpsc::unbounded_channel();
    let foo_sender_handle = tokio::spawn(message_sender("foo", foo_tx));
    let bar_sender_handle = tokio::spawn(message_sender("bar", bar_tx));
    let receive_handle = tokio::spawn(async move {
        // `None` until the first message of the respective channel has been seen.
        let mut foo = None;
        let mut bar = None;
        loop {
            tokio::select! {
                // Matching on `Some(..)` keeps the last value when a channel closes: a
                // `None` from `recv()` disables that branch instead of overwriting the
                // stored message.
                Some(f) = foo_rx.recv() => foo = Some(f),
                Some(b) = bar_rx.recv() => bar = Some(b),
                // Both channels are closed: stop instead of spinning on `None` forever.
                else => break,
            }
            if let (Some(foo), Some(bar)) = (&foo, &bar) {
                println!("{foo}{bar}");
            }
        }
    });
    future::join_all([foo_sender_handle, bar_sender_handle, receive_handle]).await;
}
You also have to handle the case where only one message has been received yet, so Option comes in useful.

Tokio channel sends, but doesn't receive

TL;DR I'm trying to have a background thread with an ID that is controlled via that ID and web calls, and the background thread doesn't seem to be getting the message via any of the channel types I've tried.
I've tried both the std channels as well as tokio's, and of those I've tried all but the watcher type from tokio. All have the same result which probably means that I've messed something up somewhere without realizing it, but I can't find the issue:
use std::collections::{
hash_map::Entry::{Occupied, Vacant},
HashMap,
};
use std::sync::Arc;
use tokio::sync::mpsc::{self, UnboundedSender};
use tokio::sync::RwLock;
use tokio::task::JoinHandle;
use uuid::Uuid;
use warp::{http, Filter};
/// Bookkeeping for one background pipeline: its task handle and the sender used to
/// signal it.
#[derive(Default)]
pub struct Switcher {
/// Join handle of the spawned background task; `None` by default.
pub handle: Option<JoinHandle<bool>>,
/// Sending half of the channel the background task listens on; `None` by default.
pub pipeline_end_tx: Option<UnboundedSender<String>>,
}
impl Switcher {
/// Stores `tx`, replacing any previously stored sender.
pub fn set_sender(&mut self, tx: UnboundedSender<String>) {
self.pipeline_end_tx = Some(tx);
}
/// Stores `handle`, replacing any previously stored task handle.
pub fn set_handle(&mut self, handle: JoinHandle<bool>) {
self.handle = Some(handle);
}
}
/// IPv4 address passed to the server (`0.0.0.0`).
const ADDR: [u8; 4] = [0, 0, 0, 0];
/// TCP port passed to the server.
const PORT: u16 = 3000;
/// Shared, lock-protected map from a session id string to that session's `Switcher`.
type RunningPipelines = Arc<RwLock<HashMap<String, Arc<RwLock<Switcher>>>>>;
/// Builds the two warp routes (`POST session` creates a background task, `DELETE session`
/// signals and removes it) and serves them on `ADDR:PORT`.
#[tokio::main]
async fn main() {
// Registry of running sessions, shared by both routes.
let running_pipelines = Arc::new(RwLock::new(HashMap::<String, Arc<RwLock<Switcher>>>::new()));
// POST session: spawn a background task, register it under a fresh UUID and reply with
// `{"session_id": <uuid>}`.
let session_create = warp::post()
.and(with_pipelines(running_pipelines.clone()))
.and(warp::path("session"))
.then(|pipelines: RunningPipelines| async move {
println!("session requested OK!");
let id = Uuid::new_v4();
let mut switcher = Switcher::default();
// The sender is kept in the switcher; the receiver moves into the background task.
let (tx, mut rx) = mpsc::unbounded_channel::<String>();
switcher.set_sender(tx);
// Background task: waits for exactly one message (or channel closure), logs it and
// finishes with `true`.
let t = tokio::spawn(async move {
println!("Background going...");
//This would be something processing in the background until it received the end signal
match rx.recv().await {
Some(v) => {
println!(
"Got end message:{} YESSSSSS#!##!!!!!!!!!!!!!!!!1111eleven",
v
);
}
None => println!("Error receiving end signal:"),
}
println!("ABORTING HANDLE");
true
});
let ret = HashMap::from([("session_id", id.to_string())]);
switcher.set_handle(t);
// Scoped so the write guard on the registry is released right after the insert.
{
pipelines
.write()
.await
.insert(id.to_string(), Arc::new(RwLock::new(switcher)));
}
Ok(warp::reply::json(&ret))
});
// DELETE session?session_id=<id>: remove the session from the registry, send it the end
// message and abort its task.
let session_end = warp::delete()
.and(with_pipelines(running_pipelines.clone()))
.and(warp::path("session"))
.and(warp::query::<HashMap<String, String>>())
.then(
|pipelines: RunningPipelines, p: HashMap<String, String>| async move {
println!("session end requested OK!: {:?}", p);
match p.get("session_id") {
// No `session_id` query parameter: 400.
None => Ok(warp::reply::with_status(
"Please specify session to end",
http::StatusCode::BAD_REQUEST,
)),
Some(id) => {
let mut pipe = pipelines.write().await;
match pipe.entry(String::from(id)) {
Occupied(handle) => {
println!("occupied");
let (k, v) = handle.remove_entry();
// Release the registry lock before awaiting the per-session lock.
drop(pipe);
println!("removed from hashmap, key:{}", k);
let s = v.write().await;
if let Some(h) = &s.handle {
if let Some(tx) = &s.pipeline_end_tx {
match tx.send("goodbye".to_string()) {
Ok(res) => {
println!(
"sent end message|{:?}| to fpipeline: {}",
res, id
);
//Added this to try to get it to at least Error on the other side
// `tx` is a `&UnboundedSender` here, so this drops only the reference;
// the sender stored in the switcher stays alive.
drop(tx);
},
Err(err) => println!(
"ERROR sending end message to pipeline({}):{}",
id, err
),
};
} else {
println!("no sender channel found for pipeline: {}", id);
};
// The abort is requested immediately after the send attempt, whatever its outcome.
// NOTE(review): the task may be cancelled before it gets to handle the message
// it was just sent — confirm whether awaiting the handle is intended instead.
h.abort();
} else {
println!(
"no luck finding the value in handle in the switcher: {}",
id
);
};
}
Vacant(_) => {
println!("no luck finding the handle in the pipelines: {}", id)
}
};
// "done"/200 is returned for both the occupied and the vacant case.
Ok(warp::reply::with_status("done", http::StatusCode::OK))
}
}
},
);
let routes = session_create
.or(session_end)
.recover(handle_rejection)
.with(warp::cors().allow_any_origin());
println!("starting server...");
warp::serve(routes).run((ADDR, PORT)).await;
}
/// Turns any rejection into a JSON reply whose body is the rejection's `Debug` text.
///
/// Never fails: the error type is `Infallible`.
async fn handle_rejection(
    err: warp::Rejection,
) -> Result<impl warp::Reply, std::convert::Infallible> {
    let description = format!("{:?}", err);
    let reply = warp::reply::json(&description);
    Ok(reply)
}
fn with_pipelines(
pipelines: RunningPipelines,
) -> impl Filter<Extract = (RunningPipelines,), Error = std::convert::Infallible> + Clone {
warp::any().map(move || pipelines.clone())
}
depends:
[dependencies]
warp = "0.3"
tokio = { version = "1", features = ["full"] }
uuid = { version = "0.8.2", features = ["serde", "v4"] }
Results when I boot up, send a "create" request, and then an "end" request with the received ID:
starting server...
session requested OK!
Background going...
session end requested OK!: {"session_id": "6b984a45-38d8-41dc-bf95-422f75c5a429"}
occupied
removed from hashmap, key:6b984a45-38d8-41dc-bf95-422f75c5a429
sent end message|()| to fpipeline: 6b984a45-38d8-41dc-bf95-422f75c5a429
You'll notice that the background thread starts (and doesn't end) when the "create" request is made, but when the "end" request is made, while everything appears to complete successfully from the request(web) side, the background thread doesn't ever receive the message. As I've said I've tried all different channel types and moved things around to get it into this configuration... i.e. flattened and thread safetied as much as I could or at least could think of. I'm greener than I would like in rust, so any help would be VERY appreciated!

I think that the issue here is that you are sending the message and then immediately aborting the background task:
tx.send("goodbye".to_string());
//...
h.abort();
And the background task does not have time to process the message, as the abort is of higher priority.
What you need is to join the task, not to abort it.
Curiously, tokio task handles do not have a join() method; instead you await the handle itself. But for that you need to own the handle, so first you have to extract the handle from the Switcher:
let mut s = v.write().await;
//steal the task handle
if let Some(h) = s.handle.take() {
//...
tx.send("goodbye".to_string());
//...
//join the task
h.await.unwrap();
}
Note that joining a task may fail, in case the task is aborted or panicked. I'm just panicking in the code above, but you may want to do something different.
Or... you could choose not to wait for the task. In tokio, if you drop a task handle, the task is detached and simply runs until it finishes.

How to wait for tokio tasks to finish?

I am trying to write to a HashMap using the Arc<Mutex<T>> pattern as part of a website scraping exercise inspired from the Rust cookbook.
This first part uses tokio runtime. I cannot get past the tasks being completed and returning the HashMap as it just hangs.
type Db = Arc<Mutex<HashMap<String, bool>>>;
/// Fetches the links of the start page, checks every link in its own tokio task and
/// returns a copy of the resulting `url -> outcome` map stored in `db`.
///
/// The join results of the spawned tasks are discarded.
///
/// # Panics
/// Panics if `NodeUrl::new` returns an error.
pub async fn handle_async_tasks(db: Db) -> BoxResult<HashMap<String, bool>> {
    let links = NodeUrl::new("https://www.inverness-courier.co.uk/")
        .await
        .unwrap();
    let arc = db.clone();
    let mut handles = Vec::new();
    for link in links.links_with_paths {
        let x = arc.clone();
        handles.push(tokio::spawn(async move {
            process(x, link).await;
        }));
    }
    // for handle in handles {
    //     handle.await.expect("Task panicked!");
    // } < I tried this as well>
    futures::future::join_all(handles).await;
    let readables = arc.lock().await;
    // Iterate by reference: cloning the whole map just to print it is unnecessary.
    for (key, value) in readables.iter() {
        println!("Checking db: k, v ==>{} / {}", key, value);
    }
    // A single clone produces the owned map handed back to the caller.
    let clone_db = readables.clone();
    Ok(clone_db)
}
/// Checks `url` and records in `db` whether the check returned `Ok`.
async fn process(db: Db, url: Url) {
// The guard is acquired before the request and lives until the end of the function,
// so the map stays locked while `check_link` is awaited.
let mut db = db.lock().await;
println!("checking {}", url);
// Only `is_ok()` is inspected; the `bool` inside `Ok` is not used.
if check_link(&url).await.is_ok() {
db.insert(url.to_string(), true);
} else {
db.insert(url.to_string(), false);
}
}
/// Requests `url` and reports whether the response status is anything other than
/// `404 Not Found`.
///
/// # Errors
/// Propagates the error of the HTTP request.
async fn check_link(url: &Url) -> BoxResult<bool> {
    let status = reqwest::get(url.as_ref()).await?.status();
    let found = status != StatusCode::NOT_FOUND;
    Ok(found)
}
/// A start page and the links collected for it.
// NOTE(review): the constructor (`NodeUrl::new`) is not shown in this snippet; presumably
// it fetches the page and fills `links_with_paths` — confirm.
pub struct NodeUrl {
// Private; not read anywhere in the visible code.
domain: String,
/// Links that the caller iterates over and checks one by one.
pub links_with_paths: Vec<Url>,
}
/// Entry point: creates the shared map and runs `handle_async_tasks` on it.
#[tokio::main]
async fn main() {
let db: Db = Arc::new(Mutex::new(HashMap::new()));
// The future is driven with `futures::executor::block_on` rather than `.await`.
// NOTE(review): this blocks the calling thread of the `#[tokio::main]` runtime while the
// tokio tasks spawned inside must make progress — likely related to the reported
// hang; confirm.
let db = futures::executor::block_on(task::handle_async_tasks(db));
}
I would like to return the HashMap to main() where the main thread is blocked. How can I wait for all async threaded processes to be complete and return the HashMap?

let links = NodeUrl::new("https://www.some-site.com/.co.uk/").await.unwrap();
This doesn't seem like a valid URL to me.
/// Checks `url` and records in `db` whether the check returned `Ok`.
async fn process(db: Db, url: Url) {
// Lock taken first: the guard is still alive while `check_link` is awaited below.
let mut db = db.lock().await;
println!("checking {}", url);
if check_link(&url).await.is_ok() {
db.insert(url.to_string(), true);
} else {
db.insert(url.to_string(), false);
}
}
This is highly problematic. You hold the exclusive lock on the database during the entire request. This makes your application effectively serial.
The default timeout in reqwest is 30 seconds. So if the server isn't responsive and you have a lot of links to go through the program might just seem to 'hang'.
You should only get the database lock for as short as possible - just to do the insert:
/// Checks `url` first and only then locks `db` to record whether the check returned `Ok`.
async fn process(db: Db, url: Url) {
println!("checking {}", url);
// NOTE(review): `is_ok()` ignores the `bool` carried by `Ok`, so an `Ok(false)` result
// is stored as `true` — confirm whether that is intended.
if check_link(&url).await.is_ok() {
// The lock is held only for the insert.
let mut db = db.lock().await;
db.insert(url.to_string(), true);
} else {
let mut db = db.lock().await;
db.insert(url.to_string(), false);
}
}
Or even better, eliminating the useless if:
/// Checks `url` and records the outcome in `db`, holding the lock only for the insert.
///
/// The stored value is `true` only when `check_link` succeeds *and* reports `true`;
/// a failed request or an `Ok(false)` result is stored as `false`.
async fn process(db: Db, url: Url) {
    println!("checking {}", url);
    // `is_ok()` would throw away the boolean computed by `check_link` and record a
    // link as valid even when the check itself said otherwise.
    let valid = check_link(&url).await.unwrap_or(false);
    db.lock().await.insert(url.to_string(), valid);
}
Finally you didn't show your main function, the way you invoke handle_async_tasks or have other stuff running might be problematic.

My main issue was how to handle the MutexGuard - which I did in the end by using clone and returning the inner value.
There was no need to use an futures::executor in main: since we are within the tokio runtime, calling .await was sufficient to access the final value synchronously.
Cloning the Arc once was enough; I had cloned it twice before passing it into the multi-threaded context.
Thanks to @orlp for pointing out the bad logic concerning the check_link function.
/// Collects the links of the start page, checks each one in its own tokio task and
/// returns a copy of the resulting `url -> outcome` map.
///
/// # Panics
/// Panics if `NodeUrl::new` returns an error.
pub async fn handle_async_tasks() -> BoxResult<HashMap<String, bool>> {
    let db: Db = Arc::new(Mutex::new(HashMap::new()));
    let start_page = NodeUrl::new("https://www.invernesscourier.co.uk/")
        .await
        .unwrap();
    // One task per link; every task gets its own handle to the shared map.
    let workers: Vec<_> = start_page
        .links_with_paths
        .into_iter()
        .map(|link| {
            let shared = Arc::clone(&db);
            tokio::spawn(async move {
                process(shared, link).await;
            })
        })
        .collect();
    futures::future::join_all(workers).await;
    // Copy the map out from behind the lock so an owned value can be returned.
    let snapshot = db.lock().await.clone();
    Ok(snapshot)
}
/// Entry point: awaits the link check and prints every recorded entry.
#[tokio::main]
async fn main() {
    let results = task::handle_async_tasks().await.unwrap();
    results
        .into_iter()
        .for_each(|(key, value)| println!("Checking db: {} / {}", key, value));
}
This is by no means the best Rust code, but I wanted to share how I tackled things in the end.

Waiting for a list of futures in Rust

I am attempting to make a future that continuously finds new work to do and then maintains a set of futures for those work items. I would like to make sure that my main future, which finds the work, is never blocked for long periods of time, and that the work items run concurrently.
Here is a rough overview of what I am trying to do. Specifically, isDone does not exist, and from what I understand from the docs it isn't necessarily a valid way to use futures in Rust. What is the idiomatic way of doing this kind of thing?
use std::collections::HashMap;
use tokio::runtime::Runtime;
/// Returns the currently known work items as an `id -> payload` map.
///
/// Placeholder implementation: always yields ids `1` and `2`, both with payload `"test"`.
async fn find_work() -> HashMap<i64, String> {
    // Go read from the DB or something...
    [1, 2]
        .iter()
        .map(|&id| (id, "test".to_string()))
        .collect()
}
/// Processes one work item; the placeholder implementation prints `"<id>: <value>"`.
async fn do_work(id: i64, value: String) -> () {
    // Result<(), Error> {
    let line = format!("{}: {}", id, value);
    println!("{}", line);
}
/// Sketch of the intended scheduling loop: repeatedly look for work, create a future for
/// every id that is not already pending, and remove the futures that have finished.
// NOTE: `isDone` is not defined anywhere in this snippet, so this function does not
// compile as written; it only illustrates the intent.
async fn async_main() -> () {
// Futures of the work items that have been created but not yet removed, keyed by id.
let mut pending_work = HashMap::new();
loop {
for (id, value) in find_work().await {
if !pending_work.contains_key(&id) {
// The future is only stored here; nothing in this function awaits or spawns it.
let fut = do_work(id, value);
pending_work.insert(id, fut);
}
}
// Keep an entry only while its future is not done.
pending_work.retain(|id, fut| {
if isDone(fut) {
// do something with the result
false
} else {
true
}
});
}
}
/// Creates a tokio runtime, spawns `async_main` on its executor and calls
/// `shutdown_on_idle` on the runtime.
// NOTE(review): `executor()` / `shutdown_on_idle()` look like a pre-1.0 tokio API —
// confirm the tokio version this snippet targets.
fn main() {
let runtime = Runtime::new().unwrap();
let exec = runtime.executor();
exec.spawn(async_main());
runtime.shutdown_on_idle();
}

Application on OSX cannot spawn more than 2048 threads

I have a Rust application on OSX firing up a large number of threads, as can be seen in the code below. However, after checking how many threads my version of OSX allows via the sysctl kern.num_taskthreads command, I can see that it reports kern.num_taskthreads: 2048, which explains why I can't spin up more than 2048 threads.
How do I go about getting past this hard limit?
// Number of OS threads to spawn and number of requests each thread creates.
let threads = 300000;
let requests = 1;
// One `thread::spawn` per iteration; every handle is collected in `child_threads`.
for _x in 0..threads {
println!("{}", _x);
// NOTE(review): `request` is defined outside this fragment; from the `.lock().unwrap().push(..)`
// below it is presumably an `Arc<Mutex<Vec<_>>>` — confirm.
let request_clone = request.clone();
let handle = thread::spawn(move || {
for _y in 0..requests {
// The lock is taken for each push.
request_clone.lock().unwrap().push((request::Request::new(request::Request::create_request())));
}
});
child_threads.push(handle);
}

Before starting, I'd encourage you to read about the C10K problem. When you get into this scale, there's a lot more things you need to keep in mind.
That being said, I'd suggest looking at mio...
a lightweight IO library for Rust with a focus on adding as little overhead as possible over the OS abstractions.
Specifically, mio provides an event loop, which allows you to handle a large number of connections without spawning threads. Unfortunately, I don't know of a HTTP library that currently supports mio. You could create one and be a hero to the Rust community!

Not sure how helpful this will be, but I was trying to create a small pool of threads that will create connections and then send them over to an event loop via a channel for reading.
I'm sure this code is probably pretty bad, but here it is anyway as an example. It uses the Hyper library, like you mentioned.
extern crate hyper;
use std::io::Read;
use std::thread;
use std::thread::{JoinHandle};
use std::sync::{Arc, Mutex};
use std::sync::mpsc::channel;
use hyper::Client;
use hyper::client::Response;
use hyper::header::Connection;
const TARGET: i32 = 100;
const THREADS: i32 = 10;
/// One in-flight HTTP response together with the bytes read from it so far.
struct ResponseWithString {
// Number assigned to the connection from the shared counter.
index: i32,
// The response whose body is still being read.
response: Response,
// Body bytes accumulated so far.
data: Vec<u8>,
// Set once a read returned 0 bytes.
complete: bool
}
/// Opens `TARGET` connections to `url` from `THREADS` worker threads and reads all
/// response bodies on the main thread.
///
/// Workers take connection numbers from a shared counter, perform the request and push
/// the response through a channel. The main thread polls the channel and reads up to
/// 1024 bytes at a time from every response that is not yet complete, until `TARGET`
/// responses have been read to the end.
///
/// # Panics
/// A worker panics if a request or the channel send fails; the main thread panics if
/// reading from a response fails.
fn main() {
    // Create a client.
    let url: &'static str = "http://www.gooogle.com/";
    // Exactly one handle per worker thread is stored.
    let mut threads: Vec<JoinHandle<()>> = Vec::with_capacity(THREADS as usize);
    let conn_count = Arc::new(Mutex::new(0));
    let (tx, rx) = channel::<ResponseWithString>();
    for _ in 0..THREADS {
        // Move var references into thread context
        let conn_count = conn_count.clone();
        let tx = tx.clone();
        let t = thread::spawn(move || {
            loop {
                let idx: i32;
                {
                    // Lock, increment, and release
                    let mut count = conn_count.lock().unwrap();
                    *count += 1;
                    idx = *count;
                }
                if idx > TARGET {
                    break;
                }
                let mut client = Client::new();
                // Creating an outgoing request.
                println!("Creating connection {}...", idx);
                let res = client.get(url) // Get URL...
                    .header(Connection::close()) // Set headers...
                    .send().unwrap(); // Fire!
                println!("Pushing response {}...", idx);
                tx.send(ResponseWithString {
                    index: idx,
                    response: res,
                    data: Vec::<u8>::with_capacity(1024),
                    complete: false
                }).unwrap();
            }
        });
        threads.push(t);
    }
    let mut responses = Vec::<ResponseWithString>::with_capacity(TARGET as usize);
    let mut buf: [u8; 1024] = [0; 1024];
    let mut completed_count = 0;
    // Runs until every expected response has been read to the end.
    while completed_count < TARGET {
        // Non-blocking: pick up at most one newly arrived response per round.
        if let Ok(r) = rx.try_recv() {
            println!("Incoming response! {}", r.index);
            responses.push(r);
        }
        for r in responses.iter_mut().filter(|r| !r.complete) {
            // Read the Response.
            match r.response.read(&mut buf) {
                Ok(0) => {
                    println!("No more data! {}", r.index);
                    r.complete = true;
                    completed_count += 1;
                }
                Ok(n) => {
                    println!("Got data! {} => {}", r.index, n);
                    // Append the whole chunk at once instead of pushing byte by byte.
                    r.data.extend_from_slice(&buf[..n]);
                }
                Err(e) => {
                    panic!("Oh no! {} {}", r.index, e);
                }
            }
        }
    }
    // All responses have arrived, so every worker has sent its last message and exits on
    // its next counter check; joining just makes the shutdown orderly.
    for t in threads {
        let _ = t.join();
    }
}

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Caching sqlx Pool causes overrun in file handles - rust

Related

How do I simultaneously read messages from multiple Tokio channels in a single task?

Tokio channel sends, but doesn't receive

How to wait for tokio tasks to finish?

Waiting for a list of futures in Rust

Application on OSX cannot spawn more than 2048 threads

Categories

Resources