Vectorizing a irregular search - rust

I'm trying to vectorize solving the following
\argmax_{x_i\in Z^+, \sum x_i = S} f(x)
Is there a better way of using vectorization than the following? I only get a 2.1x speedup for N=1 vs N=4 and a 2.4x speedup for N=1 vs N=8 on Skylake.
const N: usize = 4;
type F = Simd<f32, N>;
// Hardcoded for i in 0..3 for ease of reading, but can be easily generated with a macro
pub fn iterate(s: usize) -> [f32; 4] {
let mut max = F::splat(f32::NEG_INFINITY);
let mut max0 = F::splat(f32::NEG_INFINITY);
let mut max1 = F::splat(f32::NEG_INFINITY);
let mut max2 = F::splat(f32::NEG_INFINITY);
let mut max3 = F::splat(f32::NEG_INFINITY);
let remaining = s;
let mut idx0 = 0;
while idx0 <= remaining {
let x0 = F::splat(idx0 as f32);
let remaining = remaining - idx0;
let mut idx1 = 0;
while idx1 <= remaining {
let x1 = F::splat(idx1 as f32);
let remaining = remaining - idx1;
let mut i = 0;
while i <= remaining {
let offset = get_misaligned::<N>(remaining - i);
let x2 = offset + F::splat(i as f32);
let x3 = F::splat(remaining as f32) - x2;
{
let fx = f([x0, x1, x2, x3]);
max = max.simd_gt(fx).select(max, fx);
max0 = max.simd_gt(fx).select(max0, x0);
max1 = max.simd_gt(fx).select(max1, x1);
max2 = max.simd_gt(fx).select(max2, x2);
max3 = max.simd_gt(fx).select(max3, x3);
}
i += N;
}
idx1 += 1;
}
idx0 += 1;
}
let (i, _) = max
.to_array()
.iter()
.enumerate()
.reduce(|(i, x), (j, y)| if x > y { (i, x) } else { (j, y) })
.unwrap();
[max0[i], max1[i], max2[i], max3[i]]
}
Godbolt

Related

MoveTo in Crossterm is clearing my output

When using the crossterm library with this code:
fn draw_box(stdout: &mut Stdout, x: u16, y: u16) -> Result<()> {
let size = 5;
let outline = (x..x + size)
.map(|i| (i, y))
.chain((y + 1..y + size - 1).map(|i| (x + size - 1, i)))
.chain((y + 1..y + size - 1).map(|i| (x, i)))
.chain((x..x + size).map(|i| (i, y + size - 1)))
.collect::<Vec<_>>();
for (a, b) in outline {
stdout
.queue(cursor::MoveTo(a, b))?
.queue(style::Print("x"))?;
}
stdout.flush()?;
stdout
.queue(cursor::MoveTo(x + 2, y + 2))?
.queue(style::Print("o"))?;
stdout.flush()?;
Ok(())
}
The last draw command clears the rest of the output to give this:
xxxxx
x x
x o%
If I remove that command I get the full box:
xxxxx
x x
x x
x x
xxxxx%
How can I draw the box and then draw the circle inside of the box?

How to return a value inside for loop in rust?

I want to port this python code
#!/usr/bin/python
# -*- coding: utf-8 -*-
def testing(x, y):
for i in range(y):
x = 2.0 * x
if x > 3.5:
return i
return 999
for i in range(20):
print testing(float(i) / 10, 15)
and its output
999
5
4
3
3
2
2
2
etc.
to rust code. This is the rust code I wrote which is identical to the above python code.
fn testing(x: f32, y: i32) -> i32 {
for i in 0..y {
let x = 2.0 * x;
if x > 3.5 {
return i;
}
}
return 999;
}
fn main() {
for i in 0..20 {
let a = i as f32;
println!("{}", testing(a / 10.0, 15));
}
}
But its output is not the same as the python codes output
999
999
999
999
999
999
999
etc.
What is the right way to return a value in for loop using rust? Why is the my rust code outputting a different output comparing to pythons?
The problem is the line
let x = 2.0 * x;
let introduces a new variable, the original x is not modified.
The next loop iteration will then again multiple 2.0 with the parameter x, not with the variable x from the previous loop's iteration.
You need to instead assign the value to the existing x variable (which requires marking it as mutable):
fn testing(mut x: f32, y: i32) -> i32 {
for i in 0..y {
x = 2.0 * x;
if x > 3.5 {
return i;
}
}
return 999;
}
fn main() {
for i in 0..20 {
let a = i as f32;
println!("{}", testing(a / 10.0, 15));
}
}

My Rust code is much longer than the equivalent Python one, with the wrong result

When converting existing Python code to Rust, the number of lines increased, with many type changes, and worst of all with the wrong result! I'm not sure if I'm doing something wrong or if there is a way to optimize it:
The main function is:
fn main() {
let series = [
30, 21, 29, 31, 40, 48, 53, 47, 37, 39, 31, 29, 17, 9, 20, 24, 27, 35, 41, 38, 27, 31, 27,
26, 21, 13, 21, 18, 33, 35, 40, 36, 22, 24, 21, 20, 17, 14, 17, 19, 26, 29, 40, 31, 20, 24,
18, 26, 17, 9, 17, 21, 28, 32, 46, 33, 23, 28, 22, 27, 18, 8, 17, 21, 31, 34, 44, 38, 31,
30, 26, 32,
];
triple_exponential_smoothing(&series, 12, 0.716, 0.029, 0.993, 24);
}
triple_exponential_smoothing calls two other functions, which I tested, and they give correct results:
fn initial_trend(series: &[i32], slen: i32) -> f32 {
let mut sum = 0.0;
for i in 0..slen as usize { // in Python: for i in range(slen)
sum += (series[i + slen as usize] as f32 - series[i] as f32) / slen as f32;
}
return sum / slen as f32;
}
Which is a conversion of the Python code:
def initial_trend(series, slen):
sum = 0.0
for i in range(slen):
sum += float(series[i+slen] - series[i]) / slen
return sum / slen
# >>> initial_trend(series, 12)
# -0.7847222222222222
The second one is:
fn initial_seasonal_components(series: &[i32], slen: i32) -> Vec<f32> {
let mut seasonals = Vec::new();
let n_seasons = series.len() as i32 / slen;
// # compute season averages
let season_chunks = series //season_averages
.chunks(slen as usize)
.collect::<Vec<_>>();
let season_averages = season_chunks
.iter()
.map(|chunk| chunk.iter().sum::<i32>() as f32 / chunk.len() as f32)
.collect::<Vec<f32>>();
// # compute initial values
for i in 0..slen as usize {
let mut sum_of_vals_over_avg = 0.0;
for j in 0..n_seasons as usize {
sum_of_vals_over_avg +=
series[i + j * slen as usize] as f32 - season_averages[j] as f32;
}
seasonals.push(sum_of_vals_over_avg / n_seasons as f32);
}
return seasonals;
}
Which is a conversion of the Python code:
def initial_seasonal_components(series, slen):
seasonals = {}
season_averages = []
n_seasons = int(len(series)/slen)
# compute season averages
for j in range(n_seasons):
season_averages.append(sum(series[slen*j:slen*j+slen])/float(slen))
# compute initial values
for i in range(slen):
sum_of_vals_over_avg = 0.0
for j in range(n_seasons):
sum_of_vals_over_avg += series[slen*j+i]-season_averages[j]
seasonals[i] = sum_of_vals_over_avg/n_seasons
return seasonals
# >>> initial_seasonal_components(series, 12)
# {0: -7.4305555555555545, 1: -15.097222222222221, 2: -7.263888888888888, 3: -5.097222222222222, 4: 3.402777777777778, 5: 8.069444444444445, 6: 16.569444444444446, 7: 9.736111111111112, 8: -0.7638888888888887, 9: 1.902777777777778, 10: -3.263888888888889, 11: -0.7638888888888887}
The error looks to be in this function:
fn triple_exponential_smoothing(
series: &[i32],
slen: i32,
alpha: f32,
beta: f32,
gamma: f32,
n_preds: i32,
) {
let mut result: Vec<f32> = Vec::new();
let mut seasonals = initial_seasonal_components(&series, slen);
println!("The seasonalities are: {:#?}", seasonals);
let mut smooth = 0.0;
let mut trend = 0.0;
// for i in range(len(series)+n_preds):
for i in 0..(series.len() + n_preds as usize) as usize {
match i {
0 => {
// # initial values
smooth = series[0] as f32;
trend = initial_trend(&series, slen);
println!("The initial_trend is: {:#?}", trend);
result.push(series[0] as f32);
}
i if i >= series.len() => {
// # we are forecasting
let m = i - series.len() + 1;
result.push(
(smooth as usize + m * trend as usize) as f32 + seasonals[i % slen as usize],
)
}
_ => {
let val = series[i];
let last_smooth = smooth;
smooth = alpha * (val as f32 - seasonals[i % slen as usize])
+ (1.0 - alpha) * (smooth + trend);
trend = beta * (smooth - last_smooth) + (1.0 - beta) * trend;
seasonals[i % slen as usize] = gamma * (val as f32 - smooth)
+ (1 - gamma as usize) as f32 * seasonals[i % slen as usize];
result.push(smooth + trend + seasonals[i % slen as usize]);
}
}
}
println!("The forecast is: {:#?}", result);
}
Which is a conversion of the Python code:
def triple_exponential_smoothing(series, slen, alpha, beta, gamma, n_preds):
result = []
seasonals = initial_seasonal_components(series, slen)
for i in range(len(series)+n_preds):
if i == 0: # initial values
smooth = series[0]
trend = initial_trend(series, slen)
result.append(series[0])
continue
if i >= len(series): # we are forecasting
m = i - len(series) + 1
result.append((smooth + m*trend) + seasonals[i%slen])
else:
val = series[i]
last_smooth, smooth = smooth, alpha*(val-seasonals[i%slen]) + (1-alpha)*(smooth+trend)
trend = beta * (smooth-last_smooth) + (1-beta)*trend
seasonals[i%slen] = gamma*(val-smooth) + (1-gamma)*seasonals[i%slen]
result.append(smooth+trend+seasonals[i%slen])
return result
# # forecast 24 points (i.e. two seasons)
# >>> triple_exponential_smoothing(series, 12, 0.716, 0.029, 0.993, 24)
# [30, 20.34449316666667, 28.410051892109554, 30.438122252647577, 39.466817731253066, ...
My complete code is available in the playground
I appreciate any comment to optimize the code and fix the error.
In Rust, you are converting everything to usize all the time:
(1 - gamma as usize) as f32
If you think about that one, (1 - gamma as usize) can only ever be 0 or 1 depending on the value of gamma. If you instead change it to
(1.0 - gamma) as f32
And also change
(smooth as usize + m * trend as usize) as f32
to
(smooth + m as f32 * trend)
Then you get the same result as in Python.
As for performance, this looks about right, but you could introduce some temporary variables to avoid recomputing the same things all the time (although the optimizer should help). The default compilation mode for Rust is debug, be sure to switch to release for benchmarks.
I'm posting here the best comments and answer I received, hope be useful for others:
try to minimize (and possibly to reduce to zero) the number of “as” casts in your code. into()/from() and try_from() help;
Try to replace some raw loops with iterators;
The triple_exponential_smoothing function has some arguments that are easy to confuse at the calling point because Rust currently doesn’t have named arguments. To avoid the problem you could try to pack some arguments in structs/tuples.
Using “return” at the end of Rust functions isn’t much idiomatic.
Also, something worth noting is that in Python, the floating point type is double floating point, which would be f64 in Rust. This likely would introduce small differences in accuracy, although likely nothing major.
The functional neat code the replaced mine is in this PlayGound:
fn main() {
let series = [
30,21,29,31,40,48,53,47,37,39,31,29,17,9,20,24,27,35,41,38,
27,31,27,26,21,13,21,18,33,35,40,36,22,24,21,20,17,14,17,19,
26,29,40,31,20,24,18,26,17,9,17,21,28,32,46,33,23,28,22,27,
18,8,17,21,31,34,44,38,31,30,26,32];
triple_exponential_smoothing(&series, 12, 0.716, 0.029, 0.993, 24);
}
fn initial_trend(series: &[i32], slen: usize) -> f32 {
series[..slen].iter().zip(&series[slen..])
.map(|(&a, &b)| (b as f32 - a as f32) / slen as f32).sum::<f32>() / slen as f32
}
fn initial_seasonal_components (series: &[i32], slen: usize) -> Vec<f32> {
let n_seasons = series.len() / slen;
// # compute season averages
let season_averages = series //season_averages
.chunks(slen)
.map(|chunk| chunk.iter().sum::<i32>() as f32 / chunk.len() as f32)
.collect::<Vec<f32>>();
// # compute initial values
(0..slen).map(|i| {
let mut sum_of_vals_over_avg = 0.0;
for j in 0..n_seasons {
sum_of_vals_over_avg += series[i + j * slen] as f32 - season_averages[j] as f32;
}
sum_of_vals_over_avg / n_seasons as f32
}).collect()
}
fn triple_exponential_smoothing(series: &[i32], slen: usize, alpha: f32, beta: f32,
gamma: f32, n_preds: usize) {
let mut result: Vec<f32> = Vec::new();
let mut seasonals = initial_seasonal_components(&series, slen);
println!("The seasonalities are: {:#?}", seasonals);
let mut smooth = 0.0;
let mut trend = 0.0;
for i in 0..(series.len() + n_preds) {
match i {
0 => { // # initial values
smooth = series[0] as f32;
trend = initial_trend(&series, slen);
println!("The initial_trend is: {:#?}", trend);
result.push(series[0] as f32);
},
i if i >= series.len() => { // # we are forecasting
let m = i - series.len() + 1;
result.push((smooth + m as f32 * trend) + seasonals[i % slen])
},
_ => {
let val = series[i];
let last_smooth = smooth;
smooth = alpha * (val as f32 - seasonals[i % slen]) +
(1.0 - alpha)*(smooth + trend);
trend = beta * (smooth - last_smooth) + (1.0 - beta) * trend;
seasonals[i % slen] = gamma * (val as f32 - smooth) +
(1.0 - gamma) * seasonals[i % slen];
result.push(smooth + trend + seasonals[i % slen]);
}
}
}
println!("The forecast is: {:#?}", result);
}

Draw a line in a bitmap (possibly with piston)

I want to draw a line in a bitmap, e.g. from pixel (10, 10) to pixel (90, 90). The line must have a specific width.
Using piston image, I am able to draw a single pixel:
let mut image = ImageBuffer::<image::Rgb<u8>>::new(100, 100);
image.get_pixel_mut(5, 5).data = [255, 255, 255];
image.save("output.png");
However there is no method to draw a line.
I suppose I have to use piston::graphics for that, but I can’t find any ressource how to do it (any example involves a window that provides a context on which graphics works on).
In addition to the great answer above: there is now direct support for drawing lines and many more shapes (even texts) in the imageproc library (see also the examples there):
extern crate image;
extern crate imageproc;
use image::{Rgb, RgbImage};
use imageproc::drawing::draw_line_segment_mut;
fn main() {
let mut img = RgbImage::new(100, 100);
draw_line_segment_mut(
&mut img,
(5f32, 5f32), // start point
(95f32, 95f32), // end point
Rgb([69u8, 203u8, 133u8]), // RGB colors
);
img.save("output.png").unwrap();
}
If you drop the width requirement and don't need antialiasing either, you could use something like Bresenham's line algorithm (also on Rosetta Code):
extern crate image;
use image::RgbImage;
fn draw_line(img: &mut RgbImage, x0: i64, y0: i64, x1: i64, y1: i64) {
// Create local variables for moving start point
let mut x0 = x0;
let mut y0 = y0;
// Get absolute x/y offset
let dx = if x0 > x1 { x0 - x1 } else { x1 - x0 };
let dy = if y0 > y1 { y0 - y1 } else { y1 - y0 };
// Get slopes
let sx = if x0 < x1 { 1 } else { -1 };
let sy = if y0 < y1 { 1 } else { -1 };
// Initialize error
let mut err = if dx > dy { dx } else {-dy} / 2;
let mut err2;
loop {
// Set pixel
img.get_pixel_mut(x0 as u32, y0 as u32).data = [255, 255, 255];
// Check end condition
if x0 == x1 && y0 == y1 { break };
// Store old error
err2 = 2 * err;
// Adjust error and start position
if err2 > -dx { err -= dy; x0 += sx; }
if err2 < dy { err += dx; y0 += sy; }
}
}
fn main() {
let mut img = RgbImage::new(256, 256);
draw_line(&mut img, 10, 10, 246, 128);
draw_line(&mut img, 128, 10, 10, 246);
img.save("output.png").unwrap();
}
Output:
As a primitive form of adding thickness, you could repeat drawing the line with some offset. Alternatively, draw a filled rectangle where the height of the rectangle corresponds to the thickness of the desired line.
There's a ticket open in the imageproc project to add anti aliased line drawing support: https://github.com/PistonDevelopers/imageproc/issues/97

Equidistant points across Bezier curves

Currently, I'm attempting to make multiple beziers have equidistant points. I'm currently using cubic interpolation to find the points, but because the way beziers work some areas are more dense than others and proving gross for texture mapping because of the variable distance. Is there a way to find points on a bezier by distance rather than by percentage? Furthermore, is it possible to extend this to multiple connected curves?
This is called "arc length" parameterization. I wrote a paper about this several years ago:
http://www.saccade.com/writing/graphics/RE-PARAM.PDF
The idea is to pre-compute a "parameterization" curve, and evaluate the curve through that.
distance between P_0 and P_3 (in cubic form), yes, but I think you knew that, is straight forward.
Distance on a curve is just arc length:
fig 1 http://www.codecogs.com/eq.latex?%5Cint_%7Bt_0%7D%5E%7Bt_1%7D%20%7B%20|P'(t)|%20dt
where:
fig 2 http://www.codecogs.com/eq.latex?P%27(t)%20=%20[%7Bx%27,y%27,z%27%7D]%20=%20[%7B%5Cfrac%7Bdx(t)%7D%7Bdt%7D,%5Cfrac%7Bdy(t)%7D%7Bdt%7D,%5Cfrac%7Bdz(t)%7D%7Bdt%7D%7D]
(see the rest)
Probably, you'd have t_0 = 0, t_1 = 1.0, and dz(t) = 0 (2d plane).
I know this is an old question but I recently ran into this problem and created a UIBezierPath extention to solve for an X coordinate given a Y coordinate and vise versa. Written in swift.
https://github.com/rkotzy/RKBezierMath
extension UIBezierPath {
func solveBezerAtY(start: CGPoint, point1: CGPoint, point2: CGPoint, end: CGPoint, y: CGFloat) -> [CGPoint] {
// bezier control points
let C0 = start.y - y
let C1 = point1.y - y
let C2 = point2.y - y
let C3 = end.y - y
// The cubic polynomial coefficients such that Bez(t) = A*t^3 + B*t^2 + C*t + D
let A = C3 - 3.0*C2 + 3.0*C1 - C0
let B = 3.0*C2 - 6.0*C1 + 3.0*C0
let C = 3.0*C1 - 3.0*C0
let D = C0
let roots = solveCubic(A, b: B, c: C, d: D)
var result = [CGPoint]()
for root in roots {
if (root >= 0 && root <= 1) {
result.append(bezierOutputAtT(start, point1: point1, point2: point2, end: end, t: root))
}
}
return result
}
func solveBezerAtX(start: CGPoint, point1: CGPoint, point2: CGPoint, end: CGPoint, x: CGFloat) -> [CGPoint] {
// bezier control points
let C0 = start.x - x
let C1 = point1.x - x
let C2 = point2.x - x
let C3 = end.x - x
// The cubic polynomial coefficients such that Bez(t) = A*t^3 + B*t^2 + C*t + D
let A = C3 - 3.0*C2 + 3.0*C1 - C0
let B = 3.0*C2 - 6.0*C1 + 3.0*C0
let C = 3.0*C1 - 3.0*C0
let D = C0
let roots = solveCubic(A, b: B, c: C, d: D)
var result = [CGPoint]()
for root in roots {
if (root >= 0 && root <= 1) {
result.append(bezierOutputAtT(start, point1: point1, point2: point2, end: end, t: root))
}
}
return result
}
func solveCubic(a: CGFloat?, var b: CGFloat, var c: CGFloat, var d: CGFloat) -> [CGFloat] {
if (a == nil) {
return solveQuadratic(b, b: c, c: d)
}
b /= a!
c /= a!
d /= a!
let p = (3 * c - b * b) / 3
let q = (2 * b * b * b - 9 * b * c + 27 * d) / 27
if (p == 0) {
return [pow(-q, 1 / 3)]
} else if (q == 0) {
return [sqrt(-p), -sqrt(-p)]
} else {
let discriminant = pow(q / 2, 2) + pow(p / 3, 3)
if (discriminant == 0) {
return [pow(q / 2, 1 / 3) - b / 3]
} else if (discriminant > 0) {
let x = crt(-(q / 2) + sqrt(discriminant))
let z = crt((q / 2) + sqrt(discriminant))
return [x - z - b / 3]
} else {
let r = sqrt(pow(-(p/3), 3))
let phi = acos(-(q / (2 * sqrt(pow(-(p / 3), 3)))))
let s = 2 * pow(r, 1/3)
return [
s * cos(phi / 3) - b / 3,
s * cos((phi + CGFloat(2) * CGFloat(M_PI)) / 3) - b / 3,
s * cos((phi + CGFloat(4) * CGFloat(M_PI)) / 3) - b / 3
]
}
}
}
func solveQuadratic(a: CGFloat, b: CGFloat, c: CGFloat) -> [CGFloat] {
let discriminant = b * b - 4 * a * c;
if (discriminant < 0) {
return []
} else {
return [
(-b + sqrt(discriminant)) / (2 * a),
(-b - sqrt(discriminant)) / (2 * a)
]
}
}
private func crt(v: CGFloat) -> CGFloat {
if (v<0) {
return -pow(-v, 1/3)
}
return pow(v, 1/3)
}
private func bezierOutputAtT(start: CGPoint, point1: CGPoint, point2: CGPoint, end: CGPoint, t: CGFloat) -> CGPoint {
// bezier control points
let C0 = start
let C1 = point1
let C2 = point2
let C3 = end
// The cubic polynomial coefficients such that Bez(t) = A*t^3 + B*t^2 + C*t + D
let A = CGPointMake(C3.x - 3.0*C2.x + 3.0*C1.x - C0.x, C3.y - 3.0*C2.y + 3.0*C1.y - C0.y)
let B = CGPointMake(3.0*C2.x - 6.0*C1.x + 3.0*C0.x, 3.0*C2.y - 6.0*C1.y + 3.0*C0.y)
let C = CGPointMake(3.0*C1.x - 3.0*C0.x, 3.0*C1.y - 3.0*C0.y)
let D = C0
return CGPointMake(((A.x*t+B.x)*t+C.x)*t+D.x, ((A.y*t+B.y)*t+C.y)*t+D.y)
}
// TODO: - future implementation
private func tangentAngleAtT(start: CGPoint, point1: CGPoint, point2: CGPoint, end: CGPoint, t: CGFloat) -> CGFloat {
// bezier control points
let C0 = start
let C1 = point1
let C2 = point2
let C3 = end
// The cubic polynomial coefficients such that Bez(t) = A*t^3 + B*t^2 + C*t + D
let A = CGPointMake(C3.x - 3.0*C2.x + 3.0*C1.x - C0.x, C3.y - 3.0*C2.y + 3.0*C1.y - C0.y)
let B = CGPointMake(3.0*C2.x - 6.0*C1.x + 3.0*C0.x, 3.0*C2.y - 6.0*C1.y + 3.0*C0.y)
let C = CGPointMake(3.0*C1.x - 3.0*C0.x, 3.0*C1.y - 3.0*C0.y)
return atan2(3.0*A.y*t*t + 2.0*B.y*t + C.y, 3.0*A.x*t*t + 2.0*B.x*t + C.x)
}
}

Resources