I was writing a JavaScript implementation of merge sort in NodeJS, since the native V8 sort function uses quick sort, which is less efficient than merge sort in its worst case. In order to the test the performance, I made an array of 10000 elements, pre-sorted (worst-case for quick sort) and timed how long it took to sort this list with both functions. In a single execution, the native sort function takes about 16 milliseconds, where my implementation takes about 9 milliseconds. However, when executing both 100 times, the native sort takes about 2.1 milliseconds on average, and mine takes about 4.3 milliseconds. Here is my code:
const { performance } = require('perf_hooks');
function merge(a, b, func) {
const array = new Array(a.length + b.length);
let aIndex = 0;
let bIndex = 0;
while(aIndex + bIndex < array.length) {
if(aIndex >= a.length || func(a[aIndex], b[bIndex]) > 0) {
array[aIndex + bIndex] = b[bIndex++];
} else {
array[aIndex + bIndex] = a[aIndex++];
}
}
return array;
}
function mergeSort(list, func) {
if(list.length <= 1) {
return list;
}
const half = list.length / 2;
return merge(mergeSort(list.slice(0, half), func), mergeSort(list.slice(half), func), func);
}
function time(func, iters) {
let sum = 0;
for(let i = 0; i < iters; i++) {
let startTime = performance.now();
func();
sum += performance.now() - startTime;
}
return sum / iters;
}
const arr = [...Array(10000).keys()];
const sortFunc = (a, b) => a - b;
console.log("JavaScript built-in sort execution time, one iteration:")
console.log(time(() => arr.sort(sortFunc), 1)); // ~16
console.log("Manually implemented merge sort execution time, one iteration:")
console.log(time(() => mergeSort(arr, sortFunc), 1)); // ~9
console.log();
console.log("JavaScript built-in sort average execution time, 100 iterations:")
console.log(time(() => arr.sort(sortFunc), 100)); // ~2.1
console.log("Manually implemented merge sort average execution time, 100 iterations:")
console.log(time(() => mergeSort(arr, sortFunc), 100)); // ~4.3
Why is it so much faster when executed repeatedly than only once, and why is this improvement more pronounced for the native sort function?
EDIT: I was able to make my algorithm more efficient by tracking array indices instead of using the slice method. My code now consistently beats v8's native sort when used on pre-sorted arrays, but loses on randomized arrays, as expected. Here is that code, for those interested:
const { performance } = require('perf_hooks');
function merge(a, b, func) {
const array = new Array(a.length + b.length);
let aIndex = 0;
let bIndex = 0;
while(aIndex + bIndex < array.length) {
if(aIndex >= a.length || func(a[aIndex], b[bIndex]) > 0) {
array[aIndex + bIndex] = b[bIndex++];
} else {
array[aIndex + bIndex] = a[aIndex++];
}
}
return array;
}
function mergeSortRec(list, func, start, limit) {
if (limit === 1) {
return [list[start]];
}
const half = limit / 2 | 0;
return merge(mergeSortRec(list, func, start, half), mergeSortRec(list, func, half + start, limit - half), func);
}
function mergeSort(list, func) {
return mergeSortRec(list, func, 0, list.length);
}
function time(func) {
let startTime = performance.now();
func();
return performance.now() - startTime;
}
const sortFunc = (a, b) => a - b;
console.log();
console.log("--- Sequential array ---");
console.log();
const sequenceArr = [...Array(10000).keys()];
console.log("JavaScript built-in sort execution time, one iteration:");
console.log(time(() => sequenceArr.slice(0).sort(sortFunc)));
console.log("Manually implemented merge sort execution time, one iteration:");
console.log(time(() => mergeSort(sequenceArr, sortFunc)));
let sum = 0;
for(let i = 0; i < 100; i++) {
const array = sequenceArr.slice(0);
sum += time(() => array.sort(sortFunc));
}
console.log("JavaScript built-in sort average execution time, 100 iterations:");
console.log(sum / 100);
sum = 0;
for(let i = 0; i < 100; i++) {
sum += time(() => mergeSort(sequenceArr, sortFunc))
}
console.log("Manually implemented merge sort average execution time, 100 iterations:");
console.log(sum / 100);
console.log();
console.log("--- Randomized array ---");
console.log();
const randomArrays = new Array(101);
for(let i = 0; i < 101; i++) {
randomArrays[i] = new Array(10000);
for(let j = 0; j < 10000; j++) {
randomArrays[i][j] = Math.random() * 5000 | 0;
}
}
console.log("JavaScript built-in sort execution time, one iteration:");
console.log(time(() => randomArrays[100].slice(0).sort(sortFunc)));
console.log("Manually implemented merge sort execution time, one iteration:");
console.log(time(() => mergeSort(randomArrays[100], sortFunc)));
sum = 0;
for(let i = 0; i < 100; i++) {
const array = randomArrays[i].slice(0)
sum += time(() => array.sort(sortFunc));
}
console.log("JavaScript built-in sort average execution time, 100 iterations:");
console.log(sum / 100);
sum = 0;
for(let i = 0; i < 100; i++) {
sum += time(() => mergeSort(randomArrays[i], sortFunc))
}
console.log("Manually implemented merge sort average execution time, 100 iterations:");
console.log(sum / 100);
Related
I'm working on a function that aims to return a non-repeating list of years for all transactions.
So far it worked fine, but I also need to add up all the values of the transactions for each year that it returned in the first loop.
I tried different ways and without success.
public async getListYear(): Promise<any>{
try{
return await client
.scan({
TableName: 'dbPayments'
})
.promise()
.then((response) => {
let result = response.Items.filter((e, i) => {
return response.Items.findIndex((x) => {
return x.date_created.substring(0,4) == e.date_created.substring(0,4)
}) == i;
});
let years = [];
let sum = [];
for(let i = 0; i < result.length; i++){
const yearTransaction = new Date(response.Items[i]['date_created']).getFullYear();
years.push(yearTransaction)
}
// Here is the problem, I tried as follows:
for(let i = 0; i < response.Items.length; i++){
if(response.Items[i]['status_pgm'] == 'approved'){
for(let i = 0; i < years.length; i++){
let year = new Date(response.Items[i]['date_created']).getFullYear();
let value = 0;
if(year == years[i]){
value += response.Items[i]['transaction_amount']
}
let data = {
years[i]: value
}
sum.push(data)
}
}
}
return {
status: 200,
list: years,
totalSum: sum
}
})
.catch((error: AxiosError) => {
throw error;
})
}catch (e) {
console.log(e);
return {status: 500, msg: "error"}
}
}
Table dbPayments:
id | date_created | transaction_amount | status_pgm
1 2022-10-29T20:54:40.294-04:00 45 approved
2 2022-09-29T20:54:40.294-04:00 30 approved
3 2022-08-29T20:54:40.294-04:00 25 approved
4 2021-10-29T20:54:40.294-04:00 15 approved
5 2021-09-29T20:54:40.294-04:00 10 approved
I need to return the sum of values for each year, what is the best way to do this? In the code I put an example of how I tried to do it.
First. I think this is the kind of operation should be done on the DB not in client.
But i notice some errors.
for(let i = 0; i < response.Items.length; i++){
if(response.Items[i]['status_pgm'] == 'approved'){
for(let i = 0; i < years.length; i++){
Inner and outer loop both use i so this mught clash and mess up the looping.
let year = new Date(response.Items[i]['date_created']).getFullYear();
I think this should be on outer loop not inner loop.
let value = 0;
if(year == years[i]){
value += response.Items[i]['transaction_amount']
}
let data = {
years[i]: value
}
value is set to 0 each time this piece of code is reaches, so no accumulation is actually made.
My reccomendation. Make a Map<number, number>, where key is year and value is the accumulated sum. Then you just could
for(let i = 0; i < response.Items.length; i++){
if(response.Items[i]['status_pgm'] == 'approved'){
let year = new Date(response.Items[i]['date_created']).getFullYear();
let value = response.Items[i]['transaction_amount'];
map.set(year, map.get(year) + value || value)
}
}
And if you need it as a array of sums like in the example, you could then create the array by iterating over years array, and then pushing the corresbonding years sum into a sums array.
for(let i = 0; i < years.length; i++){
let year = years[i];
let obj= {};
obj[year] = map.get(year);
sums.push(obj);
}
I want to understand CPU cache utilisation. For that purpose i wrote a small bit of Node.js code:
let testArray = [];
let length = "";
let times = "";
do {
testArray.push(Math.random());
if (testArray.length % 1000 === 0) {
testArray = testArray.slice();
const start = performance.now();
action(testArray);
const stop = performance.now();
const duration = stop - start;
length += testArray.length + "," + endOfLine;
times += duration + "," + endOfLine;
console.log(`Took: ${duration}, length: ${testArray.length}`);
}
}
while (testArray.length < 10000000)
function action(a) {
let sum = 0;
for (let index = 0; index < 10000; index++) {
sum += a[index];
}
}
I would expect the duration of the call to the function to be similar to this chart:
In spite of my expectations the durations are pretty much the same no matter what the size of the array is. I thought that as the array gets bigger it would exceed L1, L2 and L3 caches and I would see it on the graph.
Is my code wrong or am I missing something?
I want to print out all number from 1 to 1,000,000 with 1,000 numbers per line. and return the response without having a long delay.
Thanks in advance for any type of help!!!
Well, first you can create a function to print numbers from 1 to 1000.
function getThousand(index) {
var result = '';
for (var i = index; i < index + 1000; i++) {
result += i + ' ';
}
return result;
}
Then, you need a function to call this for 1 to 1000000.
function getAll() {
var result = '';
for (var i = 0; i < 1000; i++) {
result = getThousand((i * 1000) + 1) + " \n";
fs.appendFileSync('foo.txt', result);
}
}
Then call it all:
getAll();
This will save your lines into a file. At the end of getAll() you can print what you need.
I wrote a simple function for computing prime numbers in D. I thought it was pretty quick, calculating prime numbers up to 100,000. But then I wanted to compare it to NodeJS. When I ran the NodeJS script for the first time, I was astounded at the difference and double checked I wasn't skipping some sort of calculation some how. But the two are pretty identical functionally.
D:
import std.stdio;
import std.math;
import std.datetime;
import std.file;
import std.array;
enum size_t ITERATIONS = 100_000;
bool divisible(real n) {
real d;
for(d = 3; d < floor(n / 2); d += 2) {
if(n % d == 0) {
return true;
}
}
return false;
}
void main() {
StopWatch sw;
size_t T = ITERATIONS;
size_t C = 0;
real n = 2;
real r[ITERATIONS];
r[C] = n;
sw.start();
C++;
for(n = 3; n < T; n += 2) {
if(!divisible(n)) {
r[C] = n;
C++;
}
}
sw.stop();
double seconds = cast(double)sw.peek().usecs / 1_000_000;
writeln("\n\n", C, " prime numbers calculated in ", seconds, " seconds.");
File file = File("primes.txt", "w");
file.writeln("\n", C, " prime numbers calculated ", seconds, " seconds.");
foreach(number; r[0..C]) {
file.writeln(number);
}
file.writeln("\n", "end");
file.close();
}
NodeJS:
var fs = require('fs');
var ITERATIONS = 100000;
function divisible(n) {
var d;
for(d = 3; d < Math.floor(n / 2); d += 2) {
if(n % d == 0) {
return true;
}
}
return false;
}
(function() {
var buffer = [ ],
now = Date.now(),
C = 0
n = 2
;
buffer.push(n);
C++;
for(n = 3; n < ITERATIONS; n += 2) {
if(!divisible(n)) {
buffer.push(n);
C++;
}
}
var time = Date.now() - now,
seconds = time / 1000
;
console.log("\n\n", C, " prime numbers calculated. Process took ", seconds, " seconds.");
buffer.push("\n" + C + " prime numbers calculated. Process took " + seconds + " seconds.");
fs.writeFile("node_primes.txt", buffer.join("\n"), function(err) {
if(err) throw err;
console.log("Primes have been written to file.");
});
})();
Results:
Calculating 100,000 primes:
D: 3.49126 seconds
NodeJS: 0.652 seconds
Can anybody explain why this is happening?
Thanks in advance.
By unnecessarily declaring variables as real, you are forcing floating point arithmetic where integer arithmetic could be used. Replace all instances of real with int, get rid of that floor() and your D program will run as fast as the Node.JS version:
import std.stdio;
import std.math;
import std.datetime;
import std.file;
import std.array;
enum size_t ITERATIONS = 100_000;
bool divisible(int n) {
int d;
for(d = 3; d < n / 2; d += 2) {
if(n % d == 0) {
return true;
}
}
return false;
}
void main() {
StopWatch sw;
size_t T = ITERATIONS;
size_t C = 0;
int n = 2;
int r[ITERATIONS];
r[C] = n;
sw.start();
C++;
for(n = 3; n < T; n += 2) {
if(!divisible(n)) {
r[C] = n;
C++;
}
}
sw.stop();
double seconds = cast(double)sw.peek().usecs / 1_000_000;
writeln("\n\n", C, " prime numbers calculated in ", seconds, " seconds.");
File file = File("primes.txt", "w");
file.writeln("\n", C, " prime numbers calculated ", seconds, " seconds.");
foreach(number; r[0..C]) {
file.writeln(number);
}
file.writeln("\n", "end");
file.close();
}
ok, I have a homework assignment where I have to read in files and calculate the distance between a bunch of numbers in the files and then print out the mean and standard deviation of each set of numbers. The end of the script, where the console.log stuff is, is giving all NaN for the variables. Can anyone help me out?
*I've omitted repeating parts of the script to make it shorter (their are more arrays than just the lHipJoint array and the calculations for them but I left them out).
var fs = require('fs');
var lHipJoint = new Array();
//open the first text file
fs.readFile('file.txt','utf8', function (err, data)
{
if (err) throw err;
//split the data into an array with each line as an element
stuff=data.split('\n');
for (var i = 0; i < stuff.length; i++)
{
//function that processes each line into an array
//with each number as an element and does the euclidean dis.
processLine(stuff[i]);
}
data.length = 0;
stuff.length = 0;
});
//do the same for the next file
fs.readFile('file2.txt','utf8', function (err, data)
{
if (err) throw err;
stuff=data.split('\n');
for (var i = 0; i < stuff.length; i++)
{
processLine(stuff[i]);
}
data.length = 0;
stuff.length = 0;
});
//and again
fs.readFile('file3.txt','utf8', function (err, data)
{
if (err) throw err;
stuff=data.split('\n');
for (var i = 0; i < stuff.length; i++)
{
processLine(stuff[i]);
}
data.length = 0;
stuff.length = 0;
});
//and again
fs.readFile('file4.txt','utf8', function (err, data)
{
if (err) throw err;
stuff=data.split('\n');
for (var i = 0; i < stuff.length; i++)
{
processLine(stuff[i]);
}
data.length = 0;
stuff.length = 0;
});
//and again
fs.readFile('file5.txt','utf8', function (err, data)
{
if (err) throw err;
stuff=data.split('\n');
for (var i = 0; i < stuff.length; i++)
{
processLine(stuff[i]);
}
data.length = 0;
stuff.length = 0;
});
//and again
fs.readFile('file6.txt','utf8', function (err, data)
{
if (err) throw err;
stuff=data.split('\n');
for (var i = 0; i < stuff.length; i++)
{
processLine(stuff[i]);
}
data.length = 0;
stuff.length = 0;
});
//function to split each line into an array with each number as an element
//then parse the number strings into floats and do the euclidean distances,
//storing the values in arrays for each bone.
function processLine(line)
{
var line1 = line
var numbers = line1.split(" ");
line1.length = 0;
for (var i = 0; i < numbers.length; i++)
{
var number = parseFloat(numbers[i]);
line1[i] = number[i];
}
lHipJoint = Math.sqrt((line1[6] - line1[9])*(line1[6] - line1[9]) + (line1[7] - line1[10])*(line1[7] - line1[10]) + (line1[8] - line1[11])*(line1[8] - line1[11]));
//reset the arrays so they can be reused
line1.length = 0;
numbers.length = 0;
number.length = 0;
}
//calculations and output for the mean and SD of each bone's distance from the root bone.
for(var i = 0; i < lHipJoint.length; i++)
{
var lHipJointTotal = lHipJointTotal + lHipJoint[i];
}
var lHipJointMean = lHipJointTotal/lHipJoint.length;
for(var i = 0; i < lHipJoint.length; i++)
{
var lHipJointSDSum = lHipJointSDSum + (lHipJoint[i] - lHipJointMean)*(lHipJoint[i] - lHipJointMean);
}
var lHipJointSD = Math.sqrt(lHipJointSDSum/lHipJoint.length);
console.log("The mean distance of the left hip joint from the root bone is " +lHipJointMean+ " and the standard deviation is " +lHipJointSD+ ".\n");
You are doing a lot of strange things here in your script i will try to
bring upp as manny as i can.
So first of all dont reset arrays.
your in a garbage collected language just reallocate new ones.
Also in the processLine function you are assigning numbers to the indexes of a string
i asume you think its an array but its not the same thing.
strings are immutable (cant be changed) in javascript.
In the aggregating for loops att the bottom of the file you are
declaring the variable in every iteration. you want to declare it before the loop like this.
var x = 0;
for(var i = 0; i < list.length; i++) {
x = x + ......
}
Your cals to read the files all do the same thing.
So you want to use the same function for that.
write it ones.
You are assigning to the lHipJoint array in the
processLine function my understanding is that you want to add
the calculated value to the array.
You can do this with the push method like this
lHipJoint.push(Math.sqr(........
Also theres a problem with using the async file reading
sins your printing the result before you even read the files.
if you want to use the async ones you need to coordinate so that.
you only print the result when you done all the file reading.
but a tips is to use the non async ones in this case.
I understand this is an assignment so you might not want to read my
attempt to correct the program beneath.
Maybe read it after you handed in yours, but im leaving it here
for the q&a reference for others reading this.
var fs = require("fs");
var filePaths = ["file.txt", "file2.txt",
"file3.txt", "file4.txt",
"file5.txt", "file6.txt"];
var lHipJoint = [];
filePaths.forEach(function(path) {
var content = fs.readFileSync(path, "utf-8");
var lines = content.split("\n");
lines.forEach(function(line) {
if(line.trim() === "") return;
var numbers = line.split("\t").map(parseFloat);
// im not touching your calculation :D
lHipJoint.push(Math.sqrt((numbers[6] - numbers[9])*(numbers[6] - numbers[9])
+ (numbers[7] - numbers[10])*(numbers[7] - numbers[10]) + (numbers[8] - numbers[11])
* (numbers[8] - numbers[11])));
});
});
var lHipJointTotal = lHipJoint.reduce(function(p, c) {
return p + c;
});
var lHipJointMean = lHipJointTotal / lHipJoint.length;
var lHipJointSDSum = lHipJoint.reduce(function(p, c) {
return p + (c - lHipJointMean) * (c - lHipJointMean);
}, 0);
var lHipJointSD = Math.sqrt(lHipJointSDSum/lHipJoint.length);
console.log("The mean distance of the left hip joint from the root bone is "
+ lHipJointMean + " and the standard deviation is " + lHipJointSD + ".\n");
there might be some error in this program i dont know how the data looks but i hope this helps
you.