Why does this array take up 1GB of memory? - node.js

I have files that store game map data, and I am using this function to read the map files and load them into memory when the server starts. The first 4 bytes tell the server how big the map is, and then each tile on the map has 6 bytes of data. I have roughly 33MB of data stored across multiple files, but when I read them into an array for in-memory access, it takes up almost 1GB of RAM. I'm just wondering if something I am doing here is redundant or unnecessary and is causing too much memory to be allocated.
Example: a 256x256 map would have 256 * 256 * 6 + 4 bytes of data
let mapData = [];

function loadMapFiles() {
  fs.readdir("./maps", (err, files) => {
    for (let file of files) {
      let pointer = 4;
      fs.readFile("./maps/" + file, (error, data) => {
        if (error) throw error;
        let sizex = data.readUInt16BE(0);
        let sizey = data.readUInt16BE(2);
        let mapNameNumber = Number(file);
        mapData[mapNameNumber] = [];
        for (let y = 0; y < sizey; y++) {
          mapData[mapNameNumber][y] = [];
          for (let x = 0; x < sizex; x++) {
            mapData[mapNameNumber][y][x] = [];
            mapData[mapNameNumber][y][x][0] = data.readUInt16BE(pointer);
            mapData[mapNameNumber][y][x][1] = data.readUInt16BE(pointer + 2);
            mapData[mapNameNumber][y][x][2] = data.readUInt16BE(pointer + 4);
            pointer = pointer + 6;
          }
        }
      });
    }
  });
}
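One likely culprit, offered here as a hedged observation rather than a definitive answer: storing every tile as its own tiny JavaScript array creates millions of separate heap objects, and each small array carries object overhead on the order of tens of bytes in V8, which can easily multiply a 33MB dataset into hundreds of megabytes. A minimal sketch of a more compact layout, assuming the same file format as above (2-byte sizex, 2-byte sizey, then three UInt16 values per tile), is to keep each map as one flat Uint16Array and index into it:

// Sketch only: flat typed-array storage instead of nested per-tile arrays.
const fs = require("fs");

let mapData = []; // mapData[mapNumber] = { sizex, sizey, tiles: Uint16Array }

function loadMapFilesCompact() {
  fs.readdir("./maps", (err, files) => {
    if (err) throw err;
    for (let file of files) {
      fs.readFile("./maps/" + file, (error, data) => {
        if (error) throw error;
        let sizex = data.readUInt16BE(0);
        let sizey = data.readUInt16BE(2);
        let tiles = new Uint16Array(sizex * sizey * 3);
        for (let i = 0, pointer = 4; i < tiles.length; i++, pointer += 2) {
          tiles[i] = data.readUInt16BE(pointer);
        }
        mapData[Number(file)] = { sizex, sizey, tiles };
      });
    }
  });
}

// Reading tile (x, y), channel c (0..2):
// let value = map.tiles[(y * map.sizex + x) * 3 + c];

This keeps the in-memory payload close to the on-disk size, at the cost of slightly less convenient indexing.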

Related

NodeJS packing and unpacking as buffer slower than JSON

I wrote this simple test to compare the speeds of serializing an array of objects as binary and JSON.
const { performance } = require('perf_hooks');

let units = [];
let id = 0;
let CHUNK_SIZE = 23;

for (let i = 0; i < 50000; i++) {
  let r = Math.random();
  let u = {};
  u.id = id;
  u.rotation = Math.PI * 2 * Math.random();
  u.type = 0;
  u.x = i;
  u.y = i;
  u.size = r * 20 + 12;
  u.health = 1;
  u.int = 1;
  units.push(u);
}

[
  [
    "JSON",
    (units) => JSON.stringify(units.map(unit => [
      unit.id,
      unit.type,
      unit.x.toFixed(0),
      unit.y.toFixed(0),
      unit.rotation.toFixed(4),
      unit.health.toFixed(2),
      unit.size.toFixed(0),
      parseInt(unit.int)
    ])),
    (units) => JSON.parse(units)
  ],
  [
    "Binary",
    (units) => {
      return Buffer.concat(units.map(unit => {
        let buf = new Buffer(CHUNK_SIZE);
        buf.writeUInt32BE(unit.id);
        buf.writeUInt8(unit.type, 4);
        buf.writeInt32BE(unit.x, 5);
        buf.writeInt32BE(unit.y, 9);
        buf.writeFloatBE(unit.rotation, 13);
        buf.writeFloatBE(unit.health, 17);
        buf.writeUInt8(unit.size, 21);
        buf.writeUInt8(unit.attacking ? 1 : 0, 22);
        return buf;
      }));
    },
    (units) => {
      let u = units.buffer;
      let result = [];
      for (let offset = 0; offset < u.byteLength; offset += CHUNK_SIZE) {
        let view = new DataView(u, offset, CHUNK_SIZE);
        result.push([
          view.getUint32(0),
          view.getUint8(4),
          view.getInt32(5),
          view.getInt32(9),
          view.getFloat32(13),
          view.getFloat32(17),
          view.getUint8(21),
          view.getUint8(22)
        ]);
      }
      return result;
    }
  ]
].forEach(([name, compress, decompress]) => {
  console.log("Test: " + name);
  let t0 = performance.now();
  let compressed = compress(units);
  let t1 = performance.now();
  let decompressed = decompress(compressed);
  let t2 = performance.now();
  console.log(`Result: ${decompressed.length}`);
  console.log(`Compression took: ${t1 - t0}ms`);
  console.log(`Compressed length: ${compressed.byteLength || compressed.length}`);
  console.log(`Decompression took: ${t2 - t1}ms`);
  console.log(`Total time: ${t2 - t0}ms`);
  console.log("");
})
Drop that into Node.js and look at the results; here are mine:
Test: JSON
Result: 50000
Compression took: 411.7958119995892ms
Compressed length: 2227781
Decompression took: 134.79507100209594ms
Total time: 546.5908830016851ms
Test: Binary
Result: 50000
Compression took: 612.1825229972601ms
Compressed length: 1150000
Decompression took: 191.14320900291204ms
Total time: 803.3257320001721ms
I'm quite surprised to find that JSON is faster since it is doing considerably more work than the binary counterpart.
Why is that and how can it be improved?
Hope I don't look like a weirdo answering a third consecutive question I asked myself, but hopefully I'm providing content that could be useful for someone.
It appears as if I have been taking way too much advantage of convenience functions. Combine that with not knowing exactly what is happening "under the hood" and you get the results from the question.
Constructing a new buffer for each entry and then concatenating them all is a costly thing to do. Instead, create a single buffer up front and write into it:
(units) => {
  let buf = new Buffer(units.length * CHUNK_SIZE);
  units.forEach((unit, i) => {
    let offset = i * CHUNK_SIZE;
    buf.writeUInt32BE(unit.id, offset);
    buf.writeUInt8(unit.type, offset + 4);
    buf.writeInt32BE(unit.x, offset + 5);
    buf.writeInt32BE(unit.y, offset + 9);
    buf.writeFloatBE(unit.rotation, offset + 13);
    buf.writeFloatBE(unit.health, offset + 17);
    buf.writeUInt8(unit.size, offset + 21);
    buf.writeUInt8(unit.attacking ? 1 : 0, offset + 22);
  });
  return buf;
},
The same goes for the deserializing part: instead of constructing a new DataView per chunk (its constructor taking an offset and length can mislead you into believing that would be more efficient), construct a single one and read off of it:
(units) => {
  let u = units.buffer;
  let result = [];
  let view = new DataView(u);
  for (let offset = 0; offset < u.byteLength; offset += CHUNK_SIZE) {
    result.push([
      view.getUint32(offset + 0),
      view.getUint8(offset + 4),
      view.getInt32(offset + 5),
      view.getInt32(offset + 9),
      view.getFloat32(offset + 13),
      view.getFloat32(offset + 17),
      view.getUint8(offset + 21),
      view.getUint8(offset + 22)
    ]);
  }
  return result;
}
And now we've got more acceptable results:
Test: JSON
Result: 50000
Compression took: 284.3018040023744ms
Compressed length: 2934399
Decompression took: 197.91818399727345ms
Total time: 522.21998799964786ms
Test: Binary
Result: 50000
Compression took: 175.56888100132346ms
Compressed length: 1150000
Decompression took: 79.27483800053596ms
Total time: 254.84371900185943ms
JSON length has increased because I removed the .toFixed calls from the compress function.
I'm interested if there are further improvements that can be done, as I'm sure there are plenty of people who are more competent than me out there.
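One further tweak worth noting (my addition, not part of the original answer): new Buffer(size) has been deprecated in Node for a long time. Buffer.allocUnsafe(size) gives the same uninitialized allocation, which is fine here because every byte is overwritten before it is read, while Buffer.alloc(size) is the zero-filled, safer variant. A minimal sketch of the swap, with the counts taken from the benchmark above:

// Drop-in replacement for "new Buffer(units.length * CHUNK_SIZE)":
const CHUNK_SIZE = 23;
const count = 50000;
let buf = Buffer.allocUnsafe(count * CHUNK_SIZE); // uninitialized, like new Buffer(size)
// Or, if zero-filled memory is wanted:
let zeroed = Buffer.alloc(count * CHUNK_SIZE);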

Node.js and CPU cache utilization

I want to understand CPU cache utilisation. For that purpose I wrote a small bit of Node.js code:
const { performance } = require('perf_hooks'); // needed for performance.now()
const endOfLine = require('os').EOL;           // assumed: line separator (not defined in the snippet as posted)

let testArray = [];
let length = "";
let times = "";

do {
  testArray.push(Math.random());
  if (testArray.length % 1000 === 0) {
    testArray = testArray.slice();
    const start = performance.now();
    action(testArray);
    const stop = performance.now();
    const duration = stop - start;
    length += testArray.length + "," + endOfLine;
    times += duration + "," + endOfLine;
    console.log(`Took: ${duration}, length: ${testArray.length}`);
  }
} while (testArray.length < 10000000);

function action(a) {
  let sum = 0;
  for (let index = 0; index < 10000; index++) {
    sum += a[index];
  }
}
I would expect the duration of the call to follow the usual stepped access-latency curve as the array outgrows each cache level.
In spite of my expectations, the durations are pretty much the same no matter what the size of the array is. I thought that as the array gets bigger it would exceed the L1, L2 and L3 caches and I would see that on the graph.
Is my code wrong or am I missing something?
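One observation worth adding here (mine, not part of the original post): action only ever reads the first 10,000 elements, so the working set stays at roughly 80 KB (10,000 unboxed doubles) no matter how large testArray grows, which would by itself flatten the curve. A hedged sketch of a version that walks the whole array, so the memory touched actually scales with the array size:

// Sketch: sum over the entire array so the amount of memory touched
// grows with the array, instead of always reading the first 10,000 slots.
function action(a) {
  let sum = 0;
  for (let index = 0; index < a.length; index++) {
    sum += a[index];
  }
  return sum; // return the result so the loop is not trivially dead code
}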

How to Write a String to Memory in WebAssembly

I started off working like this:
// example.wast
(module
  (memory (export "mem") 1))

// example.js
WebAssembly.instantiateStreaming(fetch('example.wasm'), {}).then(mod => {
  var exports = mod.instance.exports
  var i8 = new Uint8Array(exports.mem)
  var ra = 100
  var rb = 500
  var rc = 1000
  var rd = 1200
  var re = 2000
  exports.copy(0, ra)
  exports.copy(ra, rb)
  exports.copy(rb, rc)
  exports.copy(rc, rd)
  exports.copy(rd, re)
  console.log(ra, getString(ra, i8))
  console.log(rb, getString(rb, i8))
  console.log(rc, getString(rc, i8))
  console.log(rd, getString(rd, i8))
  console.log(re, getString(re, i8))
})

function getString(index, buffer) {
  var s = ''
  for (var i = index; i < index + size; i++) { // 'size' is assumed to be defined elsewhere
    s += String.fromCharCode(buffer[i])
  }
  return s
}
But in order to implement that copy function, I first need to load strings into the WebAssembly memory. I'm wondering how to do that (using as little JS as possible, and mostly WebAssembly instead). I'm wondering if you can do this:
exports.loadString(index, 'Hello World', i8)
Or, if not, whether you have to do something like this instead:
function loadString(index, string, buffer) {
  for (var i = 0, n = string.length; i < n; i++) {
    buffer[index + i] = string[i]
  }
}
Or better yet:
function loadString(index, string, buffer) {
  for (var i = 0, n = string.length; i < n; i++) {
    exports.loadChar(index + i, string[i])
  }
}
I'm wondering how to implement exports.loadChar or exports.loadString in WebAssembly, so that it loads the string into a specific place in memory.
I would like to avoid using JS, i.e. buffer[index + i] = string[i]. Perhaps there is a way to dynamically load into the data segment, which would allow for exports.loadString.
WebAssembly does not have any of its own utility functions for reading / writing to linear memory, so there is no exported exports.loadString function.
WebAssembly exports a reference to linear memory itself, which you can read / write to as a typed array. You've already obtained a reference to it here:
var i8 = new Uint8Array(exports.mem)
This creates a byte array that allows you to read / write to the linear memory that was exported from your module with the name mem. Note that the typed array has to wrap the memory's underlying ArrayBuffer, i.e. exports.mem.buffer. You need to encode your string and write it to this array:
var encoder = new TextEncoder();
var encodedString = encoder.encode(str);
var i8 = new Uint8Array(exports.mem.buffer);
// Copy the UTF-8 encoded string into the WASM memory.
i8.set(encodedString);
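To round this out with a usage sketch (my addition, not from the original answer), assuming the same exports object from the instantiation snippet above: writing at an arbitrary offset works via the offset argument of TypedArray.prototype.set, and the string can be read back with a TextDecoder. The index value and the hello string here are just placeholders.

// Sketch: write a string at a chosen offset in the exported memory and read it back.
var encoder = new TextEncoder();
var decoder = new TextDecoder('utf-8');
var i8 = new Uint8Array(exports.mem.buffer);

var index = 100;                      // arbitrary example offset
var bytes = encoder.encode('Hello World');
i8.set(bytes, index);                 // second argument = offset into the target array

var roundTrip = decoder.decode(i8.subarray(index, index + bytes.length));
console.log(roundTrip);               // "Hello World"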

Why does a Node.js write stream cause increasing memory consumption?

I'm reading a 45MB text file in Node.js and doing some processing on each character in it.
The first version of my script took one char at a time from the original file, stored it in an accumulator variable (result += char), and at the end saved result to a text file. This did not work: the file was so big that I was putting too much data in RAM, and I got an error: JavaScript heap out of memory. I decided to use a write stream so I could write data directly to disk one char at a time, hoping this would solve the issue:
const fs = require('fs');

var proportion = 2;
var c = '';

fs.readFile('./concat/all.txt', 'utf8', function (err, data) {
  if (err) {
    return console.log(err);
  }

  var h = parseInt(Math.sqrt(data.length / proportion));
  var w = parseInt(h * proportion);
  console.log(w, h);

  var wstream = fs.createWriteStream('output.txt');
  var lineCount = 0;

  for (var x = 0; x < data.length; x++) {
    if (data.charCodeAt(x) === 32 && x > 0 && data.charCodeAt(x - 1) === 32)
      continue;
    if (data.charCodeAt(x) === 10 && x > 0 && data.charCodeAt(x - 1) === 10)
      continue;

    c = (data.charCodeAt(x) === 10 || data.charCodeAt(x) === 13 || data.charCodeAt(x) === 9) ? " " : data.charAt(x);

    lineCount++;
    if (lineCount > w) {
      c += "\n";
      lineCount = 0;
    }
    wstream.write(c);
  }
  wstream.end();
});
But I'm still getting an out-of-memory error. The script runs with no problems if I comment out wstream.write(c). Why?
Stream, stream, stream ...
Throttle the I/O:
Stream the input in chunks, pause the stream for each chunk, parse and manipulate each character of the current chunk, and write it to the output.
Then resume the input stream to continue with the next chunk, and repeat until finished.
I had no problem throttling your code using createReadStream & createWriteStream.
Here is the code (tested successfully with a 64MB file):
const fs = require('fs');

var w;
var charCount = 0;
var proportion = 2;
var c = '';

// This step was required to populate 'w'
fs.readFile('input.txt', 'utf8', function (err, data) {
  if (err) { return console.log(err); }
  let h = parseInt(Math.sqrt(data.length / proportion));
  w = parseInt(h * proportion);
  data = undefined;
  console.log('[+] starting ( w:', w, ')');
  EMX();
});

// Here is the magic
function EMX() {
  const I = fs.createReadStream('input.txt');
  const O = fs.createWriteStream('output.txt');
  I.on('end', () => { console.log("[+] operation completed"); });
  I.on('data', (chunk) => {
    I.pause();
    let data = chunk.toString();
    for (var x = 0; x < data.length; x++) {
      if (data.charCodeAt(x) === 32 && x > 0 && data.charCodeAt(x - 1) === 32) continue;
      if (data.charCodeAt(x) === 10 && x > 0 && data.charCodeAt(x - 1) === 10) continue;
      c = (data.charCodeAt(x) === 10 || data.charCodeAt(x) === 13 || data.charCodeAt(x) === 9) ? " " : data.charAt(x);
      if (charCount > w) { c += "\n"; charCount = 0; }
      charCount++;
      O.write(c);
    }
    I.resume();
  });
}
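A note I'd add beyond the original answer: the memory growth in the question comes from wstream.write() buffering everything the disk hasn't accepted yet, so another common approach is to honour backpressure directly, i.e. stop reading when write() returns false and continue on the 'drain' event. A minimal sketch under those assumptions; the file names are placeholders and transform stands in for whatever per-character processing is needed.

const fs = require('fs');

const input = fs.createReadStream('input.txt', { encoding: 'utf8' });
const output = fs.createWriteStream('output.txt');

input.on('data', (chunk) => {
  // write() returns false once the writable's internal buffer is full
  const ok = output.write(transform(chunk));
  if (!ok) {
    input.pause();                                // stop reading until the disk catches up
    output.once('drain', () => input.resume());
  }
});
input.on('end', () => output.end());

// Placeholder for the per-character processing.
function transform(chunk) {
  return chunk;
}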

How do I reverse a scanline using the jpeg-js module/node JS buffer?

I've been fiddling around with the jpeg-js module and the Node JS Buffer, attempting to create a small command-line program that modifies the decoded JPEG buffer data and creates a pattern of X number of reversed scanlines and X number of normal scanlines before saving a new JPEG. In other words, I'm looking to flip portions of the image, but not the entire image itself (plenty of modules do such a thing, of course, but not for the specific use case I have).
To create the reversed/normal line patterns, I've been reading/writing line by line, saving a slice of that line to a variable, then starting at the end of the scanline and incrementally going down in slices of 4 bytes (the allocation for one RGBA value) until I'm at the beginning of the line. Code for the program:
'use strict';
const fs = require('fs');
const jpeg = require('jpeg-js');
const getPixels = require('get-pixels');

let a = fs.readFileSync('./IMG_0006_2.jpg');
let d = Buffer.allocUnsafe(a.width * a.height * 4);
let c = jpeg.decode(a);
let val = false; // track whether normal or reversed scanlines
let lineWidth = b.width * 4;
let lineCount = 0;
let track = 0;
let track2 = 0;
let track3 = 0;
let curr, currLine; // storage for writing/reading scanlines, respectively
let limit = {
  one: Math.floor(Math.random() * 141),
  two: Math.floor(Math.random() * 151),
  three: Math.floor(Math.random() * 121)
};

if (limit.one < 30) {
  limit.one = 30;
}
if (limit.two < 40) {
  limit.two = 40;
}
if (limit.two < 20) {
  limit.two = 20;
}

let calc = {};
calc.floor = 0;
calc.ceil = 0 + lineWidth;

d.forEach(function(item, i) {
  if (i % lineWidth === 0) {
    lineCount++;
    /* // alternate scanline type, currently disabled to figure out how to successfully reverse image
    if (lineCount > 1 && lineCount % limit.one === 0) {
      // val = !val;
    }
    */
    if (lineCount === 1) {
      val = !val; // setting alt scanline check to true initially
    } else if (calc.floor + lineWidth < b.data.length - 1) {
      calc.floor += lineWidth;
      calc.ceil += lineWidth;
    }
    currLine = c.data.slice(calc.floor, calc.ceil); // current line
    track = val ? lineWidth : 0; // tracking variable for reading from scanline
    track2 = val ? 4 : 0; // tracking variable for writing from scanline
  }
  // check if reversed and writing variable has written 4 bytes for RGBA
  // if so, set writing source to 4 bytes at end of line and read from there incrementally
  if (val && track2 === 4) {
    track2 = 0; // reset writing count
    curr = currLine.slice(track - 4, track); // store 4 previous bytes as writing source
    if (lineCount === 1 && lineWidth - track < 30) console.log(curr); // debug
  } else {
    curr = currLine; // set normal scanline
  }
  d[i] = curr[track2];
  // check if there is no match between data source and decoded image
  if (d[i] !== curr[track2]) {
    if (track3 < 50) {
      console.log(i);
    }
    track3++;
  }
  track2++; // update tracking variable
  track = val ? track - 1 : track + 1; // update tracking variable
});

var rawImageData = {
  data: d,
  width: b.width,
  height: b.height
};
console.log(b.data.length);
console.log('errors\t', track3);
var jpegImageData = jpeg.encode(rawImageData, 100);
fs.writeFile('foo2223.jpg', jpegImageData.data);
Alas, the reversed scanline code I've written does not work properly. Unfortunately, I've only been able to successfully reverse the red channel of my test image (see below left), with the blue and green channels just turning into vague blurs. The color scheme should look something like the right image.
What am I doing wrong here?
For reversed lines, you store a slice of 4 bytes (4 bytes = 1 pixel) and then write the first value of the pixel (red) correctly.
But in the next iteration you overwrite the slice curr with currLine, so the rest of the channels get wrong values.
if (val && track2 === 4) {
  track2 = 0; // reset writing count
  curr = currLine.slice(track - 4, track); // store 4 previous bytes as writing source
  if (lineCount === 1 && lineWidth - track < 30) console.log(curr); // debug
} else {
  curr = currLine; // set normal scanline
}
Iteration 0: val == true, track2 == 4, set curr to next pixel, write red channel.
Iteration 1: val == true, track2 == 1, (val && track2 === 4) == false, set curr to currLine, write green channel.
You can restructure the branches so the track2 === 4 check is nested inside the val check to avoid this:
if (val) {
  if (track2 === 4) {
    track2 = 0; // reset writing count
    curr = currLine.slice(track - 4, track); // store 4 previous bytes as writing source
    if (lineCount === 1 && lineWidth - track < 30) console.log(curr); // debug
  }
} else {
  curr = currLine; // set normal scanline
}
Fixed code should look like this:
function flipAlt(input, output) {
  const fs = require('fs');
  const jpeg = require('jpeg-js');

  let a = fs.readFileSync(input);
  let b = jpeg.decode(a);
  let d = Buffer.allocUnsafe(b.width * b.height * 4);
  let val = false; // track whether normal or reversed scanlines
  let lineWidth = b.width * 4;
  let lineCount = 0;
  let track = 0;
  let track2 = 0;
  let track3 = 0;
  let curr, currLine; // storage for writing/reading scanlines, respectively
  let limit = {
    one: Math.floor(Math.random() * 141),
    two: Math.floor(Math.random() * 151),
    three: Math.floor(Math.random() * 121)
  };

  if (limit.one < 30) {
    limit.one = 30;
  }
  if (limit.two < 40) {
    limit.two = 40;
  }
  if (limit.two < 20) {
    limit.two = 20;
  }

  let calc = {};
  calc.floor = 0;
  calc.ceil = 0 + lineWidth;

  d.forEach(function(item, i) {
    if (i % lineWidth === 0) {
      lineCount++;
      if (lineCount > 1) {
        val = !val;
      }
      if (lineCount === 1) {
        val = !val; // setting alt scanline check to true initially
      } else if (calc.floor + lineWidth < b.data.length - 1) {
        calc.floor += lineWidth;
        calc.ceil += lineWidth;
      }
      currLine = b.data.slice(calc.floor, calc.ceil); // current line
      track = val ? lineWidth : 0; // tracking variable for reading from scanline
      track2 = val ? 4 : 0; // tracking variable for writing from scanline
    }
    // check if reversed and writing variable has written 4 bytes for RGBA
    // if so, set writing source to 4 bytes at end of line and read from there incrementally
    if (val) {
      if (track2 === 4) {
        track2 = 0; // reset writing count
        curr = currLine.slice(track - 4, track); // store 4 previous bytes as writing source
        if (lineCount === 1 && lineWidth - track < 30) console.log(curr); // debug
      }
    } else {
      curr = currLine; // set normal scanline
    }
    d[i] = curr[track2];
    // check if there is no match between data source and decoded image
    if (d[i] !== curr[track2]) {
      if (track3 < 50) {
        console.log(i);
      }
      track3++;
    }
    track2++; // update tracking variable
    track = val ? track - 1 : track + 1; // update tracking variable
  });

  var rawImageData = {
    data: d,
    width: b.width,
    height: b.height
  };
  console.log(b.data.length);
  console.log('errors\t', track3);
  var jpegImageData = jpeg.encode(rawImageData, 100);
  fs.writeFile(output, jpegImageData.data);
}

flipAlt('input.jpg', 'output.jpg');
Instead of tracking array indices, you can use a utility library like lodash, which should make things easier:
function flipAlt(input, output) {
  const fs = require('fs');
  const jpeg = require('jpeg-js');
  const _ = require('lodash');

  const image = jpeg.decode(fs.readFileSync(input));
  const lines = _.chunk(image.data, image.width * 4);
  const flipped = _.flatten(lines.map((line, index) => {
    if (index % 2 != 0) {
      return line;
    }
    const pixels = _.chunk(line, 4);
    return _.flatten(pixels.reverse());
  }));
  const imageData = jpeg.encode({
    width: image.width,
    height: image.height,
    data: new Buffer(flipped)
  }, 100).data;

  fs.writeFile(output, imageData);
}

flipAlt('input.jpg', 'output.jpg');
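For completeness, a dependency-free variant I'd sketch (my addition, not part of either answer): the same alternating-line pixel reversal can be done directly on the decoded Buffer by swapping 4-byte RGBA groups in place on every other scanline. The file names here are placeholders.

// Sketch: reverse the pixel order of every other scanline in place, no lodash.
const fs = require('fs');
const jpeg = require('jpeg-js');

function flipAltInPlace(input, output) {
  const image = jpeg.decode(fs.readFileSync(input));
  const { width, height, data } = image;
  const rowBytes = width * 4;

  for (let y = 0; y < height; y += 2) {          // even rows only, like the lodash version
    const rowStart = y * rowBytes;
    for (let left = 0, right = width - 1; left < right; left++, right--) {
      for (let c = 0; c < 4; c++) {              // swap the RGBA bytes of the two pixels
        const a = rowStart + left * 4 + c;
        const b = rowStart + right * 4 + c;
        const tmp = data[a];
        data[a] = data[b];
        data[b] = tmp;
      }
    }
  }

  fs.writeFileSync(output, jpeg.encode(image, 100).data);
}

flipAltInPlace('input.jpg', 'output.jpg');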
