How to improve the memory usage in nodejs code? - node.js

I tried one code at hackerearth : https://www.hackerearth.com/practice/data-structures/stacks/basics-of-stacks/practice-problems/algorithm/fight-for-laddus/description/
The speed seems fine; however, the memory usage exceeds the 256 MB limit by nearly 2.8 times.
In java and python the memory is 5 times less however the time is nearly twice.
What factor can be used to optimise the memory usage in nodejs code implementation?
Here is nodejs implementation:
// Sample code to perform I/O:
// Collect everything that arrives on STDIN and hand the complete text to
// main() once the stream has ended.
process.stdin.resume();
process.stdin.setEncoding("utf-8");
let stdin_input = "";
process.stdin.on("data", (chunk) => {
  // Chunks arrive as strings because an encoding was set above.
  stdin_input = stdin_input + chunk;
});
process.stdin.on("end", () => {
  main(stdin_input);
});
/**
 * Solves every test case contained in `input` and prints one result line per
 * test case.
 *
 * Layout read from `input`: the first line holds the number of test cases;
 * each test case then occupies two lines - a count line (not read; the
 * number of tokens on the next line is used instead) followed by the
 * space-separated elements.
 *
 * For each element the printed value is the nearest element to its right
 * whose frequency within the test case is strictly greater than the
 * element's own frequency, or -1 when there is none.
 *
 * @param {string} input - Complete text read from STDIN.
 * @returns {void} Results are written with console.log.
 */
function main(input) {
  const lines = input.split("\n");
  const testCases = parseInt(lines[0], 10);

  for (let t = 0; t < testCases; ++t) {
    // Line 0 is the case count and every case takes two lines, so the
    // elements of case t are on line 2t + 2. Indexing from the front stays
    // correct when the input ends with a newline and avoids shifting the
    // whole array with splice().
    const line = lines[2 * t + 2];
    if (line === undefined) {
      // Input ended before the announced number of test cases.
      break;
    }

    // trim() drops a stray "\r" (CRLF input) that would otherwise become
    // part of the last token and distort its frequency.
    const values = line.trim().split(" ");
    const count = values.length;

    // Occurrences of each token within this test case.
    const frequency = new Map();
    for (const value of values) {
      frequency.set(value, (frequency.get(value) || 0) + 1);
    }

    const result = new Array(count);
    // Indices of candidates to the right of the current position; the
    // nearest candidate is on top.
    const stack = [];
    for (let i = count - 1; i >= 0; i--) {
      const own = frequency.get(values[i]);
      // Candidates that are not strictly more frequent can never be the
      // answer for this element or for any element further left.
      while (
        stack.length > 0 &&
        frequency.get(values[stack[stack.length - 1]]) <= own
      ) {
        stack.pop();
      }
      result[i] = stack.length > 0 ? values[stack[stack.length - 1]] : -1;
      stack.push(i);
    }

    // console.log appends its own newline, so the explicit "\n" leaves a
    // blank line after each result; kept so the output is unchanged.
    console.log(result.join(" ") + "\n");
  }
}

What factor can be used to optimize the memory usage in nodejs code implementation?
Here are some things to consider:
Don't buffer any more input data than you need to before you process it or output it.
Try to avoid making copies of data. Use the data in place if possible. Remember that all string operations create a new string that is likely a copy of the original data. And, many array operations like .map(), .filter(), etc... create new copies of the original array.
Keep in mind that garbage collection is delayed and is typically done during idle time. So, for example, modifying strings in a loop may create a lot of temporary objects that all must exist at once, even though most or all of them will be garbage collected when the loop is done. This creates a poor peak memory usage.
Buffering
The first thing I notice is that you read the entire input file into memory before you process any of it. Right away for large input files, you're going to use a lot of memory. Instead, what you want to do is read enough of a chunk to get the next testCase and then process it.
FYI, this incremental reading/processing will make the code significantly more complicated to write (I've written an implementation myself) because you have to handle partially read lines, but it will hold down memory use a bunch and that's what you asked for.
Copies of Data
After reading the entire input file into memory, you immediately make a copy of it all with this:
let arr = input.split("\n");
So, now you've more than doubled the amount of memory the input data is taking up. Instead of just one string for all the input, you now still have all of that in memory, but you've now broken it up into hundreds of other strings (each with a little overhead of its own for a new string and of course a copy of each line).
Modifying Strings in a Loop
When you're creating your final result which you call finalStr, you're doing this over and over again:
finalStr = finalStr + finalArray.join(" ") + "\n"
This is going to create tons and tons of incremental strings that will likely end up all in memory at once because garbage collection probably won't run until the loop is over. As an example, if you had 100 lines of output that were each 100 characters long so the total output (not count line termination characters) was 100 x 100 = 10,000 characters, then constructing this in a loop like you are would create temporary strings of 100, 200, 300, 400, ... 10,000 which would consume 5000 (avg length) * 100 (number of temporary strings) = 500,000 characters. That's 50x the total output size consumed in temporary string objects.
So, not only does this create tons of incremental strings each one larger than the previous one (since you're adding onto it), it also creates your entire output in memory before writing any of it out to stdout.
Instead, you can incrementally output each line to stdout as you construct each line. This will put the worst case memory usage at probably about 2x the output size whereas you're at 50x or worse.

Related

Set lookup in Nodejs does not seem to have O(1)

I wrote a test to test the lookup speed of Set in Nodejs (v8.4).
// Micro-benchmark: times `lookups` Set.has() calls with random keys against a
// Set that holds `size` integers.
const size = 5000000;
const lookups = 1000000;
const set = new Set();
// Fill the Set with the integers 0 .. size - 1.
for (let i = 0; i < size; i++) {
set.add(i);
}
// Draw the random keys up front so that generating them is not part of the
// timed section.
const samples = [];
for (let i = 0; i < lookups; i++) {
samples.push(Math.floor(Math.random() * size));
}
// Timed section: only the has() calls sit between the two Date.now() reads.
const start = Date.now();
for (const key of samples) {
set.has(key);
}
// Elapsed time is reported in milliseconds (Date.now() resolution).
console.log(`size: ${size}, time: ${Date.now() - start}`);
After running it with size = 5000, 50000, 500000, and 5000000, the result is surprising to me:
size: 5000, time: 29
size: 50000, time: 41
size: 500000, time: 81
size: 5000000, time: 130
I expected the time it takes is relatively constant. But it increases substantially as the number of items in the Set increases. Isn't the lookup supposed to be O(1)? What am I missing here?
Update 1:
After reading some comments and answers, I understand the point everyone is trying to make here. Maybe my question should be 'What is causing the increase in time?'. In hash map implementation, with the same number of lookups, the reason for increase in lookup time can only be there are more key collisions.
Update 2:
After more research, here is what I found:
V8 uses ordered hash table for both Set and Map implementation
According to this link, there are performance impact on the lookup time for ordered hash map, while unordered hash map's performance stays constant.
However, V8's ordered hash table implementation is based on this, and that doesn't seem to add any overhead to the look up time with increasing number of items.
Regardless of whether the JS Set implementation is actually O(1) or not (I'm not sure it is), you should not expect O(1) operations to result in speed that is identical across calls. It is a means of measuring the operation complexity rather than the actual throughput speed.
To demonstrate this, consider the use case of sorting an array of numbers. You can sort using array.sort which I believe is O(n * log(n)) in Node.js. You can also create a (bad, but amusing) O(n) implementation using timeouts (ignore complexity of adding to the array, etc):
// Values to be ordered.
const array = [
  681, 762, 198, 347, 340,
  73, 989, 967, 409, 752,
  660, 914, 711, 153, 691,
  35, 112, 907, 970, 67,
];
// Receives the values in the order in which their timers fire.
const sorted = [];
// Schedule one timer per value, delayed by the value itself (milliseconds),
// so smaller values are appended earlier.
for (const value of array) {
  setTimeout(() => sorted.push(value), value);
}
// Print the result after a delay longer than the largest value above.
setTimeout(() => {
  console.log(sorted);
}, 2000);
Of course, the first implementation is faster - but in terms of complexity, the second one is "better". The point is that you should only really be using O to estimate, it does not guarantee any specific amount of time. If you called the O(n) above with an array of 20 numbers (so the same length) but it had only two digit numbers, it would be a large execution time difference.
Stupid example, but it should hopefully support the point I'm trying to make :)
Caching and memory locality. V8's implementation of Set lookup has O(1) theoretical complexity, but real hardware has its own constraints and characteristics. Specifically, not every memory access has the same speed. Theoretical complexity analysis is only concerned with the number of operations, not the speed of each operation.
Update for updated question:
This answers your updated question! When you make many requests to a small Set, it will be likely that the CPU has cached the relevant chunks of memory, making many of the lookups faster than they would be if the data had to be retrieved from memory. There don't have to be more collisions for this effect to happen; it is simply the case that accessing a small memory region repeatedly is faster than spreading out the same number of accesses over a large memory region.
In fact, you can measure the same effect (with smaller magnitude) with an array:
// Micro-benchmark: times `lookups` reads at random indices of an array with
// `size` elements.
const size = 5000000;
const lookups = 1000000;
const array = new Array(size);
// Give every slot the same value so each read contributes 1 to the sum.
for (let i = 0; i < size; i++) {
array[i] = 1;
}
// Timed section: unlike a pre-generated sample list, the random index is
// computed inside the measured loop here.
const start = Date.now();
// The reads are accumulated into `result`, which is not printed.
var result = 0;
for (var i = 0; i < lookups; i++) {
var sample = Math.floor(Math.random() * size);
result += array[sample];
}
const end = Date.now();
// Elapsed time is reported in milliseconds.
console.log(`size: ${size}, time: ${end - start}`);
A million lookups of random indices on a 5,000-element array will be faster than a million lookups of random indices on a 5,000,000 element array.
The reason is that for a smaller data structure, there's a greater likelihood that the random accesses will read elements that are already in the CPU's cache.
In theory you could be right, a Set could have a lookup of O(1), but the JS set definition is very specific on the algorithm. See ECMA Script definition. There is a loop over all elements included.
Try have a look at various HashSet implementation you can find for example here, there might be one with O(1) .has-speed.

Remove NodeJs Stream padding

I'm writing an application where I need to strip the first X and last Y bytes from a stream. So what I need is basically a function I can pass to pipe that takes X and Y as parameters and removes the desired number of bytes from the stream as it comes through. My simplified setup is like this:
// Desired usage: `streamPadding(128, 512)` is the function being asked for;
// it is not defined in this snippet. `fs` is assumed to be the Node.js file
// system module.
const rs = fs.createReadStream('some_file')
const ws = fs.createWriteStream('some_other_file')
// Source -> trimming stage -> destination.
rs.pipe(streamPadding(128, 512)).pipe(ws)
After that, some_other_file should contain all the contents of some_file minus the first 128 bytes and the last 512 bytes. I've read up on streams, but couldn't figure out how to properly do this, so that it also handles errors during the transfer and does backpressure correctly.
As far as I know, I'd need a duplex stream, that, whenever I read from it, reads from its input stream, keeps track of where in the stream we are and skips the first 128 bytes before emitting data. Some tips on how to implement that would be very helpful.
The second part seems more difficult, if not impossible to do, because how would I know whether I already reached the last 512 bytes or not, before the input stream actually closed. I suspect that might not be possible, but I'm sure there must be a way to solve this problem, so if you have any advice on that, I'd be very thankful!
You can create a new Transform Stream which does what you wish. As for losing the last x bytes, you can always keep the last x bytes buffered and just ignore them when the stream ends.
Something like this (assuming you're working with buffers).
const {Transform} = require('stream');
const ignoreFirst = 128;
const ignoreLast = 512;

/**
 * Creates a Transform stream that removes the first `skipHead` bytes and the
 * last `skipTail` bytes from the data passing through it.
 *
 * The counters live inside the returned stream, so any number of independent
 * trimmers can be created.
 *
 * @param {number} [skipHead=ignoreFirst] - Bytes to drop from the start.
 * @param {number} [skipTail=ignoreLast] - Bytes to drop from the end.
 * @returns {Transform} Stream to place between a readable and a writable.
 */
function streamPadding(skipHead = ignoreFirst, skipTail = ignoreLast) {
  // Number of leading bytes discarded so far.
  let skipped = 0;
  // Most recent bytes that may still turn out to be the tail to drop.
  let held;

  return new Transform({
    transform(chunk, encoding, callback) {
      // Chunks are Buffers by default; convert if the stream was configured
      // to deliver strings.
      let data = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, encoding);

      // Head: discard bytes until `skipHead` of them have gone by.
      if (skipped < skipHead) {
        const drop = Math.min(skipHead - skipped, data.length);
        skipped += drop;
        data = data.subarray(drop);
      }
      // Nothing left of this chunk after trimming the head.
      if (data.length === 0) {
        return callback();
      }

      // Tail: until the stream ends it is unknown which bytes are the last
      // `skipTail`, so that many bytes are always held back.
      const pending = held ? Buffer.concat([held, data]) : data;
      const releasable = pending.length - skipTail;
      if (releasable <= 0) {
        held = pending;
        return callback();
      }
      held = pending.subarray(releasable);
      callback(null, pending.subarray(0, releasable));
      // No flush() is defined on purpose: whatever is still held when the
      // stream ends is exactly the tail that must be dropped.
    },
  });
}

// Trimmer configured with the constants above.
const MyTrimmer = streamPadding(ignoreFirst, ignoreLast);
Then you add that to your pipeline, assuming you're reading from a file and writing to a file:
// Wire the trimmer between the source file and the destination file.
// `fs` is assumed to be the Node.js file system module.
const rs = fs.createReadStream('some_file');
const ws = fs.createWriteStream('some_other_file');
// The transform is declared as `MyTrimmer`; identifiers are case-sensitive,
// so it has to be referenced with that exact spelling.
MyTrimmer.pipe(ws);
rs.pipe(MyTrimmer);

why it's slowly when I parse a message of Google protocol buffer in multi-thread?

I try to parse many Google protocol buffer messages from a binary file generated by calling SerializeToString. I first load all Bytes into a heap memory by calling new function. I also have two arrays to store the Bytes begin address of a message in the heap memory and the Bytes count of the message.
Then I begin to parse the messages by calling ParseFromString. I want to speed up the procedure by using multiple threads.
In each thread, I pass the start index and end index of address array and Byte count array.
In parent process. the main code is:
// Argument bundle handed to each worker thread: the shared arrays plus the
// index range [start_idx, end_idx) that the thread should handle.
struct ParsePara
{
// Memory holding the serialized bytes of the messages.
char* str_buffer;
// Per message: offset into str_buffer at which the message starts.
size_t* buffer_offset;
// Per message: byte count of the message.
size_t* binary_string_length_array;
// First message index for this thread (inclusive).
size_t start_idx;
// End message index for this thread (exclusive).
size_t end_idx;
// Output array; element k receives parsed message k.
Flight_Ticket_Info* ticket_info_buffer_array;
};
//Flight_Ticket_Info is class of message
//offset_size is the count of message
// Splits the offset_size messages into up to max_thread_count contiguous
// index ranges, starts one pthread per range and waits for all of them.
ticket_array = new Flight_Ticket_Info[offset_size];
const int max_thread_count = 6;
pthread_t pthread_id_vec[max_thread_count];
CTimer thread_cost;
thread_cost.start();
vector<ParsePara*> para_vec;
// Messages per thread, rounded up.
// NOTE(review): the division is done in float, which cannot represent every
// large size_t value exactly - confirm offset_size stays small enough.
const size_t each_count = ceil(float(offset_size) / max_thread_count);
for (size_t k = 0;k < max_thread_count;k++)
{
size_t start_idx = each_count * k;
size_t end_idx = each_count * (k+1);
// Fewer ranges than threads are needed when the messages run out early.
if (start_idx >= offset_size)
break;
// Clamp the last range to the number of messages.
if (end_idx >= offset_size)
end_idx = offset_size;
ParsePara* cand_para_ptr = new ParsePara();
// NOTE(review): a plain `new` reports failure by throwing rather than by
// returning null, so this branch is presumably never taken - confirm.
if (!cand_para_ptr)
{
_ERROR_EXIT(0,"[Malloc memory fail.]");
}
cand_para_ptr->str_buffer = m_valdata;//heap memory for storing Bytes of message
cand_para_ptr->buffer_offset = offset_array;//begin address of each message
cand_para_ptr->start_idx = start_idx;
cand_para_ptr->end_idx = end_idx;
cand_para_ptr->ticket_info_buffer_array = ticket_array;//array to store message
cand_para_ptr->binary_string_length_array = binary_length_array;//Bytes count of each message
para_vec.push_back(cand_para_ptr);
}
// Start one thread per prepared parameter block.
for(size_t k = 0 ;k < para_vec.size();k++)
{
int ret = pthread_create(&pthread_id_vec[k],NULL,parserFlightTicketForMultiThread,para_vec[k]);
if (0 != ret)
{
_ERROR_EXIT(0,"[Error] [create thread fail]");
}
}
// Wait for every started thread to finish.
// NOTE(review): nothing in this snippet deletes the ParsePara objects held in
// para_vec - confirm they are released elsewhere.
for (size_t k = 0;k < para_vec.size();k++)
{
pthread_join(pthread_id_vec[k],NULL);
}
In each thread the thread function is:
void* parserFlightTicketForMultiThread(void* void_para_ptr)
{
ParsePara* para_ptr = (ParsePara*) void_para_ptr;
parserFlightTicketForMany(para_ptr->str_buffer,para_ptr->ticket_info_buffer_array,para_ptr->buffer_offset,
para_ptr->start_idx,para_ptr->end_idx,para_ptr->binary_string_length_array);
}
// Parses messages start_idx .. end_idx - 1 into ticket_info_buffer_array.
//
// Parameters:
//   str_buffer                 - memory holding the serialized messages
//   ticket_info_buffer_array   - output array; element k receives message k
//   buffer_offset              - start offset of each message in str_buffer
//   start_idx, end_idx         - half-open index range to process
//   binary_string_length_array - byte count recorded for each message
void parserFlightTicketForMany(const char* str_buffer, Flight_Ticket_Info* ticket_info_buffer_array,
                               size_t* buffer_offset, const size_t start_idx, const size_t end_idx,
                               size_t* binary_string_length_array)
{
    // %zu is the conversion for size_t; %d and %ld are not.
    printf("start_idx:%zu,end_idx:%zu\n", start_idx, end_idx);
    for (size_t k = start_idx; k < end_idx; k++)
    {
        // Progress output every 100000 messages.
        if (k % 100000 == 0)
            cout << k << endl;
        size_t cand_offset = buffer_offset[k];
        size_t binary_length = binary_string_length_array[k];
        // One byte fewer than the recorded length is parsed, as before. The
        // guard keeps the unsigned subtraction from wrapping around when the
        // recorded length is 0.
        size_t payload_length = binary_length > 0 ? binary_length - 1 : 0;
        ticket_info_buffer_array[k].ParseFromString(string(&str_buffer[cand_offset], payload_length));
    }
    printf("done %zu %zu\n", start_idx, end_idx);
}
But multi-thread cost is more than one thread.
one thread cost is:40455623ms
My computer is 8 core and six thread cost is:131586865ms
Anyone can help me? thank you!
Some possible problems -- you'll have to experiment to determine which:
Protobuf parsing speed is often limited by memory bandwidth rather than CPU time, especially with a large input data set. In that case, more threads won't help, since all the cores are sharing bandwidth to main memory. Indeed, having multiple cores fighting over memory bandwidth could make the overall operation slower. Note that the biggest consumer of memory is not the input bytes but rather the parsed data objects -- that is, the output of parsing -- which are many times larger than the encoded data. To improve this problem, consider writing the parsing loop so that it fully-processes each message immediately after parsing, before moving on to the next message. That way, instead of allocating k protobuf objects, you only need to allocate one protobuf object per thread, and repeatedly reuse the same object for parsing. This way the object will (probably) stay in the core's private L1 cache and avoid consuming memory bandwidth; only the input bytes will be read over the main bus.
How are you loading data into RAM? Did you read() into a large array or did you mmap()? In the latter case the data is read from disk lazily -- it won't happen until you actually attempt to parse it. Even in the read() case, it could be that the data has been swapped out, creating similar effects. Either way, your threads are now not just fighting for memory bandwidth, but disk bandwidth, which is of course much slower. Having six threads reading separate parts of a big file will definitely be slower overall than having one thread read the whole file, because the operating system optimizes for sequential access.
Protobuf allocates memory during parsing. Many memory allocators take a lock while allocating new memory. Since all your threads are allocating tons and tons of objects in a tight loop, they will contend for this lock. Make sure you are using a thread-friendly memory allocator, such as Google's tcmalloc. Note that repeatedly reusing the same protobuf object in a parse-consume loop rather than allocating lots of different objects will also help immensely here, because the protobuf object will automatically reuse memory for sub-objects.
There may be a bug in your code and it might not be doing what you expect at all when multithreaded. For example, a bug might be causing all the threads to process the same data, rather than different data, and it could be that the data they're choosing happens to be bigger. Make sure you are testing that the results of your code are exactly the same when you run single-threaded vs. multi-threaded.
In short, if you want multiple cores to make your code faster, you have to think about not just what each core is doing, but what data is going in and out of each core, and how much the cores have to talk to each other. Ideally you want each core to operate all on its own without talking to anyone or anything; then you get maximum parallelism. That's not usually possible, of course, but the closer you can get to that, the better.
BTW, a random optimization for you:
ParseFromString(string(&str_buffer[cand_offset],binary_length-1))
Replace that with:
ParseFromArray(&str_buffer[cand_offset],binary_length-1)
Creating a std::string makes a copy of the data, which wastes time (and memory bandwidth). (This doesn't explain why threading is slow, though.)

why does a nodejs array shift/push loop run 1000x slower above array length 87369?

Why is the speed of nodejs array shift/push operations not linear in the size of the array? There is a dramatic knee at 87370 that completely crushes the system.
Try this, first with 87369 elements in q, then with 87370. (Or, on a 64-bit system, try 85983 and 85984.) For me, the former runs in .05 seconds; the latter, in 80 seconds -- 1600 times slower. (observed on 32-bit debian linux with node v0.10.29)
// Reproduction: fill a queue with 87369 objects, then 100000 times remove the
// oldest entry and append a new one.
// `q` and `i` are assigned without a declaration, so this only runs as
// non-strict script code.
q = [];
// preload the queue with some data
for (i=0; i<87369; i++) q.push({});
// fetch oldest waiting item and push new item
for (i=0; i<100000; i++) {
q.shift();
q.push({});
// Progress marker: one dot per 10000 iterations.
if (i%10000 === 0) process.stdout.write(".");
}
64-bit debian linux v0.10.29 crawls starting at 85984 and runs in .06 / 56 seconds. Node v0.11.13 has similar breakpoints, but at different array sizes.
Shift is a very slow operation for arrays as you need to move all the elements but V8 is able to use a trick to perform it fast when the array contents fit in a page (1mb).
Empty arrays start with 4 slots and as you keep pushing, it will resize the array using formula 1.5 * (old length + 1) + 16.
// Starting from 4 slots, repeatedly apply the growth step and print each new
// capacity until it is no longer below 87369.
let j = 4;
do {
  const grown = j + 1 + Math.floor(j / 2) + 16;
  console.log(grown);
  j = grown;
} while (j < 87369);
Prints:
23
51
93
156
251
393
606
926
1406
2126
3206
4826
7256
10901
16368
24569
36870
55322
83000
124517
So your array size ends up actually being 124517 items which makes it too large.
You can actually preallocate your array just to the right size and it should be able to fast shift again:
// Allocate the queue at its final size up front instead of growing it with
// push().
const q = new Array(87369); // Fits in a page so fast shift is possible
// preload the queue with some data
// The index is declared with `let`; assigning to an undeclared `i` creates a
// global and throws in strict mode and in ES modules.
for (let i = 0; i < 87369; i++) q[i] = {};
If you need larger than that, use the right data structure
I started digging into the v8 sources, but I still don't understand it.
I instrumented deps/v8/src/builtins.cc:MoveElements (called from Builtin_ArrayShift, which implements the shift with a memmove), and it clearly shows the slowdown: only 1000 shifts per second because each one takes 1ms:
AR: at 1417982255.050970: MoveElements sec = 0.000809
AR: at 1417982255.052314: MoveElements sec = 0.001341
AR: at 1417982255.053542: MoveElements sec = 0.001224
AR: at 1417982255.054360: MoveElements sec = 0.000815
AR: at 1417982255.055684: MoveElements sec = 0.001321
AR: at 1417982255.056501: MoveElements sec = 0.000814
of which the memmove is 0.000040 seconds, the bulk is the heap->RecordWrites (deps/v8/src/heap-inl.h):
// For an object that is not in new space, calls store_buffer_.Mark() once for
// each of `len` consecutive pointer-sized slots, beginning `start` bytes past
// `address`. Does nothing when `address` is in new space.
void Heap::RecordWrites(Address address, int start, int len) {
if (!InNewSpace(address)) {
// One Mark() call per slot, so the cost grows linearly with `len`.
for (int i = 0; i < len; i++) {
store_buffer_.Mark(address + start + i * kPointerSize);
}
}
}
which is (store-buffer-inl.h)
// Appends `addr` to the store buffer and calls Compact() when the advanced
// top pointer has the overflow bit set.
void StoreBuffer::Mark(Address addr) {
// Addresses inside cell space or code space are not expected here.
ASSERT(!heap_->cell_space()->Contains(addr));
ASSERT(!heap_->code_space()->Contains(addr));
// Write the address at the current top, then publish the advanced top.
Address* top = reinterpret_cast<Address*>(heap_->store_buffer_top());
*top++ = addr;
heap_->public_set_store_buffer_top(top);
// The overflow condition is detected from a bit of the top pointer itself.
if ((reinterpret_cast<uintptr_t>(top) & kStoreBufferOverflowBit) != 0) {
ASSERT(top == limit_);
Compact();
} else {
ASSERT(top < limit_);
}
}
when the code is running slow, there are runs of shift/push ops followed by runs of 5-6 calls to Compact() for every MoveElements. When it's running fast, MoveElements isn't called until a handful of times at the end, and just a single compaction when it finishes.
I'm guessing memory compaction might be thrashing, but it's not falling in place for me yet.
Edit: forget that last edit about output buffering artifacts, I was filtering duplicates.
this bug had been reported to google, who closed it without studying the issue.
https://code.google.com/p/v8/issues/detail?id=3059
When shifting out and calling tasks (functions) from a queue (array)
the GC(?) is stalling for an inordinate length of time.
114467 shifts is OK
114468 shifts is problematic, symptoms occur
the response:
The GC has nothing to do with this, and nothing is stalling either.
Array.shift() is an expensive operation, as it requires all array
elements to be moved. For most areas of the heap, V8 has implemented a
special trick to hide this cost: it simply bumps the pointer to the
beginning of the object by one, effectively cutting off the first
element. However, when an array is so large that it must be placed in
"large object space", this trick cannot be applied as object starts
must be aligned, so on every .shift() operation all elements must
actually be moved in memory.
I'm not sure there's a whole lot we can do about this. If you want a
"Queue" object in JavaScript with guaranteed O(1) complexity for
.enqueue() and .dequeue() operations, you may want to implement your
own.
Edit: I just caught the subtle "all elements must be moved" part -- is RecordWrites not GC but an actual element copy then? The memmove of the array contents is 0.04 milliseconds. The RecordWrites loop is 96% of the 1.1 ms runtime.
Edit: if "aligned" means the first object must be at first address, that's what memmove does. What is RecordWrites?

How to append binary data to a buffer in node.js

I have a buffer with some binary data:
// Buffer.from() replaces the deprecated `new Buffer(array)` constructor.
var b = Buffer.from([0x00, 0x01, 0x02]);
and I want to append 0x03.
How can I append more binary data? I'm searching in the documentation but for appending data it must be a string, if not, an error occurs (TypeError: Argument must be a string):
// Buffer.alloc() replaces the deprecated `new Buffer(size)` constructor and
// returns zero-filled memory.
var b = Buffer.alloc(256);
// write() without an offset starts at byte 0.
b.write("hola");
console.log(b.toString("utf8", 0, 4)); //hola
// The second argument is the byte offset at which writing starts.
b.write(", adios", 4);
console.log(b.toString("utf8", 0, 11)); //hola, adios
Then, the only solution I can see here is to create a new buffer for every appended binary data and copy it to the major buffer with the correct offset:
// Buffer.alloc() zero-fills the memory, which the first printed result below
// relies on; the deprecated `new Buffer(size)` did not guarantee that on
// older Node.js versions.
var b = Buffer.alloc(4); //4 for having a nice printed buffer, but the size will be 16KB
// Copy three bytes to the start of b.
Buffer.from([0x00, 0x01, 0x02]).copy(b);
console.log(b); //<Buffer 00 01 02 00>
// Copy one more byte to offset 3.
Buffer.from([0x03]).copy(b, 3);
console.log(b); //<Buffer 00 01 02 03>
But this seems a bit inefficient because I have to instantiate a new buffer for every append.
Do you know a better way for appending binary data?
EDIT
I've written a BufferedWriter that writes bytes to a file using internal buffers. Same as BufferedReader but for writing.
A quick example:
//The BufferedWriter truncates the file because append == false
// Usage sketch for the author's BufferedWriter (not defined in this snippet).
// Each call below is chained onto the value returned by the previous call.
// NOTE(review): the meaning of the second and third write() arguments is
// inferred from the inline comments (offset and length) - confirm against the
// library.
//The BufferedWriter truncates the file because append == false
new BufferedWriter ("file")
.on ("error", function (error){
console.log (error);
})
//From the beginning of the file:
.write ([0x00, 0x01, 0x02], 0, 3) //Writes 0x00, 0x01, 0x02
.write (new Buffer ([0x03, 0x04]), 1, 1) //Writes 0x04
.write (0x05) //Writes 0x05
.close (); //Closes the writer. A flush is implicitly done.
//The BufferedWriter appends content to the end of the file because append == true
new BufferedWriter ("file", true)
.on ("error", function (error){
console.log (error);
})
//From the end of the file:
.write (0xFF) //Writes 0xFF
.close (); //Closes the writer. A flush is implicitly done.
//The file contains: 0x00, 0x01, 0x02, 0x04, 0x05, 0xFF
LAST UPDATE
Use concat.
Updated Answer for Node.js ~>0.8
Node is able to concatenate buffers on its own now.
// Buffer.concat() returns a new Buffer holding the bytes of buffer1 followed
// by the bytes of buffer2 (both are placeholders for existing Buffers).
var newBuffer = Buffer.concat([buffer1, buffer2]);
Old Answer for Node.js ~0.6
I use a module to add a .concat function, among others:
https://github.com/coolaj86/node-bufferjs
I know it isn't a "pure" solution, but it works very well for my purposes.
Buffers are always of fixed size, there is no built in way to resize them dynamically, so your approach of copying it to a larger Buffer is the only way.
However, to be more efficient, you could make the Buffer larger than the original contents, so it contains some "free" space where you can add data without reallocating the Buffer. That way you don't need to create a new Buffer and copy the contents on each append operation.
This is to help anyone who comes here looking for a solution that wants a pure approach. I would recommend understanding this problem because it can happen in lots of different places not just with a JS Buffer object. By understanding why the problem exists and how to solve it you will improve your ability to solve other problems in the future since this one is so fundamental.
For those of us that have to deal with these problems in other languages it is quite natural to devise a solution, but there are people who may not realize how to abstract away the complexities and implement a generally efficient dynamic buffer. The code below may have potential to be optimized further.
I have left the read method unimplemented to keep the example small in size.
The realloc function in C (or any language dealing with intrinsic allocations) does not guarantee that the allocation will be expanded in size with out moving the existing data - although sometimes it is possible. Therefore most applications when needing to store a unknown amount of data will use a method like below and not constantly reallocate, unless the reallocation is very infrequent. This is essentially how most file systems handle writing data to a file. The file system simply allocates another node and keeps all the nodes linked together, and when you read from it the complexity is abstracted away so that the file/buffer appears to be a single contiguous buffer.
For those of you who wish to understand the difficulty in just simply providing a high performance dynamic buffer you only need to view the code below, and also do some research on memory heap algorithms and how the memory heap works for programs.
Most languages will provide a fixed size buffer for performance reasons, and then provide another version that is dynamic in size. Some language systems opt for a third-party system where they keep the core functionality minimal (core distribution) and encourage developers to create libraries to solve additional or higher level problems. This is why you may question why a language does not provide some functionality. This small core functionality allows costs to be reduced in maintaining and enhancing the language, however you end up having to write your own implementations or depending on a third-party.
/**
 * Growable byte store made of a list of fixed-size Buffer chunks. Bytes are
 * appended to the last chunk; when that chunk is full, a new one is added,
 * so data that has already been written is never copied on append.
 *
 * @constructor
 * @param {number} [chunk_size=4096] - Size in bytes of every chunk.
 */
function Buffer_A1(chunk_size) {
  this.buffer_list = [];
  // Total number of bytes written across all chunks.
  this.total_size = 0;
  // Number of bytes used in the last chunk.
  this.cur_size = 0;
  // Not read by any method here; kept so the object shape is unchanged.
  this.cur_buffer = [];
  this.chunk_size = chunk_size || 4096;
  // Buffer.alloc() replaces the deprecated `new Buffer(size)`.
  this.buffer_list.push(Buffer.alloc(this.chunk_size));
}

/**
 * Writes as many bytes as still fit into the current chunk.
 *
 * @param {ArrayLike<number>} data - Source of the bytes.
 * @param {number} offset - Index in `data` of the first byte to write.
 * @param {number} length - Number of bytes the caller wants to write.
 * @returns {number} Number of bytes actually written (may be fewer).
 */
Buffer_A1.prototype.writeByteArrayLimited = function (data, offset, length) {
  const room = this.chunk_size - this.cur_size;
  const can_write = Math.max(0, Math.min(length, room));
  const target = this.buffer_list[this.buffer_list.length - 1];
  for (let x = 0; x < can_write; ++x) {
    target[this.cur_size + x] = data[offset + x];
  }
  this.cur_size += can_write;
  this.total_size += can_write;
  // Start a fresh chunk as soon as the current one is full, so there is
  // always room for at least one more byte.
  if (this.cur_size === this.chunk_size) {
    this.buffer_list.push(Buffer.alloc(this.chunk_size));
    this.cur_size = 0;
  }
  return can_write;
};

/**
 * Appends `length` bytes of `data`, starting at `offset`, spreading them
 * over as many chunks as needed.
 *
 * The `data` parameter can be anything that is array like. It just must
 * support indexing and a length and produce an acceptable value to be
 * used with Buffer.
 *
 * @param {ArrayLike<number>} data - Source of the bytes.
 * @param {number} [offset=0] - Index in `data` of the first byte to write.
 * @param {number} [length] - Number of bytes to write; defaults to
 *   everything from `offset` to the end of `data`.
 */
Buffer_A1.prototype.writeByteArray = function (data, offset, length) {
  const start = offset == null ? 0 : offset;
  const count = length == null ? data.length - start : length;
  let written = 0;
  while (written < count) {
    // The read position has to include `start`; leaving it out would always
    // read from the beginning of `data`.
    written += this.writeByteArrayLimited(data, start + written, count - written);
  }
};

/**
 * Placeholder: reading is not implemented.
 */
Buffer_A1.prototype.readByteArray = function (data, offset, length) {
  /*
  If you really wanted to implement some read functionality
  then you would have to deal with unaligned reads which could
  span two buffers.
  */
};

/**
 * Copies everything written so far into one contiguous Buffer.
 *
 * @returns {Buffer} New Buffer of `total_size` bytes.
 */
Buffer_A1.prototype.getSingleBuffer = function () {
  const obuf = Buffer.alloc(this.total_size);
  const last = this.buffer_list.length - 1;
  let cur_off = 0;
  // Every chunk except the last one is completely filled.
  for (let x = 0; x < last; ++x) {
    this.buffer_list[x].copy(obuf, cur_off);
    cur_off += this.buffer_list[x].length;
  }
  // Only the used part of the last chunk belongs to the data.
  if (this.cur_size > 0) {
    this.buffer_list[last].copy(obuf, cur_off, 0, this.cur_size);
  }
  return obuf;
};
insert byte to specific place.
/**
 * Returns a new Buffer with `item` inserted into `arr` at byte position
 * `index`. `arr` itself is not modified.
 *
 * @param {Buffer|Uint8Array} arr - Source bytes.
 * @param {number} index - Byte position at which `item` is inserted.
 * @param {number|string|Buffer|Uint8Array|number[]} item - A single byte
 *   value, a string (encoded as UTF-8), or a sequence of bytes.
 * @returns {Buffer} The combined bytes.
 */
function insertToArray(arr, index, item) {
  let inserted;
  if (typeof item === "number") {
    // Buffer.from() rejects a bare number, so wrap the byte in an array.
    inserted = Buffer.from([item]);
  } else if (typeof item === "string") {
    inserted = Buffer.from(item, "utf-8");
  } else {
    inserted = Buffer.from(item);
  }
  // subarray() creates views without copying; concat() then copies each byte
  // exactly once into the result.
  return Buffer.concat([arr.subarray(0, index), inserted, arr.subarray(index)]);
}

Resources