I've code similar to the following stripped down example
const req = request('http://www.my.url.here.com/file.bin') // 80 MB file
const decripher = .... // decipher from nodejs's crypto module
const output = fs.createWriteStream('result.zip');
const archive = archiver('zip', {zlib: { level: 9 } });
archive.pipe(output);
const stream = req
.pipe(decipher)
.on('error', (error) => {
console.error(`Error deciphering file`)
req.abort() // Does nothing
decipher.unpipe() // Unpiping to prevent the next step producing a [ERR_STREAM_WRITE_AFTER_END] error
stream.end() // End the stream as an error does not end it automatically
})
archive.append(stream, { name: 'file.bin' });
Once an error occurs deciphering the file I don't want to download any more data. But I've noticed that in these scenarios a req.abort() does nothing.
In the end I have a file partially decrypted in the archive but it's still ~80 MBs. i.e. The entire file has been downloaded despite the error (which I setup to fire near the start of the file).
Why would this occur? How can I prevent the entire file from downloading?
You can destroy the underlying socket. You can get the socket in socket or response event.
const req = request(options);
req.on('response', function(response) {
....
res.socket.end(); // or res.socket.destroy();
....
});
req.pipe(...);
Maybe in your case, modify a bit, this is basically a theory but you can do:
const req = request(options);
let sock = null;
req.on('socket', function(socket) {
sock = socket;
}).on('error', ()=>{
sock.destroy()//or end();
});
req.pipe(...);
Related
Im' working with a pelias project and using package wof-admin-lookup to handle data that is read from a file.
There is a case, there is no valid data for being pushed to stream. The wof-admin-lookup will never end.
Here is my code:
const stream = recordStream.create(filePath)
.pipe(blacklistStream())
.pipe(adminLookup.create())
.pipe(model.createDocumentMapperStream())
.pipe(peliasDbclient())
stream
.on('data', data => {
count++
})
.on('finish', () => {
console.log(`Imported ${count} addresses`)
resolve()
})
.on('error', (e) => {
reject(e)
})
Here is the code in wof-admin-lookup:
module.exports = function(pipResolver, config) {
if (!pipResolver) {
throw new Error('valid pipResolver required to be passed in as the first parameter');
}
// pelias 'imports.adminLookup' config section
config = config || {};
const pipResolverStream = createPipResolverStream(pipResolver, config);
const end = createPipResolverEnd(pipResolver);
const stream = parallelTransform(config.maxConcurrentReqs || 1, pipResolverStream);
stream.on('end', end);
return stream;
};
Although the console logged "Imported 0 addresses" but the pipResolverStream will stay forever if I do not shut down it manually by Ctrl+C.
Update, this case only happens if there is no data passed through stream.
"the end event will never trigger without something like < /dev/null to generate that EOF. Otherwise the program waits for the terminal to send a ^D."
node.js: How to detect an empty stdin stream?
I need to trigger, through an http request, a process where I download some data from S3, gunzip it, modify the stream, gzip it and send to another bucket in S3.
So far I was able to either:
Download
Gunzip
Modify (filter) the data
return the data
Or:
Download
Gunzip
Gzip
Upload the unmodified data and retrieve the url of the object
My first attempt at this consisted in using the on('data') event from the gunzip stream to modify the data; then when the 'end' event is thrown, I can return it to the browser making the request.
var accumulator = [];
gunzip.on('data', chunk=>{
var lines = chunk.toString('utf-8').split(\n);
lines.forEach(line=>{
if(shouldBeFiltered(line)){
accumulator.push(line);
}
})
})
gunzip.on('end', ()=>{
res.send(accumulator);
})
getS3.pipe(gunzip)
If instead of returning the result (res.send) I try to pipe gunzip to gzip, the filter is ignored. It makes sense as I have an accumulator array that I return (in the previous case) when the end event is thrown.
Then after some digging, I found a reference suggesting that the data should be pushed to, and I tried the following, which did not work:
gunzip.on('data', chunk=>{
var lines = chunk.toString('utf-8').split(\n);
lines.forEach(line=>{
if(shouldBeFiltered(line)){
gunzip.push(line);
}
})
})
// the end event no longer mattered
// gunzip.on('end', ()=>{
// res.send(accumulator);
// })
getS3.pipe(gunzip).pipe(gzip).pipe(putS3(putS3param.Key, putS3param.Bucket));
Then I tried to create a transform stream (this is extremely simplified as I was trying the concept), but then I had an internal error:
const stream = require('stream');
const Transform = stream.Transform;
function filter(pipeline) {
var the_filter = new Transform({
transform(chunk, encoding, next) {
console.log();
chunk += Buffer('Modified', 'utf-8');
this.push(chunk);
next();
}
});
pipeline.pipe(the_filter);
}
Other than creating a file and gziping it and uploading I have no more ideas.
Thanks for any help!
After much digging around, I finally found the answer in this page
It seems that what was missing as setting Transform as objectMode, other than that, I see nothing relevant.
var stream = require('stream')
var liner = new stream.Transform( { objectMode: true } )
liner._transform = function (chunk, encoding, done) {
var data = chunk.toString()
if (this._lastLineData) data = this._lastLineData + data
var lines = data.split('\n')
this._lastLineData = lines.splice(lines.length-1,1)[0]
lines.forEach(this.push.bind(this))
done()
}
liner._flush = function (done) {
if (this._lastLineData) this.push(this._lastLineData)
this._lastLineData = null
done()
}
module.exports = liner
I have simple question - Can I get the Buffer from fs.createWriteStream()?
In my case I use archiver NPM module which may create archives from Buffer. When this module end work with method archive.finalize() I may get result from archive.pipe(stream). I want have Buffer from fs.createWriteStream() which I may send to res.send() Express method.
let archive = archiver("zip", {
zlib: { level: 9 } // Sets the compression level.
});
const stream = fs.createWriteStream(
"./src/backend/api/test.zip"
);
archive.pipe(stream);
archive.append(buffer, { name: "test.csv" });
archive.finalize();
If I understood your problem correctly, that might help you
const stream = fs.createWriteStream(
"./src/backend/api/test.zip"
);
stream.on('data', (chunk) => {
//here u get every chunk as a buffer
})
stream.on('finish', () => {
//here your code if the stream is ending
})
I am making use of "socket.io-client" and "socket.io stream" to make a request and then stream some data. I have the following code that handles this logic
Client Server Logic
router.get('/writeData', function(req, res) {
var io = req.app.get('socketio');
var nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
var nameNodeData = {};
async.waterfall([
checkForDataNodes,
readFileFromS3
], function(err, result) {
if (err !== null) {
res.json(err);
}else{
res.json("Finished Writing to DN's");
}
});
function checkForDataNodes(cb) {
nameNodeSocket.on('nameNodeData', function(data) {
nameNodeData = data;
console.log(nameNodeData);
cb(null, nameNodeData);
});
if (nameNodeData.numDataNodes === 0) {
cb("No datanodes found");
}
}
function readFileFromS3(nameNodeData, cb) {
for (var i in nameNodeData['blockToDataNodes']) {
var IP = nameNodeData['blockToDataNodes'][i]['ipValue'];
var dataNodeSocket = io.connect('http://'+ IP +":5000");
var ss = require("socket.io-stream");
var stream = ss.createStream();
var byteStartRange = nameNodeData['blockToDataNodes'][i]['byteStart'];
var byteStopRange = nameNodeData['blockToDataNodes'][i]['byteStop'];
paramsWithRange['Range'] = "bytes=" + byteStartRange.toString() + "-" + byteStopRange.toString();
//var file = require('fs').createWriteStream('testFile' + i + '.txt');
var getFileName = nameNodeData['blockToDataNodes'][i]['key'].split('/');
var fileData = {
'mainFile': paramsWithRange['Key'].split('/')[1],
'blockName': getFileName[1]
};
ss(dataNodeSocket).emit('sendData', stream, fileData);
s3.getObject(paramsWithRange).createReadStream().pipe(stream);
//dataNodeSocket.disconnect();
}
cb(null);
}
});
Server Logic (that gets the data)
var dataNodeIO = require('socket.io')(server);
var ss = require("socket.io-stream");
dataNodeIO.on('connection', function(socket) {
console.log("Succesfully connected!");
ss(socket).on('sendData', function(stream, data) {
var IP = data['ipValue'];
var blockName = data['blockName'];
var mainFile = data['mainFile'];
dataNode.makeDir(mainFile);
dataNode.addToReport(mainFile, blockName);
stream.pipe(fs.createWriteStream(mainFile + '/' + blockName));
});
});
How can I properly disconnect the connections in function readFileFromS3. I have noticed using dataNodeSocket.disconnect() at the end does not work as I cannot verify the data was received on the 2nd server. But if I comment it out, I can see the data being streamed to the second server.
My objective is to close the connections in Client Server side
It appears that the main problem with closing the socket is that you weren't waiting for the stream to be done writing before trying to close the socket. So, because the writing is all asynchronous and finishes sometime later, you were trying to close the socket before the data had been written.
Also because you were putting asynchronous operations inside a for loop, you were also running all your operations in parallel which may not be exactly what you want as it makes error handling more difficult and server load more difficult.
Here's the code I would suggest that does the following:
Create a function streamFileFromS3() that streams a single file and returns a promise that will notify when it's done.
Use await in a for loop with that streamFileFromS3() to serialize the operations. You don't have to serialize them, but then you would have to change your error handling to figure out what to do if one errors while the others are already running and you'd have to be more careful about concurrency issues.
Use try/catch to catch any errors from streamFileFromS3().
Add error handling on the stream.
Change all occurrences of data['propertyName'] to data.propertyName. The only time you need to use brackets is if the property name contains a character that is not allowed in a Javascript identifier or if the property name is in a variable. Otherwise, the dot notation is preferred.
Add socket.io connection error handling logic for both socket.io connections.
Set returned status to 500 when there's an error processing the request
So, here's the code for that:
const ss = require("socket.io-stream");
router.get('/writeData', function(req, res) {
const io = req.app.get('socketio');
function streamFileFromS3(ip, data) {
return new Promise((resolve, reject) => {
const dataNodeSocket = io.connect(`http://${ip}:5000`);
dataNodeSocket.on('connect_error', reject);
dataNodeSocket.on('connect_timeout', () {
reject(new Error(`timeout connecting to http://${ip}:5000`));
});
dataNodeSocket.on('connection', () => {
// dataNodeSocket connected now
const stream = ss.createStream().on('error', reject);
paramsWithRange.Range = `bytes=${data.byteStart}-${data.byteStop}`;
const filename = data.key.split('/')[1];
const fileData = {
'mainFile': paramsWithRange.Key.split('/')[1],
'blockName': filename
};
ss(dataNodeSocket).emit('sendData', stream, fileData);
// get S3 data and pipe it to the socket.io stream
s3.getObject(paramsWithRange).createReadStream().on('error', reject).pipe(stream);
stream.on('close', () => {
dataNodeSocket.disconnect();
resolve();
});
});
});
}
function connectError(msg) {
res.status(500).send(`Error connecting to ${NAMENODE_ADDRESS}`);
}
const nameNodeSocket = io.connect(NAMENODE_ADDRESS, { reconnect: true });
nameNodeSocket.on('connect_error', connectError).on('connect_timeout', connectError);
nameNodeSocket.on('nameNodeData', async (nameNodeData) => {
try {
for (let item of nameNodeData.blockToDataNodes) {
await streamFileFromS3(item.ipValue, item);
}
res.json("Finished Writing to DN's");
} catch(e) {
res.status(500).json(e);
}
});
});
Other notes:
I don't know what paramsWithRange is as it is not declared here and when you were doing everything in parallel, it was getting shared among all the connections which is asking for a concurrency issue. In my serialized implementation, it's probably safe to share it, but the way it is now bothers me as it's a concurrency issue waiting to happen.
After i emit error event in MyWritableStream, data transmission stops. What i need to do to resume data transfer?
var readable = fs.createReadStream('test.txt');
var writable = new MyWritableStream();
writable.on('error', function(error) {
console.log('error', error);
// How i can resume?
});
writable.on('finish', function(){
console.log('finished');
})
readable.pipe(writable);
I know this question is old, but you might wanna check out https://github.com/miraclx/xresilient
I built this for this exact same reason (works best with seekable streams).
You define a function that returns a readable stream, the library measures the number of bytes that have passed through until an error is met.
Once the readable stream encounters an error event, it recalls the defined function with the number of bytes read so you can index the stream source.
Example:
const fs = require('fs');
const xresilient = require('xresilient');
const readable = xresilient(({bytesRead}) => {
return generateSeekableStreamSomehow({start: bytesRead});
}, {retries: 5});
const writable = fs.createWriteStream('file.test');
readable.pipe(writable);
File streams are indexable with the start option of the fs.createReadStream() function.
HTTP Requests are indexable with the Range HTTP Header.
Check it out.
https://www.npmjs.com/package/xresilient
I am not sure, if it is a normal practice, but i can't see another solution for now & it works for me. If you can advise more accurate solution, please do it.
We can track readable stream instance using pipe event in writeable one:
function WriteableStream(options) {
Writable.call(this, options);
this.source = null;
var instance = this;
this.on('pipe', function(source){
instance.source = source;
});
}
util.inherits(WriteableStream, Writable);
So, when we emit error event, and readable stream is unpiped automatically, we can re-pipe it ourself:
WriteableStream.prototype._write = function(chunk, encoding, done) {
this.emit('error', new Error('test')); // unpipes readable
done();
};
WriteableStream.prototype.resume = function() {
this.source.pipe(this); // re-pipes readable
}
Finally, we will use it the following way:
var readable = fs.createReadStream(file);
var writeable = new WriteableStream();
writeable.on('error', function(error) {
console.log('error', error);
writeable.resume();
});
readable.pipe(writeable);