Node - sequential download and zip extraction - node.js

I am creating an Express server that allows the client to receive files downloaded from different sources.
One of the files that is downloaded is a zip file, which needs to be extracted before it is sent to the client. The API which is exposed to the Node client uses Promise.all() to ensure that files are not sent back until everything is downloaded and extracted.
// Client would call this as the /download API. Once the download is completed (along with zip extraction), these files would be sent to the client
function download() {
    return Promise.all([downloadInvoice(), downloadImages()])
        .then(function (result) {
            // this is happening even before the zip is extracted
            console.log('content downloaded and zip extracted, send everything to client');
        });
}
The downloadImages function calls a REST service (POST) to get the images, which are then written to the filesystem using pipe. Once the zip is saved on the file system, I use the extract-zip module to extract the zip to a folder.
var fs = require('fs');
var req = require('request');
var extract = require('extract-zip');

function downloadImages() {
    return new Promise(function (resolve, reject) {
        var imageUrl = 'http://localhost:8080/restserver/downloadImages';
        var postData = {
            username: 'abc',
            password: 'xyz'
        };
        var options = {
            method: 'post',
            body: postData,
            json: true,
            url: imageUrl
        };
        req(options, function (err, res, body) {
            if (err) {
                console.log('Error posting json ' + err);
                return;
            }
            console.log('Status code ' + res.statusCode);
        }).pipe(fs.createWriteStream('C:/temp/Images.zip'))
            .on('finish', function () { // 'finish' fired as download completed. Now extract
                console.log('Finished downloading Images, now extract them');
                extract('C:/temp/Images.zip',
                    { dir: 'C:/temp/Images' },
                    function (err) {
                        if (err) {
                            console.log('Error extracting Images zip ' + err);
                        }
                        resolve("Promise resolved for Images");
                    });
            }).on('error', function (error) {
                reject('Error extracting images', error);
            });
    });
}
The challenge I am facing is that Promise.all resolves even before the zip extracts. So an empty folder is sent to the client.
I would like things to happen in this order:
1. Promise.all in download() is used to initiate the downloads
2. downloadImages() downloads the zip of images and saves it on the node server (using pipe)
3. downloadImages() extracts the zip
4. Promise.all in download() completes and the extracted images are sent to the client
Step 3 is happening after step 4. I am not able to make out what I am doing incorrectly when chaining these operations in the right sequence.
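For comparison, here is a minimal sketch of one way to enforce that ordering: only resolve the downloadImages promise from inside the extract callback, and reject on every failure path so Promise.all can surface errors. The URL and paths are placeholders taken from the question.

var fs = require('fs');
var request = require('request');
var extract = require('extract-zip');

function downloadImages() {
    return new Promise(function (resolve, reject) {
        request({ method: 'post', url: 'http://localhost:8080/restserver/downloadImages',
                  body: { username: 'abc', password: 'xyz' }, json: true })
            .on('error', reject)                                  // network error while downloading
            .pipe(fs.createWriteStream('C:/temp/Images.zip'))
            .on('error', reject)                                  // error while writing the zip
            .on('finish', function () {
                // the zip is fully on disk only now, so extraction starts here
                extract('C:/temp/Images.zip', { dir: 'C:/temp/Images' }, function (err) {
                    if (err) return reject(err);                  // extraction failed
                    resolve('Images downloaded and extracted');   // Promise.all waits for this
                });
            });
    });
}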

Related

How do I create a file in Express and Node on my server and then download it to my client? I am using NextJS for my frontend and backend

How do I create a file in Express and Node on my server and then download it to my client? I am using NextJS for my frontend and backend. I am confused about how I would download the file on the front end after the file is created in the root of the server folder. Since I am using React for my frontend, whenever I try to visit that filepath it takes me to a page instead of the file.
Here is what I have in my express route in node
var xls = json2xls(json, {
    fields
});
// If there isn't a folder called /temp in the
// root folder it creates one
if (!fs.existsSync('./temp')) {
    fs.mkdirSync('./temp');
}
const fileName = `temp/${req.user.first_name}${req.body._id + Date.now()}.xlsx`;
// fs.writeFileSync(fileName, xls, 'binary');
fs.writeFile(fileName, xls, 'binary', function (err, result) {
    if (err) {
        return console.log(err);
    }
    console.log(result, 'this is result');
});
Here is what I have on my frontend
axios.post('api/download', payload)
    .then(res => {
        const link = document.createElement('a');
        link.href = res.data.url;
        link.download = '';
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
    })
    .catch(err => {
        throw err;
    });
Can you fetch the file with a GET request instead? That is, make the temp directory a static resources directory:
app.use(express.static('temp')); // app is your express instance.
// Maybe you have to correct temp's path
Then respond to the POST request with the file URL, which the client can request with a plain GET:
fs.writeFile(fileName, xls, 'binary', function (err, result) {
    if (err) {
        console.log(err);
        return res.status(500).json({ err });
    }
    console.log(result, 'this is result');
    res.json({ url: 'http://localhost:8080/temp/' + fileName }); // res is the response object of your route handler.
    // Maybe you have to correct the server address
});
Alternatively, you can send the xls binary directly to the client; on the client you create a Blob object from the response and then create a download link for the Blob object.
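As a rough sketch of that second approach (the /api/download route name, the res.download usage, and the file name are assumptions rather than code from the question): the server streams the generated file back, and the client requests the response as a blob and saves it.

// Server: send the generated file itself instead of a URL (hypothetical route)
app.post('/api/download', (req, res) => {
    // ... generate the xlsx and fileName as in the question ...
    res.download(fileName); // sets Content-Disposition so the browser treats it as an attachment
});

// Client: ask for the binary as a blob and trigger a download from it
axios.post('api/download', payload, { responseType: 'blob' })
    .then(res => {
        const url = window.URL.createObjectURL(new Blob([res.data]));
        const link = document.createElement('a');
        link.href = url;
        link.download = 'report.xlsx'; // whatever filename the browser should use
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
        window.URL.revokeObjectURL(url);
    });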

Node.js - AWS - Program Terminates Before Upload to S3 Bucket Completes

I've written a program that creates HTML files. I then attempt to upload the files to my S3 bucket at the end of the program. It seems that the problem is that my program terminates before allowing the function to complete or receiving a callback from the function.
Here is the gist of my code:
let aws = require('aws-sdk');
aws.config.update({
    // Censored keys for security
    accessKeyId: '*****',
    secretAccessKey: '*****',
    region: 'us-west-2'
});
let s3 = new aws.S3({
    apiVersion: "2006-03-01",
});
function upload(folder, platform, browser, title, data) {
    s3.upload({
        Bucket: 'html',
        Key: folder + platform + '/' + browser + '/' + title + '.html',
        Body: data
    }, function (err, data) {
        if (err) {
            console.log("Error: ", err);
        }
        if (data) {
            console.log("Success: ", data.Location);
        }
    });
}
/*
*
* Here is where the program generates HTML files
*
*/
upload(folder, platform, browser, title, data);
If I call the upload() function (configured with test/dummy data) before the HTML generation section of my code, the upload succeeds. The test file successfully uploads to S3. However, when the function is called at the end of my code, I do not receive an error or success response. Rather, the program simply terminates and the file isn't uploaded to S3.
Is there a way to wait for the callback from my upload() function before continuing the program? How can I prevent the program from terminating before uploading my files to S3? Thank you!
Edit: After implementing Deiv's answer, I found that the program is still not uploading my files. I still am not receiving a success or error message of any kind. In fact, it seems like the program just skips over the upload() function. To test this, I added a console.log("test") after calling upload() to see if it would execute. Sure enough, the log prints successfully.
Here's some more information about the project: I'm utilizing WebdriverIO v4 to create HTML reports of various tests passing/failing. I gather the results of the tests via multiple event listeners (e.g. this.on('test:start'), this.on('suite:end'), etc.). The final event is this.on('end'), which is called when all of the tests have completed execution. It is here where the test results are sorted based on which operating system and browser they were run on, etc.
I'm now noticing that my program won't do anything S3-related in the this.on('end') event handler, even if I put it at the very beginning of the handler, though I'm still convinced it isn't given enough time to execute, because the handler is able to process the results and create the HTML files very quickly. I have this bit of code that lists all buckets in my S3:
s3.listBuckets(function (err, data) {
    if (err) {
        console.log("Error: ", err);
    } else {
        console.log("Success: ", data.Buckets);
    }
});
Even this doesn't return a result of any kind when run at the beginning of this.on('end'). Does anyone have any ideas? I'm really stumped here.
Edit: Here is my new code, which implements Naveen's suggestion:
this.on('end', async (end) => {
    /*
     * Program sorts results and creates variable 'data', the contents of the HTML file.
     */
    await s3.upload({
        Bucket: 'html',
        Key: key,
        Body: data
    }, function (err, data) {
        if (err) {
            console.log("Error: ", err);
        }
        if (data) {
            console.log("Success: ", data.Location);
        }
    }).on('httpUploadProgress', event => {
        console.log(`Uploaded ${event.loaded} out of ${event.total}`);
    });
});
The logic seems sound, but still I get no success or error message, and I do not see the upload progress. The HTML file does not get uploaded to S3.
You can use promises to wait for your upload function to finish. Here's what it will look like:
function upload(folder, platform, browser, title, data) {
    return new Promise((resolve, reject) => {
        s3.upload({
            Bucket: 'html',
            Key: folder + platform + '/' + browser + '/' + title + '.html',
            Body: data
        }, function (err, data) {
            if (err) {
                console.log("Error: ", err);
                return reject(err);
            }
            if (data) {
                console.log("Success: ", data.Location);
                return resolve(); // potentially return resolve(data) if you need the data
            }
        });
    });
}
/*
*
* Here is where the program generates HTML files
*
*/
upload(folder, platform, browser, title, data)
    .then(data => { // if you don't care for the data returned, you can also do .then(() => {
        // handle success, do whatever else you want, such as calling callback to end the function
    })
    .catch(error => {
        // handle error
    });
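A related note on the async/await attempt from the question's edit: with the AWS SDK for JavaScript v2, awaiting the bare AWS.Request object returned by s3.upload() resolves immediately; you have to call .promise() on it to get something awaitable. A minimal sketch:

async function upload(folder, platform, browser, title, data) {
    const params = {
        Bucket: 'html',
        Key: folder + platform + '/' + browser + '/' + title + '.html',
        Body: data
    };
    try {
        // .promise() turns the AWS.Request into a real Promise that settles when the upload finishes
        const result = await s3.upload(params).promise();
        console.log("Success: ", result.Location);
        return result;
    } catch (err) {
        console.log("Error: ", err);
        throw err;
    }
}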

Sending Zip file from server to client browser with Express and Archiver

I am a beginner with Node and I am trying to figure out how to create a zip file on the server, send it to the client, and then download the zip file to the user's browser. I am using the Express framework and I am using Archiver to actually do the zipping. My server code is the following, which was taken from Dynamically create and stream zip to client:
router.get('/image-dl', function (req, res) {
    res.writeHead(200, {
        'Content-Type': 'application/zip',
        'Content-disposition': 'attachment; filename=myFile.zip'
    });
    var zip = archiver('zip');
    // Send the file to the page output.
    zip.pipe(res);
    // Create zip with some files. Two dynamic, one static. Put #2 in a sub folder.
    zip.append('Some text to go in file 1.', { name: '1.txt' })
        .append('Some text to go in file 2. I go in a folder!', { name: 'somefolder/2.txt' })
        .finalize();
});
So it's zipping two text files and returning the result. On the client side I am using the following function in a service to actually call that endpoint:
downloadZip() {
    const headers = new Headers({'Content-Type': 'application/json'});
    const token = localStorage.getItem('token')
        ? '?token=' + localStorage.getItem('token')
        : '';
    return this.http.get(this.endPoint + '/job/image-dl' + token, {headers: headers})
        .map((response: Response) => {
            const result = response;
            return result;
        })
        .catch((error: Response) => {
            this.errorService.handleError(error.json());
            return Observable.throw(error.json());
        });
}
and then I have another function which calls downloadZip() and actually downloads the zip file to the user's local browser.
testfunc() {
    this.jobService.downloadZip().subscribe(
        (blah: any) => {
            var blob = new Blob([blah], {type: "application/zip"});
            FileSaver.saveAs(blob, "helloworld.zip");
        }
    );
}
When testfunc() is called, a zip file is downloaded to the user's browser. However, when I try to unzip it, it creates a zip.cpgz file, which then turns back into a zip file when clicked, in an infinite loop, which indicates that some kind of corruption happened. Can anyone see where I went wrong here?
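One common cause of exactly this symptom (offered as an assumption, since the question doesn't show how the Http service is configured) is that Angular's Http client parses the response as text unless a blob response is requested, and treating the zip bytes as text corrupts the archive. A sketch of requesting the body as a blob:

import { ResponseContentType } from '@angular/http'; // at the top of the service file

downloadZip() {
    const token = localStorage.getItem('token')
        ? '?token=' + localStorage.getItem('token')
        : '';
    return this.http.get(this.endPoint + '/job/image-dl' + token, {
            responseType: ResponseContentType.Blob // keep the zip as binary data
        })
        .map(response => response.blob()); // emit a Blob instead of parsed text
}

// caller: the emitted value is already a Blob, so it can be saved directly
testfunc() {
    this.jobService.downloadZip().subscribe(blob => {
        FileSaver.saveAs(blob, 'helloworld.zip');
    });
}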

Upload file to servlet from node without saving it

On my node express server, I am receiving a pdf file. I am using the below code to get the pdf contents from the request
var data = new Buffer('');
request.on('data', function (chunk) {
data = Buffer.concat([data, chunk]);
});
request.on('end', function() {
console.log('PDF data is '+JSON.stringify(data));
});
Now that the PDF content is available in node, I need to send it as-is to a J2EE server. In order to do that, I am first saving the PDF file on the node server, reading it back from the node server, and then piping it to request.post (https://github.com/request/request):
var req = require('request');
fs.writeFile('abc.pdf', data, 'binary', function (err) {
    if (err) {
        console.log('Error ' + JSON.stringify(err));
        throw err;
    }
    var source = fs.createReadStream('abc.pdf');
    // send our data via POST request
    source.pipe(req.post('http://' + j2ee_host + ':' + j2ee_port + '/myjavaapp/Upload'));
});
This works fine. However, I feel that saving the PDF file on the node server and then reading it back (before posting to the J2EE server using the request module) is completely unnecessary, as I am not making any changes to the file.
Once I have the PDF contents in the 'data' variable, I would like to post them directly to the J2EE server. However, I have not been able to find a way to use the request module to directly post file contents. I have seen some examples of POSTing with the request module, but they refer to formData. In my case, I don't have formData; I am instead reading the file from the incoming request and posting it directly to the J2EE server.
Is there a way to achieve this and avoid the file write and read?
EDIT
Below is my complete code
function upload(request, response) {
    var data = new Buffer('');
    request.on('data', function (chunk) {
        data = Buffer.concat([data, chunk]);
    });
    request.on('end', function () {
        fs.writeFile('abc.pdf', data, 'binary', function (err) {
            if (err) {
                console.log('Error ' + JSON.stringify(err));
                throw err;
            }
            var source = fs.createReadStream('abc.pdf');
            source.pipe(req.post('http://' + j2ee_host + ':' + j2ee_port + '/myj2eeapp/Upload'));
        });
    });
}
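If the goal is simply to avoid the temporary file, one option (a sketch against the complete code above, not verified against the servlet) is to hand the accumulated Buffer to request.post directly as the request body:

var req = require('request');

function upload(request, response) {
    var data = new Buffer('');
    request.on('data', function (chunk) {
        data = Buffer.concat([data, chunk]);
    });
    request.on('end', function () {
        // post the in-memory Buffer directly; no fs.writeFile / createReadStream round trip
        req.post({
            url: 'http://' + j2ee_host + ':' + j2ee_port + '/myj2eeapp/Upload',
            body: data,                                      // request sends a Buffer body as-is
            headers: { 'Content-Type': 'application/pdf' }   // assumed content type
        }, function (err, res, body) {
            if (err) {
                console.log('Error posting PDF ' + err);
                return response.sendStatus(500);
            }
            response.sendStatus(res.statusCode);
        });
    });
}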
You can pipe the incoming request directly to the servlet:
var req = require('request');
function upload(request, response) {
    var target = req.post('http://' + j2ee_host + ':' + j2ee_port + '/myjavaapp/Upload');
    request.pipe(target);
    target.on('finish', function () {
        console.log('All done!');
        // send the response or make a completed callback here...
    });
}
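A possible refinement (my addition, not part of the original answer): also listen for the servlet's response and for errors on the forwarded request, so the caller gets a status even when the upload fails:

var req = require('request');

function upload(request, response) {
    var target = req.post('http://' + j2ee_host + ':' + j2ee_port + '/myjavaapp/Upload');
    request.pipe(target);
    target.on('response', function (res) {
        // relay the servlet's status code back to the original caller
        response.sendStatus(res.statusCode);
    });
    target.on('error', function (err) {
        console.log('Error forwarding PDF ' + err);
        response.sendStatus(502);
    });
}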

How to await fields before processing the file stream in multipart form

I'm using SendGrid for receiving files via email. SendGrid parses the incoming emails and sends the files in a multipart form to an endpoint I have set up.
I don't want the files on my local disk so I stream them straight to Amazon S3. This works perfectly.
But before I can stream to S3 I need to get hold of the destination mail address so I can work out the correct S3 folder. This is sent in a field named "to" in the form post. Unfortunately this field sometimes arrives after the files, hence I need a way to await the to-field before I'm ready to take the stream.
I thought I could wrap the onField in a promise and await the to-field from within the onFile. But this concept seems to lock itself up when the field arrives after the file.
I'm new to both streams and promises. I would really appreciate it if someone could tell me how to do this.
This is the non-working, pseudo-ish code:
function sendGridUpload(req, res, next) {
    var busboy = new Busboy({ headers: req.headers });

    var awaitEmailAddress = new Promise(function (resolve, reject) {
        busboy.on('field', function (fieldname, val, fieldnameTruncated, valTruncated) {
            if (fieldname === 'to') {
                resolve(val);
            } else {
                return;
            }
        });
    });

    busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
        function findInbox(emailAddress) {
            console.log('Got email address: ' + emailAddress);
            // ..find the inbox and generate an s3Key
            return s3Key;
        }
        function saveFileStream(s3Key) {
            // ..pipe the file directly to S3
        }
        awaitEmailAddress.then(findInbox)
            .then(saveFileStream)
            .catch(function (err) {
                log.error(err);
            });
    });

    req.pipe(busboy);
}
I finally got this working. The solution is not very pretty, and I have actually switched to another concept (described at the end of the post).
To buffer the incoming data until the "to"-field arrives I used stream-buffers by samcday. When I get hold of the to-field I release the readable stream to the pipes lined up for the data.
Here is the code (some parts omitted, but the essential parts are there):
var streamBuffers = require('stream-buffers');

function postInboundMail(req, res, next) {
    var busboy = new Busboy({ headers: req.headers });

    // Sometimes the fields arrive after the files are streamed.
    // We need the "to"-field before we are ready for the files,
    // therefore the onField is wrapped in a promise which gets
    // resolved when the to-field arrives.
    var awaitEmailAddress = new Promise(function (resolve, reject) {
        busboy.on('field', function (fieldname, val, fieldnameTruncated, valTruncated) {
            var emailAddress;
            if (fieldname === 'to') {
                try {
                    emailAddress = emailRegexp.exec(val)[1];
                    resolve(emailAddress);
                } catch (err) {
                    return reject(err);
                }
            } else {
                return;
            }
        });
    });

    busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
        var inbox;

        // I'm using ReadableStreamBuffer to accumulate the data before
        // I get the email field, so I can send the stream through to S3.
        var readBuf = new streamBuffers.ReadableStreamBuffer();

        // I have to pause readBuf immediately. Otherwise stream-buffers starts
        // sending as soon as I put data in with put().
        readBuf.pause();

        function getInbox(emailAddress) {
            return model.inbox.findOne({ email: emailAddress })
                .then(function (result) {
                    if (!result) return Promise.reject(new Error(`Inbox not found for ${emailAddress}`));
                    inbox = result;
                    return Promise.resolve();
                });
        }

        function saveFileStream() {
            console.log('=========== starting stream to S3 ========= ' + filename);
            // Have to resume readBuf since we paused it before
            readBuf.resume();
            // file.save will approximately do the following:
            // readBuf.pipe(gzip).pipe(encrypt).pipe(S3)
            return model.file.save({
                inbox: inbox,
                fileStream: readBuf
            });
        }

        awaitEmailAddress.then(getInbox)
            .then(saveFileStream)
            .catch(function (err) {
                log.error(err);
            });

        file.on('data', function (data) {
            // Fill readBuf with data as it arrives
            readBuf.put(data);
        });

        file.on('end', function () {
            // This was the only way I found to get the S3 streaming to finish.
            // destroySoon will let the pipes finish reading, but no more writes are allowed.
            readBuf.destroySoon();
        });
    });

    busboy.on('finish', function () {
        res.writeHead(202, { Connection: 'close', Location: '/' });
        res.end();
    });

    req.pipe(busboy);
}
I would very much like feedback on this solution, even though I'm not using it. I have a feeling that this can be done in a much simpler and more elegant way.
New solution:
Instead of waiting for the to-field I send the stream directly to S3. I figured that the more stuff I put between the incoming stream and the S3 saving, the higher the risk of losing the incoming file due to a bug in my code. (SendGrid will eventually resend the file if I'm not responding with 200, but it will take some time.)
This is how I do it:
Save a placeholder for the file in the database
Pipe the stream to S3
Update the placeholder with more information as it arrives
This solution also gives me the opportunity to easily get hold of unsuccessful uploads since the placeholders for unsuccessful uploads will be incomplete.
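A rough sketch of that flow, with assumed model helpers (createPlaceholder and updatePlaceholder) standing in for whatever the database layer actually looks like; the S3 upload is done by passing the busboy file stream straight to s3.upload:

var aws = require('aws-sdk');
var Busboy = require('busboy');
var s3 = new aws.S3();

function postInboundMail(req, res, next) {
    var busboy = new Busboy({ headers: req.headers });

    busboy.on('file', function (fieldname, file, filename, encoding, mimetype) {
        // 1. Save a placeholder for the file in the database (hypothetical helper)
        model.file.createPlaceholder({ filename: filename })
            .then(function (placeholder) {
                // 2. Pipe the stream to S3; the SDK accepts a readable stream as Body
                return s3.upload({
                    Bucket: 'inbound-mail',                    // assumed bucket name
                    Key: placeholder.id + '/' + filename,
                    Body: file
                }).promise().then(function (result) {
                    // 3. Update the placeholder with more information as it arrives
                    return model.file.updatePlaceholder(placeholder.id, {
                        s3Location: result.Location,
                        complete: true
                    });
                });
            })
            .catch(function (err) {
                // the placeholder stays incomplete, which marks the upload as failed
                log.error(err);
            });
    });

    busboy.on('field', function (fieldname, val) {
        // the "to" field (and anything else) can be attached to the placeholder
        // whenever it shows up, independently of the file stream
    });

    busboy.on('finish', function () {
        res.writeHead(202, { Connection: 'close', Location: '/' });
        res.end();
    });

    req.pipe(busboy);
}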
