I am building an API that needs to accept file uploads. A user POSTs a file to an endpoint, the file is sent to a virus scan, and if it's clean it is sent to storage (probably S3). So far I have achieved this with one issue: the files are temporarily saved to the application's file system. I need to design an app that doesn't persist files to disk. Here is my currently working code:
app.js
const express = require('express');
const bb = require('express-busboy');

const app = express();

// The express-busboy module extends the express app to handle incoming file uploads
bb.extend(app, {
    upload: true,
    path: './tmp'
});
Routes.js
const express = require('express');
const router = express.Router();
const fileManagementService = require('./file-management-service')();

router
    .route('/:fileId')
    .post(async (req, res, next) => {
        try {
            const {fileId} = req.params;
            const {files} = req;
            const response = await fileManagementService.postFile(files, fileId);
            res.status(201).json(response);
        } catch (err) {
            next(err);
        }
    });

module.exports = router;
file-management-service.js
const fs = require('fs');

function createUploader() {
    // POST /:fileId
    async function postFile(data, fileId) {
        const {file} = data.file;
        const fileStream = fs.createReadStream(file);
        const scanOutput = await scanFile(fileStream); // Function scans the file for viruses
        const status = scanOutput.status === 'OK';
        let upload = 'NOT UPLOADED';
        if (status) {
            upload = await postS3Object({file}); // Some function that sends the file to S3 or other storage
        }
        fs.unlinkSync(file);
        return {
            fileId,
            scanned: scanOutput,
            upload
        };
    }

    return Object.freeze({
        postFile
    });
}

module.exports = createUploader;
As mentioned, the above works as expected: the file is sent to be scanned, then sent to an S3 bucket, before a response to that effect is returned to the poster. However, my express-busboy setup stores the file in the ./tmp folder, and I'm converting it into a readable stream using fs.createReadStream(filePath) before sending it to the AV, and again in the function that sends the file to S3.
This API is hosted in a kubernetes cluster and I need to avoid creating state. How can I achieve the above without actually saving the file to disk? I'm guessing busboy receives the file as some sort of stream, so, without sounding dense, can it not just remain a stream and be piped through these functions to achieve the same outcome?
You can use busboy at a bit lower level and get access to its translated readstream. Here's an example from the busboy docs that can be adapted to your situation:
const http = require('http');
const path = require('path');
const os = require('os');
const fs = require('fs');
const Busboy = require('busboy');

http.createServer(function(req, res) {
    if (req.method === 'POST') {
        var busboy = new Busboy({ headers: req.headers });
        busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
            var saveTo = path.join(os.tmpdir(), path.basename(fieldname));
            file.pipe(fs.createWriteStream(saveTo));
        });
        busboy.on('finish', function() {
            res.writeHead(200, { 'Connection': 'close' });
            res.end("That's all folks!");
        });
        return req.pipe(busboy);
    }
    res.writeHead(404);
    res.end();
}).listen(8000, function() {
    console.log('Listening for requests');
});
The key part is this which I've annotated:
// create a new busboy instance on each incoming request that has files with it
var busboy = new Busboy({ headers: req.headers });

// register for the file event
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
    // at this point the file argument is a readstream for the data of an uploaded file
    // you can do whatever you want with this readstream, such as
    // feed it directly to your anti-virus
    // this example code saves it to a tempfile;
    // you would replace this with code that sends the stream to your anti-virus
    var saveTo = path.join(os.tmpdir(), path.basename(fieldname));
    file.pipe(fs.createWriteStream(saveTo));
});

// this recognizes the end of the upload stream and sends
// whatever you want the final http response to be
busboy.on('finish', function() {
    res.writeHead(200, { 'Connection': 'close' });
    res.end("That's all folks!");
});

// this gets busboy started, feeding the incoming request to busboy
// so it can start reading it and parsing it; it will eventually trigger
// one or more "file" events
return req.pipe(busboy);
When you've identified an incoming request that you want to run this custom busboy operation on, you create a Busboy instance, pass it the headers, and register for the file event. The file event hands you a readstream for the uploaded file's data. You can then pipe that stream directly to your anti-virus without ever going through the file system.
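For your specific scan-then-upload flow, a minimal sketch might look like the following. It assumes your scanner can accept a readstream (scanStream here is hypothetical, standing in for your scanFile), and uses the aws-sdk v2 managed uploader, whose Body parameter accepts a stream; the bucket name is also an assumption to adapt:

const express = require('express');
const Busboy = require('busboy');
const { PassThrough } = require('stream');
const AWS = require('aws-sdk');

const router = express.Router();
const s3 = new AWS.S3();

router.route('/:fileId').post((req, res, next) => {
    const busboy = new Busboy({ headers: req.headers });
    busboy.on('file', (fieldname, file, filename, encoding, mimetype) => {
        // Tee the incoming stream: one branch feeds the scanner,
        // the other feeds the S3 upload, so neither touches disk.
        const toScanner = new PassThrough();
        const toS3 = new PassThrough();
        file.pipe(toScanner);
        file.pipe(toS3);

        const upload = s3.upload({
            Bucket: 'my-bucket',        // assumption: your bucket name
            Key: req.params.fileId,
            Body: toS3                  // aws-sdk v2 managed upload accepts a stream
        });

        scanStream(toScanner)           // hypothetical: your AV accepting a readstream
            .then((scanOutput) => {
                if (scanOutput.status !== 'OK') {
                    upload.abort();     // cancels the in-flight managed upload
                    return res.status(422).json({
                        fileId: req.params.fileId,
                        scanned: scanOutput,
                        upload: 'NOT UPLOADED'
                    });
                }
                return upload.promise().then((result) =>
                    res.status(201).json({
                        fileId: req.params.fileId,
                        scanned: scanOutput,
                        upload: result
                    })
                );
            })
            .catch(next);
    });
    req.pipe(busboy);
});

One caveat with the tee approach: bytes may already be in flight to S3 before the scan verdict arrives, so aborting removes the incomplete upload rather than preventing it entirely. If that's unacceptable, you'd scan first and buffer, trading memory for safety.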
Related
I did some research and found only forms and formidable tutorials. However, I couldn't find how to get the original file name.
I am using Postman to send a file to http://localhost:8081/file. The file is sent as binary in the request body. The file sent is xxx.json.
In Node I created an HTTP server:
const http = require("http");
const fs = require("fs");

const server = http.createServer(async (req, res) => {
    if (req.method === "POST" && req.url === "/file") {
        req.on("data", (chunk) => {
            console.log(`Data chunk available: ${chunk}`);
            // fs.createWriteStream("./finalFolder");
        });
    }
    res.end();
});
I want to save the file to /finalFolder preserving the original filename xxx.json.
Where do I get the name of the file uploaded?
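For what it's worth, with a raw binary body (not multipart/form-data) the filename never travels in the body itself; the client has to send it out of band, typically in a Content-Disposition header or a query parameter. A minimal sketch, assuming the client sets Content-Disposition (whether your Postman setup sends that header is an assumption):

const http = require("http");
const fs = require("fs");
const path = require("path");

http.createServer((req, res) => {
    if (req.method === "POST" && req.url === "/file") {
        // e.g. Content-Disposition: attachment; filename="xxx.json"
        const disposition = req.headers["content-disposition"] || "";
        const match = disposition.match(/filename="?([^";]+)"?/);
        const name = match ? path.basename(match[1]) : "upload.bin";
        req.pipe(fs.createWriteStream(path.join("./finalFolder", name)));
        req.on("end", () => res.end("saved " + name));
    } else {
        res.end();
    }
}).listen(8081);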
I am facing the following issue:
When uploading a PDF file from vue.js to a serverless node.js application, the file content is broken.
The issue happens because serverless parses the binary data incorrectly.
How can I accept binary data like PDF correctly, or is there another way to solve the issue?
// Vue.js
let formData = new FormData();
formData.append('file', fileObj);
axios.post(API_ENDPOINT + '/upload', formData).then(resp => {
    console.log(resp);
});

// Serverless Express
const express = require('express');
const app = express();
const fileUpload = require('express-fileupload');

app.use(fileUpload());
app.post('/upload', (req, res) => {
    console.log(req.files.file); // Uploaded tmp file - it has broken content
});
Currently I am using multiparty. The middleware is in the following snippet:
const { Form } = require('multiparty');

function formDataParser(req, res, next) {
    if (!req.is('multipart/form-data')) {
        next();
        return;
    }
    const form = new Form();
    form.parse(req, (err, fields, files) => {
        if (err) {
            res.status(400).json({
                message: 'Could not parse multipart form.'
            });
            return;
        }
        // multiparty wraps each field value in an array; unwrap single values
        const fieldsKeys = Object.keys(fields);
        for (const fieldKey of fieldsKeys) {
            fields[fieldKey] = fields[fieldKey][0];
        }
        req.form = {
            fields,
            files
        };
        next();
    });
}

module.exports = formDataParser;
I suggest not attaching this middleware globally; instead, use it only on the specific routes that need it. With the provided solution you can access form fields in the following way:
req.form.fields.somefield
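For instance, a route-level attachment might look like this sketch (the route path and field names are placeholders; multiparty exposes uploaded files as arrays of objects carrying a temp-file path and the original filename):

const express = require('express');
const formDataParser = require('./form-data-parser');

const app = express();

// Attached per-route rather than with app.use()
app.post('/upload', formDataParser, (req, res) => {
    console.log(req.form.fields.somefield);        // unwrapped single value
    const [file] = req.form.files.somefile;        // multiparty keeps files as arrays
    console.log(file.originalFilename, file.path); // temp file written by multiparty
    res.json({ received: true });
});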
I'm trying to handle this API interaction with node.js (using express and request) but I'm having a lot of trouble dealing with the data.
Here's my current code:
// Requirements
const express = require("express");
const bodyParser = require("body-parser");
const request = require("request");
const fs = require("fs");
const zlib = require("zlib");
const gunzip = require("gunzip-file");
const decompressResponse = require("decompress-response");

// Setting Up App
const app = express();
app.use(bodyParser.urlencoded({ extended: false }));

// Routes
app.get("/", (req, res) => {
    res.send("App Running");
});

// API Integration
let responseXML = "";
let bodyXML =
    '<?xml version="1.0" encoding="UTF-8"?><RequestMensagemCB><login>14087809000107</login><senha>xxxx</senha><mId>1</mId></RequestMensagemCB>';

const options = {
    url: "http://webservice.newrastreamentoonline.com.br/",
    method: "POST",
    body: bodyXML
};

app.get("/onix", function(req, res) {
    request(options, function(error, response, body) {
        // body is the decompressed response body
        console.log(
            "server encoded the data as: " +
            (response.headers["content-encoding"] || "identity")
        );
        console.log("the decoded data is: " + body);
    })
        .on("data", function(data) {
            // decompressed data as it is received
            console.log("decoded chunk: " + data);
        })
        .on("response", function(response) {
            // unmodified http.IncomingMessage object
            response.on("data", function(data) {
                // compressed data as it is received
                console.log("received " + data.length + " bytes of compressed data");
            });
        });
});

// Server Listening
app.listen(process.env.PORT || 3000, () => {
    console.log("Server Online Listening to port 3000");
});
The console.log response I get in Node comes through corrupted. Using Postman, though, I can reach the XML: I first make the POST request with the XML needed to validate the API access, then I download the response and give it a .gz extension; inside the .gz is a compressed version of the file that, when opened, shows the XML response.
This is my first time working with an API that returns data gzipped this way. I've tried piping the data through zlib, and was now thinking of the following route: download the response as a .gz, decompress the resulting file, then open it to reach the XML. I imagine there's a better way of doing this!
I could fix the code through another approach.
I added a few more params to the options object:
const options = {
    url: "http://webservice.newrastreamentoonline.com.br/",
    method: "POST",
    body: bodyXML,
    headers: {
        "Accept-Encoding": "gzip"
    },
    encoding: null,
    gzip: true
};
The key value is encoding: null; that way the body arrives as a raw Buffer and the stream comes through without corruption.
I was then able to print out the XML response. Using the xml2json NPM package I converted it into a JSON object, and now I can work with the data normally.
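Put together, the handler might look like this sketch (it reuses the options object above; xml2json's documented toJson call returns a JSON string by default, and error handling is minimal):

const request = require("request");
const parser = require("xml2json");

app.get("/onix", function(req, res) {
    request(options, function(error, response, body) {
        if (error) return res.status(502).send(error.message);
        // gzip: true makes request decompress the response;
        // encoding: null keeps body as a Buffer rather than a corrupted string
        const xml = body.toString("utf8");
        const json = JSON.parse(parser.toJson(xml)); // toJson returns a JSON string by default
        res.json(json);
    });
});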
I'm trying to download a file from another site in my Node app after an express HTTP GET request and then return the file for download. I've tried multiple ways of getting the file, using pipe, blob, etc., but I'm grasping in the dark. The code might give you a bit more insight into what I'm trying to achieve:
var router = require('express').Router();
var fs = require('fs');
var http = require('http');

router.get('/download/:file', function (req, res, next) {
    http.get('http://anothersite/' + req.params.file, function(response) {
        res.setHeader('Content-disposition', 'attachment; filename=' + req.params.file);
        res.setHeader('Content-type', 'application/octet-stream');
        res.download(fs.createWriteStream(req.params.file).pipe(response));
    });
});
This gives me an error "Cannot pipe. Not Readable". The file itself is not a regular file format (it's a file from our customized software with its own extension).
What am I missing?
For one, you need to use a readable stream here, not a writable one.
The Express res object is derived from node's http.ServerResponse, which itself implements node's writable stream interface (see the Node.js http and stream docs).
Since that is the case, I think you can use the response argument passed to your callback directly, since it is already a readable stream (an http.IncomingMessage). Try using the readable stream like this:
router.get('/download/:file', function (req, res, next) {
    http.get('http://anothersite/' + req.params.file, function(response) {
        res.setHeader('Content-disposition', 'attachment; filename=' + req.params.file);
        res.setHeader('Content-type', 'application/octet-stream');
        response.pipe(res); // <-- change here
    });
});
This code is working, with node v5.0.0 and latest chrome:
const express = require('express');
const app = express();
const http = require('http');

app.get('/', (req, res) => {
    http.get('http://www.fillmurray.com/200/300', (response) => {
        res.setHeader('Content-disposition', 'attachment; filename=' + 'hello.jpg');
        res.setHeader('Content-type', 'application/octet-stream');
        response.pipe(res);
    });
});

app.listen(3001, () => console.log('listening :)'));
You can use the request library like this:
request('http://anothersite/').pipe(fs.createWriteStream('filename.extension'))
Update:
If you want to do it with plain http, you can create a write stream as follows. After the file is saved successfully, I think you can call res.download() on it correctly.
var http = require('http');
var fs = require('fs');

http.get('http://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png').on('response', function (response) {
    var writeStream = fs.createWriteStream('output.png');
    response.pipe(writeStream);
    response.on('end', function () {
        console.log('stream completed!');
    });
    // This is here in case any errors occur
    writeStream.on('error', function (err) {
        console.log(err);
    });
});
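Tying the two halves together, a minimal sketch of save-then-download might look like this (file names and the route path are placeholders; res.download is Express's built-in helper for sending a file as an attachment):

var express = require('express');
var http = require('http');
var fs = require('fs');
var app = express();

app.get('/download', function (req, res, next) {
    var writeStream = fs.createWriteStream('output.png');
    http.get('http://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png', function (response) {
        response.pipe(writeStream);
    });
    // 'finish' fires once the file is fully written to disk
    writeStream.on('finish', function () {
        res.download('output.png', 'hello.png', function (err) {
            if (err) next(err);
        });
    });
    writeStream.on('error', next);
});

app.listen(3000);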
I have a file called Sitemap.xml on Server1 and I want to write to this file from another server Server2.
File structure of Server1:
Server1:
    app
    views
    public
    sitemap.xml
    app.js
The sitemap can be accessed at Server1/sitemap, as I have used the code below in my express file:
app.use('/sitemap', express.static(__dirname + '/sitemap.xml'));
You should protect the route with a secret token to avoid exposing it. I hope this helps:
// Server 1
const fs = require('fs');

app.use(function(req, res, next) {
    var secret = req.headers.hasOwnProperty('authorization')
        ? req.headers.authorization
        : false;

    if (! secret || secret !== 'token [your-secret-token]') {
        res.status(403).send('Access forbidden');
        return; // stop here so unauthorized requests can't write the file
    }

    // Create write stream to sitemap file
    var stream = fs.createWriteStream('sitemap.xml');

    // Redirect request body to stream which writes to sitemap file;
    // pipe() returns the write stream, which emits 'finish' when done
    req.pipe(stream)
        .on('finish', () => res.send('ok'));
});
// Server 2
const http = require('http');
const fs = require('fs');

var stream = fs.createReadStream('new-sitemap.xml');
var req = http.request({
    host: 'server1',
    method: 'POST', // send the file in the request body
    headers: {
        authorization: 'token [your-secret-token]',
    },
});

req.on('response', (res) => {
    if (res.statusCode === 200) { // http.IncomingMessage exposes statusCode, not status
        console.log('File uploaded');
    }
    else {
        console.error('File not loaded');
    }
});

// Write data from file into request body
stream.pipe(req);
Note that the token should be at least 32 characters long to be strong enough, and don't forget to rotate it from time to time. Also keep in mind this is just a concept.
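If it helps, one common way to generate such a token is with node's built-in crypto module:

const crypto = require('crypto');

// 32 random bytes -> 64 hex characters, comfortably above the 32-char minimum
console.log(crypto.randomBytes(32).toString('hex'));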