I'm having a problem with sqlite3 not flushing to disk. The code I'm using is below. My total file list is over 470k entries and the program tends to use several gigabytes of memory. While the program is running, test.db is 0 bytes and no journal is used. It only starts to write to disk when db.close() runs.
var fs = require('fs');
var sqlite3 = require('sqlite3').verbose();
var db = new sqlite3.Database('test.db');

db.serialize(function () {
    db.run("BEGIN;");
    db.run("CREATE TABLE if not exists Files (name TEXT);");
    db.run("COMMIT;");

    var files = fs.readdirSync("./files/");
    console.log("File list completed: " + files.length);
    for (var i = 0; i < files.length; i++) {
        db.run("INSERT INTO Files VALUES (?);", files[i]);
    }
});
db.close();
I have tried removing db.run("BEGIN;"); and db.run("COMMIT;"); but it does not help.
Bug?
I'm reporting this as a bug on GitHub.
I think there is a problem with the combination of the transaction and db.serialize. db.serialize gives you no control over when the queued statements complete; I don't find it very useful.
Try explicit control flow like the code below:
var fs = require('fs');
var sqlite3 = require('sqlite3');
var async = require('async');
var db = new sqlite3.Database('test.db');

async.series([
    function (cb) {
        db.run('CREATE TABLE if not exists Files (name TEXT)', cb);
    },
    function (cb) {
        db.run('begin transaction', cb);
    },
    function (cb) {
        var files = fs.readdirSync("./files/");
        async.each(
            files,
            function (file, cb) { db.run('INSERT INTO Files VALUES (?)', file, cb); },
            cb
        );
    },
    function (cb) {
        db.run('commit transaction', cb);
    }
], function (err) {
    if (err) {
        console.log(err);
        db.run('rollback transaction'); // can fail if the error occurred on CREATE TABLE
    }
    db.close();
});
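On the memory usage with ~470k rows: both the original code and async.each start every INSERT at once, so all of them end up queued inside node-sqlite3 at the same time, which is likely where the several gigabytes come from. If that is a concern, a hedged tweak to the third step above is to bound concurrency with async.eachLimit (the limit of 100 is an arbitrary choice of mine, not something from the question); this is a sketch, not tested:
// Hedged variant of the third step above: cap how many INSERTs are queued at
// once so very large file lists don't pile up in memory. 100 is an arbitrary limit.
function (cb) {
    var files = fs.readdirSync("./files/");
    async.eachLimit(
        files,
        100,
        function (file, cb) { db.run('INSERT INTO Files VALUES (?)', file, cb); },
        cb
    );
},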
If you don't need all-or-nothing behavior (insert every row or none), you can try the following code instead:
var fs = require('fs');
var sqlite3 = require('sqlite3');
var async = require('async');
var db = new sqlite3.Database('test.db');

db.run('CREATE TABLE if not exists Files (name TEXT)', function (err) {
    if (err)
        return console.log(err);
    var files = fs.readdirSync("./files/");
    async.eachSeries(
        files,
        function (file, cb) { db.run('INSERT INTO Files VALUES (?)', file, cb); },
        function (err) {
            console.log((err) ? err : 'Done');
            db.close();
        }
    );
});
Related
I want to save and retrieve a Word doc and a PDF file with a size of 1 MB directly in MongoDB with Node.js. How can I do this? Is there any article explaining it, or can someone help me with this?
Here is standalone Node.js code to save the file as binary data in MongoDB. As the maximum file size is 1 MB, you can save it in a normal collection rather than GridFS.
This can be extended to run as a web app using the "express" or "hapi" frameworks; you may need to refer to the respective tutorials for that. A rough express sketch follows the two snippets below.
Save the file as binary data in MongoDB:-
Note: I have the sample file in a "docs" directory, so I have prefixed the path with docs (i.e. "/docs/testplan.docx"). You can remove that if you don't need it.
var MongoClient = require('mongodb').MongoClient;
var Binary = require('mongodb').Binary; // exported by the driver module; not actually used below
var fs = require('fs');
var assert = require('assert');

var url = 'mongodb://localhost:27017/test';
var binaryFileData = fs.readFileSync(__dirname + "/docs/testplan.docx");

var insertDocument = function (db, callback) {
    db.collection('file_save').insertOne({
        "fileName": "testplan.docx",
        "fileData": binaryFileData   // a Buffer is stored as BSON binary data
    }, function (err, result) {
        assert.equal(err, null);
        console.log("Inserted a document into the collection.");
        callback();
    });
};

MongoClient.connect(url, function (err, db) {
    assert.equal(null, err);
    insertDocument(db, function () {
        console.log("Closing the database connection...");
        db.close();
    });
});
Read the file data and save it to disk:-
var MongoClient = require('mongodb').MongoClient;
var Binary = require('mongodb').Binary;
var fs = require('fs');
var assert = require('assert');

var url = 'mongodb://localhost:27017/test';

var findDocument = function (fileName, db, callback) {
    db.collection('file_save').findOne({ "fileName": fileName }, function (err, document) {
        assert.equal(null, err);
        console.log(document.fileName);
        fs.writeFileSync("testplan_out.docx", document.fileData.buffer);
        console.log("File has been written to disk");
        callback();
    });
};

MongoClient.connect(url, function (err, db) {
    assert.equal(null, err);
    findDocument("testplan.docx", db, function () {
        db.close();
    });
});
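As mentioned above, this can be extended to a web app. Here is a rough sketch of the express variant; note that multer (used here to parse the multipart upload into memory) is my addition and not part of the original snippets, and the route and field names are placeholders:
var express = require('express');
var multer = require('multer');
var MongoClient = require('mongodb').MongoClient;

var app = express();
var upload = multer({ storage: multer.memoryStorage() }); // keep the uploaded file in memory as a Buffer
var url = 'mongodb://localhost:27017/test';

MongoClient.connect(url, function (err, db) {
    if (err) throw err;

    // POST /upload with a multipart form field named "docFile"
    app.post('/upload', upload.single('docFile'), function (req, res) {
        db.collection('file_save').insertOne({
            "fileName": req.file.originalname,
            "fileData": req.file.buffer   // Buffer is stored as BSON binary data
        }, function (err) {
            if (err) return res.status(500).send(err);
            res.send('Document Uploaded Successfully');
        });
    });

    app.listen(3000);
});
With something like this in place, Postman would send a form-data request with the file attached under the docFile field.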
That works perfectly fine! But I'm trying to upload the document from Postman, and I'm developing my project with the MEAN stack.
//document-model.js
var mongoose = require('mongoose'),
Schema = mongoose.Schema;
var documentSchema = {
docFile: {
type: String
}
};
mongoose.model('FileDocument', documentSchema);
//document-route.js
var express = require('express'),
documentRoute = express.Router(),
document = require('../controllers/document-controller');
documentRoute.post('/upload', document.uploadDocument);
module.exports = documentRoute;
//document-controller.js
var document = {
uploadDocument: function (req, res) {
var fileDocument = new FileDocument({
docFile: req.body.docFile
});
fileDocument.save(function (err, result) {
if (err) {
res.status(500).send(err);
} else {
res.status(200).send('Document Uploaded Successfully');
}
});
}
};
I was trying it this way, but it is not uploading to MongoDB; the result shown in the mongo shell is:
{ "_id" : ObjectId("59693872b8b83f42b42a3b9f"), "docFile" : "", "__v" : 0 }
I am trying to write a file into MongoDB using mongoose and GridFS.
However, the gridfs writestream is not firing any of its events: close or finish.
It is not even firing the 'error' event (just in case there is any error). My Node.js version is 4.4.5.
Code is below:
var mongoose = require('mongoose');
var formidable = require('formidable'),
http = require('http'),
util = require('util'),
fs = require('fs-extra');
var Regex = require("regex");
var fs = require('fs');
var path=require('path');
var grid =require("gridfs-stream");
var createRequirement = function (req, res) {
var form = new formidable.IncomingForm({
uploadDir: __dirname + '/upload'
});
form.multiples = true;
form.keepExtensions = true;
files = [],
fields = [];
form.on('field', function (field, value) {
})
form.on('file', function (field, file) {
console.log(file.name);
console.log('File uploaded : ' + file.path);
grid.mongo = mongoose.mongo;
var gfs = grid(db.db);
var writestream = gfs.createWriteStream({
filename: file.name,
mode: 'w'
});
fs.createReadStream(file.path).pipe(writestream);
//Below event is not fired.
writestream.on('finish', function (file) {
Company.findOne({
"users.userName": req.user.userName
}).then(function (data) {
var company = data;
if (!company) {
return res.status(404).send({
'Not Found': 'Company data not found'
});
} else {
Contact.findByIdAndUpdate(
file._id, {
$push: {
"attachments": {
id: file._id
}
}
}, {
safe: true,
upsert: true,
new: true
},
function (err, model) {
console.log(err);
}
);
}
});
})
});
form.parse(req);
return;
};
I managed to figure out the solution after a lot of tries.
It was the mongoose connection that was causing the issue.
gridfs-stream expects a direct MongoDB (native driver) connection.
I wish they had streamlined things and made it easier to use a mongoose connection.
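For anyone hitting the same thing, here is roughly the shape that worked: hand gridfs-stream the native driver db that mongoose wraps (available once the connection is open) rather than the mongoose connection object itself. This is an illustrative sketch, not the exact code from the project above; the URL and file names are placeholders:
var fs = require('fs');
var mongoose = require('mongoose');
var Grid = require('gridfs-stream');

mongoose.connect('mongodb://localhost:27017/test');
var conn = mongoose.connection;

conn.once('open', function () {
    // conn.db is the underlying native MongoDB connection that gridfs-stream expects
    var gfs = Grid(conn.db, mongoose.mongo);

    var writestream = gfs.createWriteStream({ filename: 'example.docx', mode: 'w' });
    writestream.on('close', function (file) {
        console.log('Stored in GridFS as', file.filename);
    });
    writestream.on('error', console.log);

    fs.createReadStream(__dirname + '/upload/example.docx').pipe(writestream);
});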
I am able to achieve recursive file traversal in a directory (i.e. to explore all the subdirectories and files in a directory). For that I used an answer from a related post on Stack Overflow. The snippet of that is below:
var fs = require("fs");
var tree = function(dir, done) {
var results = {
"path": dir,
"children": []
};
fs.readdir(dir, function(err, list) {
if (err) { return done(err); }
var pending = list.length;
if (!pending) { return done(null, results); }
list.forEach(function(file) {
fs.stat(dir + '/' + file, function(err, stat) {
if (stat && stat.isDirectory()) {
tree(dir + '/' + file, function(err, res) {
results.children.push(res);
if (!--pending){ done(null, results); }
});
} else {
results.children.push({"path": dir + "/" + file});
if (!--pending) { done(null, results); }
}
});
});
});
};
module.exports = tree;
When I run:
tree(someDirectoryPath, function(err, results) {
if (err) throw err;
console.log(results);
});
I get a sample result, such as this one:
{ path: '/Users/UserName/Desktop/1',
children:
[ { path: '/Users/UserName/Desktop/1/file1' },
{ path: '/Users/UserName/Desktop/1/file2' },
{ path: '/Users/UserName/Desktop/1/file3' },
{ path: '/Users/UserName/Desktop/1/subdir1',
children: [Object] } ] }
I am also able to hash a single file in a specific location, using the fs module's ReadStream. The snippet for that is below:
/**
* Checking File Integrity
*/
var fs = require('fs'),
args = process.argv.splice('2'),
path = require('path'),
traverse = require('/Users/UserName/Desktop/tree.js'),
crypto = require('crypto');
//var algorithm = ['md5', 'sha1', 'sha256', 'sha512'];
var algorithm = 'sha512';
var hashTable = new Array();
var hash = crypto.createHash(algorithm);
var fileStream = fs.ReadStream(args[0]);
fileStream.on('data', function(data) {
hash.update(data);
fileStream.on('end', function() {
var digest = hash.digest('hex');
console.log('algorithm used: ', algorithm);
console.log('hash for the file: ',digest);
hashTable[args[0]] = digest;
console.log(hashTable);
});
});
Where args[0] stores the location of the file to be read by the ReadStream. After hashing a specific file, the console output is as follows:
node fileIntegrityChecker.js hello.txt
algorithm used: sha512
hash for the file: 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043
the hashtable is: [ 'hello.txt': '9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043' ]
My problem is that I have tried to integrate the tree module's functionality into the hash-related JS file. My idea is that the program will capture the user's input as a path to a directory, and that input will be processed to traverse all the subdirectories and files of that folder. Also, the fileStream.on handlers should be set up inside the callback from the tree module. However, I am not fully comfortable with the callback mechanism yet and I hope to get some insight from you.
This is what I've tried
/**
* Checking File Integrity
*/
var fs = require('fs'),
args = process.argv.splice('2'),
path = require('path'),
tree = require('/Users/UserName/Desktop/tree.js'),
crypto = require('crypto');
//var algorithm = ['md5', 'sha1', 'sha256', 'sha512'];
var algorithm = 'sha512';
var hashTable = new Array();
var pathString = 'Users/UserName/Desktop/1';
tree(pathString, function(err, results) {
if (err) throw err;
var hash = crypto.createHash(algorithm);
var fileStream = fs.ReadStream(results.children[1]['path']);
fileStream.on('data', function(data) {
hash.update(data);
fileStream.on('end', function() {
var digest = hash.digest('hex');
console.log('algorithm used: ', algorithm);
console.log('hash for the file: ',digest);
hashTable[results.children[1]['path']] = digest;
console.log('The hashtable is: ', hashTable);
});
});
});
Now, I've made some progress in the sense that I don't receive an error, and I have basically achieved my goal. However, I am only able to extract one result explicitly. For some reason, I can't work out how to iterate over each child of the results object. If that is solved, I think the problem will be completely solved.
Can you please show me how to combine the module and the JS file so that I can recursively traverse all the contents of a directory and create a hash for every file in it? I need this to ultimately check whether any of the files have changed, based on their hashes. Thank you!
The simplest thing to do would be to generate the hash while you are already walking the directory tree. This involves updating the tree.js file as follows:
} else {
    var fname = dir + "/" + file;
    // put your hash generation here
    generateHash(fname, function (e, hash) {
        if (e) { return done(e); }
        results.children.push({ "path": fname, "hash": hash });
        if (!--pending) {
            done(null, results);
        }
    });
}
Then put your hash generation code in a function like this:
// note: tree.js will also need `var crypto = require('crypto');` at the top
function generateHash(filename, callback) {
    var algorithm = 'sha512';
    var hash = crypto.createHash(algorithm);
    var fileStream = fs.ReadStream(filename);
    fileStream.on('data', function (data) {
        hash.update(data);
    });
    fileStream.on('end', function () {
        var digest = hash.digest('hex');
        callback(null, digest);
    });
}
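To get from the annotated tree back to a flat path-to-hash table (the part the question was stuck on), a small recursive walk over results.children is enough. A rough usage sketch, assuming tree.js exports the modified function above (adjust the require path as in the question):
var tree = require('./tree.js');

// Walk the annotated tree and collect { path: hash } pairs.
function collectHashes(node, table) {
    table = table || {};
    if (node.hash) {
        table[node.path] = node.hash;
    }
    (node.children || []).forEach(function (child) {
        collectHashes(child, table);
    });
    return table;
}

tree('/Users/UserName/Desktop/1', function (err, results) {
    if (err) throw err;
    var hashTable = collectHashes(results);
    console.log(hashTable);
});
Comparing two such tables taken at different times tells you which files have changed.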
Using vinyl-fs, you could glob a directory, which will probably cut your code down quite a bit. Then you would pipe the files through a handler that generates your hash.
Here's the rough shape (hasher and concater stand in for transform streams you would write yourself):
vfs.src(['./**/*.js'])
    .pipe(hasher)
    .pipe(concater)
    .pipe(vfs.dest('./output'));   // vinyl-fs writes into a folder, not a single file
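A slightly more concrete sketch of that pipeline, assuming the through2 module for the hashing stage (through2 and the glob are my additions, not part of the original suggestion):
var vfs = require('vinyl-fs');
var through = require('through2');
var crypto = require('crypto');

vfs.src(['./files/**/*'])
    .pipe(through.obj(function (file, enc, cb) {
        if (file.isBuffer()) {
            // Vinyl files carry their contents as a Buffer by default
            var digest = crypto.createHash('sha512').update(file.contents).digest('hex');
            console.log(file.path + ': ' + digest);
        }
        cb(); // nothing is passed downstream; this stage is just a sink
    }))
    .on('finish', function () {
        console.log('Done');
    });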
import crypto from 'crypto';
import fs from 'fs';
import path from 'path';
// walk dir recursively
function* walkSync(dir: string) {
const files = fs.readdirSync(dir, { withFileTypes: true });
for (const file of files) {
if (file.isDirectory()) {
yield* walkSync(path.join(dir, file.name));
} else {
yield path.join(dir, file.name);
}
}
}
// concat all files hashes and hash the hashes
function dirHash(dir: string) {
const hexes = [];
for (const file of walkSync(dir)) {
const buffer = fs.readFileSync(file);
const hash = crypto.createHash('sha256');
hash.update(buffer);
const hex = hash.digest('hex');
hexes.push(hex);
}
return crypto.createHash('sha256').update(hexes.join('')).digest('hex');
}
console.log(dirHash('./src'));
I'm trying to concatenate multiple files and save them to a new file using Node.js createWriteStream and createReadStream. I have a small bug: I would like to print each file name before its contents, but the file names are always printed together at the top of the output file. Please see my resulting output below, and if you have any idea why, please help!
Thanks!
test.js
var async = require('async');
var fs = require('fs');
var path = require('path');
var SOURCE_FOLDER = '/tmp/test';
var SOURCE_FILE_PATTERN = /\.json$/
var REPORT_FILE = path.join(SOURCE_FOLDER, 'output.html');
var writeStream = fs.createWriteStream(REPORT_FILE, {
flags: 'w',
encoding: 'UTF-8'
});
var appendReport = function appendReport(file, callback) {
var readStream = fs.createReadStream(file, {
flags: 'r',
encoding: 'UTF-8'
});
readStream.pipe(writeStream, {
end: false
});
readStream.on('end', callback);
};
fs.readdir(SOURCE_FOLDER, function (err, files) {
if (err) {
console.log(err);
} else {
writeStream.write("<html><body><pre>\n");
async.forEach(files, function (file, callback) {
var filePath = path.join(SOURCE_FOLDER, file);
fs.stat(filePath, function (err, stats) {
if (err) {
callback(err);
} else if (stats.isFile() && file.match(SOURCE_FILE_PATTERN)) {
writeStream.write("\n" + filePath);
appendReport(filePath, callback);
} else {
callback();
}
});
}, function (err) {
writeStream.write("\n</pre></body></html>");
});
}
});
My Current Result:
# node test.js; cat /tmp/test/output.html
<html><body><pre>
/tmp/test/a.json
/tmp/test/b.jsoncontent A
content B
</pre></body></html>
My Expected Result:
# node test.js; cat /tmp/test/output.html
<html><body><pre>
/tmp/test/a.json
content A
/tmp/test/b.json
content B
</pre></body></html>
The problem is that async.forEach === async.each and async.each() calls the iterator in parallel. What you want is async.eachSeries() instead.
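Concretely, the only change needed in the question's code is the iteration call. A sketch (depending on whether your JSON files end with a newline, you may also want to write one after the file name):
// async.eachSeries processes one file at a time, so each name is written
// immediately before that file's contents are piped in.
async.eachSeries(files, function (file, callback) {
    var filePath = path.join(SOURCE_FOLDER, file);
    fs.stat(filePath, function (err, stats) {
        if (err) {
            callback(err);
        } else if (stats.isFile() && file.match(SOURCE_FILE_PATTERN)) {
            writeStream.write("\n" + filePath);
            appendReport(filePath, callback);
        } else {
            callback();
        }
    });
}, function (err) {
    writeStream.write("\n</pre></body></html>");
});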
Hi, I have been trying to unzip a file from my C drive and parse its contents into JavaScript objects.
Here is the code:
var AdmZip = require('adm-zip');
var fs = require('fs'), xml2js = require('xml2js');
var parser = new xml2js.Parser();
var paramdata = 'c:/sample/kusuma.zip';
console.log(paramdata);
var zip = new AdmZip(paramdata);
var zipEntries = zip.getEntries();
var obj = [];
var count = 0;
zipEntries.forEach(function(zipEntry) {
var len = zipEntries.length;
console.log(zipEntry.toString());
console.log(zipEntry.entryName);
fs.readFile("", function(err, data) {
console.log(data);
parser.parseString(data, function(err, result) {
count++;
console.log(count);
obj.push(result);
if (count === len) {
console.log(obj);
res.send(obj);
}
});
});
});
Please check the code and provide me with some more examples.
Well, fs.readFile() is for reading files that are themselves directly on disk, which these aren't.
However, adm-zip is already reading in the contents of the .zip, so you shouldn't need fs. Each zipEntry has getData() and getDataAsync() methods that can be used to retrieve contents.
zipEntries.forEach(function (zipEntry) {
zipEntry.getDataAsync(function (data) {
parser.parseString(data, function (err, result) {
console.log(result);
});
});
});
Also, as zipEntries is an Array, you can use .filter() to reduce it to only XML files.
var zipEntries = zip.getEntries().filter(function (zipEntry) {
return !zipEntry.isDirectory && /\.xml$/.test(zipEntry.entryName);
});
You'll also want to determine len once from the collection rather than recomputing it for each entry. You can also test against obj.length rather than having to keep count separately:
var len = zipEntries.length;
var obj = [];
zipEntries.forEach(function (zipEntry) {
zipEntry.getDataAsync(function (data) {
parser.parseString(data, function (err, result) {
obj.push(result);
if (obj.length === len) {
res.send(obj);
}
});
});
});