Compress word doc xml using archiver - node.js

I am trying to make a program in Node.js that would anonymize a given path for a word doc for a larger project. I have already unzipped the docx file and I have edited the document.xml file. All I need to do now is recompress it.
I have looked into using Archiver, but the problem is that it is zipping the folder to a .zip, so when you try to convert it to a docx, it is corrupted.
fs.readFile('./extracted_doc/word/document.xml', 'utf8', (err, data) => {
if (err) reject(err);
var name = data.indexOf('<w:t>')
var end = data.indexOf('<\/w:t>')
var result = data.replace(data.slice(name + 5, end), "XXXXXXXXXXXXXXXXXX")
fs.writeFile('./extracted_doc/word/document.xml', result, (err) => {
if (err) reject(err)
//zipping the file back to docx
var output = fs.createWriteStream('./anonymized_submission.docx')
var archive = archiver('zip')
archive.on('error', function (err) {
throw err;
})
archive.pipe(output)
archive.directory("./extracted_doc", "extracted_doc")
archive.finalize()
})
});

Here's a potential solution to your problem, I've tested and it works for me. It will replace the first line with 'XXXX...'.
The main issue with your existing code was it was creating a root directory 'extracted_doc' in the .zip file that contains the doc archive. This is not what Word is expecting. It expects the document structure in the root of the archive.
I've created the zipDirectory function to work around this. The main goal here is to preserve the directory structure of the archive.
const archiver = require("archiver");
const fs = require("fs");
fs.readFile('./extracted_doc/word/document.xml', 'utf8', (err, data) => {
if (err) reject(err);
var name = data.indexOf('<w:t>');
var end = data.indexOf('<\/w:t>');
var result = data.replace(data.slice(name + 5, end), "XXXXXXXXXXXXXXXXXX")
fs.writeFile('./extracted_doc/word/document.xml', result, (err) => {
if (err) reject(err);
zipDirectory('./extracted_doc/', './anonymized_submission.docx');
})
});
function zipDirectory(inputDir, outputFile) {
let archive = archiver('zip');
archive.on('error', function (err) {
throw err;
})
let output = fs.createWriteStream(outputFile);
archive.pipe(output);
/* Ok, so we don't want a root name of <input_dir>, this is our workaround. */
archive.directory(inputDir, '../');
archive.finalize();
}

Related

node js file systems read a dynamically changing path to a pdf

I am trying to read a pdf file from fs and send it through email using sendgrid.
My folder structure is like this
/
-src
--controllers
---travelplan.js
-pdf
In the travelplan.js if I do it like this
fs.readFile('pdf/204.pdf', function (err, data) {
if (err) {
console.log("THIS ERROR IS AWESOME", err)
}
})
everything works fine. No problem.
But if read it like this
let pdf_number = 204;
fs.readFile(`pdf/${pdf_number}.pdf`, function (err, data) {
if (err) {
console.log("THIS ERROR IS AWESOME", err)
}
})
This doesn't work. Pdf doesn't send correctly.
Then I tried this
let pdf_number = 204;
var pdf_path = path.join(__dirname, '..', 'pdf',pdf_number);
fs.readFile(pdf_path, function (err, data) {
if (err) {
console.log("THIS ERROR IS AWESOME", err)
}
})
This also doesn't work.
How do I read a pdf file by passing the pdf file name as an argument?

In nodejs how to read a file and move it to another folder if the file contains a specified text

So I have the following code
var processed;
fs.readFile(path, 'utf-8', function(err, data) {
processed = false;
//checking if text is in file and setting flag
processed = true;
});
if (processed == true) {
try {
var fname = path.substring(path.lastIndexOf("\\") + 1);
fs.moveSync(path, './processedxml/' + fname, {
overwrite: true
})
} catch (err) {
console.log("Error while moving file to processed folder " + err);
}
}
But I don't get the desired output. Because looks like the readfile is executed by a separate thread and so the value of "processed" is not reliable.
I am not very familiar with nodejs so any help will be greatly appreciated.
Yes, you are right, your executions are performed by different threads.
In this scenario, you'll need to use promises.
You can solve your need easily by using "Promise FS" (you can use any other promise solution anyway).
Your code would be something like the following:
fs = require('promise-fs');
var fname = 'test.txt' ;
var toMove = false ;
fs.readFile('test.txt','utf8')
.then (function (content) {
if(content.indexOf('is VALID') !== -1) {
console.log('pattern found!');
toMove = true ;
}
else { toMove = false
}
return toMove ;
}).
then (function (toMove) {
if(toMove) {
var oldPath = 'test.txt'
var newPath = '/tmp/moved/file.txt'
fs.rename(oldPath, newPath, function (err) {
if (err) throw err
console.log('Successfully renamed - moved!')
}) ;
}
})
.catch (function (err) {
console.log(err);
})
Create a file "test.txt" and add the following contents:
this is text.file contents
token is VALID
The code above will evaluate if "is VALID" is present as content and if it does then it will move the file "test.txt" from your current folder to a new one called "moved" in "/tmp" directory. It will also rename the file as "file.txt" file name.
Hope it helps you.
Regards
It looks like you're shadowing path, trying to use it as a variable and as a node module. The easiest way to make this work is to choose a different variable name for the file and move the processing logic into the callback of fs.readFile.
var path = require('path');
var fs = require('fs-extra');
var file = 'some/file/path/foo.xml';
var text = 'search text';
fs.readFile(file, 'utf-8', function (err, data) {
if (err) {
console.error(err);
} else {
//checking if text is in file and setting flag
if (data.indexOf(text) > -1) {
try {
var fname = path.basename(file);
fs.moveSync(file, './processedxml/' + fname, {
overwrite: true
})
} catch (err) {
console.log("Error while moving file to processed folder " + err);
}
}
}
});

Node.js ftp put method is writing the name of the file to be uploaded to the ftp server, not the actual file

I am trying to write an app that uploads files to an ftp server in node.js using the npm module ftp. I have a file, foo.txt, whose content is a single line: "This is a test file to upload via ftp." My code is:
var Client = require("ftp");
var fs = require("fs");
var connection = require("./connections.js");
var c = new Client();
const ftpFolder = "./files/";
var fileList = [];
fs.readdir(ftpFolder, (err, files) => {
if(err) {
console.log(err);
} else {
files.forEach(file => {
console.log(file);
fileList.push(file);
});
}
console.log(fileList);
});
c.on("ready", function(){
fileList.forEach(file => {
c.put(file, "/backups/" + file, function(err){
if(err){
console.log(err);
} else {
console.log(file + " was uploaded successfully!");
}
c.end();
});
});
});
// Connect to ftp site
c.connect(connection.server_ftp);
I see the file foo.txt on the ftp server, but when I open it the contents are: "foo.txt". It appears to have written the name of the file to the file rather than uploading it. Any guidance would be appreciated!
When you read a directory, it gives you a list of files. It doesn't read the contents of the file, it just lists the names of the files in the dir.
You will need to use this file name to create a path to read the file from.
const path = require('path')
let filePath = path.join(ftpFolder, file)
let fileContents = fs.readFile(path, 'utf8', (err, data) => {
// do the upload
})
As a side note... While your directory reading may work, you should consider reading the directory after the connection is established. Otherwise, there is a chance you will see it fail because it's a race condition between the client connection and the directory read. You may need to read a directory with so many files that it resolves AFTER the client connects.
You could nest the callbacks, but another way to handle this is Promises. You can kick off both async methods at the same time, and handle the results when both have resolved
var filesPromise = new Promise((resolve, reject) => {
fs.readdir(ftpFolder, (err, files) => {
if(err) reject(err)
else resolve(files)
})
})
var connectionPromise = new Promise((resolve, reject) => {
c.on("ready", () => { resolve(c) }
c.connect(connection.server_ftp)
})
Promise.all([filesPromise, connectionPromise], results => {
results[0] // files
results[1] // client
}).catch(err => {console.error(err)})
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
In my case which is very similar, I put the filename instead of the full path to file in c.put . From your code I think it is the same.

fs.createWriteStream, no such file or directory, open Nodejs

I want to save files that I am getting from another server on my server but the problem is when I am calling createWriteStream it giving me the error :
no such file or directory, open
E:\pathtoproject\myproject\public\profile_14454.jpg
Here is my code which is in E:\pathtoproject\myproject\modules\dowload.js :
request.head(infos.profile_pic, function(err, res, body) {
const completeFileName = '../public/profile_14454.' + res.headers['content-type'].split('/')[1];
var imageStream = fs.createWriteStream(completeFileName);
imageStream.on('open', function(fd) {
console.log("File open");
request(infos.profile_pic).pipe(imageStream).on('close', function(body) {
consoleLog('Profile pic saved');
console.log('This is the content of body');
console.log(body);
connection.query('UPDATE user set photo=? where id=?', [completeFileName, lastID], function(err, result, fields) {
if (err) {
consoleLog('Error while update the profile pic');
}
});
})
});
});
When I removed the directory ../public/ and leave only the name of the file
profile_14454.' + res.headers['content-type'].split('/')[1] , it worked but the file was saved in the root directory of the project (E:\pathtoproject\myproject\).
What's wrong in what I am doing? How can I have the file saved under public directory?
I am using nodeJS 8.9.4
I tried with my small code .
var fs = require("fs");
var data = 'Simply Easy Learning';
// Create a writable stream
var writerStream = fs.createWriteStream('./airo/output.txt');
// Write the data to stream with encoding to be utf8
writerStream.write(data,'UTF8');
// Mark the end of file
writerStream.end();
// Handle stream events --> finish, and error
writerStream.on('finish', function() {
console.log("Write completed.");
});
writerStream.on('error', function(err){
console.log(err.stack);
});
console.log("Program Ended");
My code is in this path E:\syed ayesha\nodejs\nodejs now I want to store my file in airo folder which is in this path. So I used one dot for storing. Hope this helps.

Node js remove old files in a Directory

I want to remove some files in a directory after reaching some limits.(for example remove files if number of files more than 20)
It would be great if any automation can be done to remove those files.
In details:
In my case there is a uploads directory, where I'm uploading the images. For each new image, a directory is created and the image resides in the directory. So I want to keep some of the newly created or recently used directories and remove others after a certain limit(for example after reaching 20 numbers of directories). While creating new images, it'll check the limit and if exceeds the max dir limits, it'll remove the unused directories.
Note: The directories are not empty.
How can i do that using Node.js
Any help would be appreciable.
The most widely used technique would be to have an API that can delete files in your folder. Take a look at
fs.unlink
You can get more details here
Once you have this API, it is preferable to have a cron call this API every month or so. Take a look at
crontab -e
If you're running Node on a Linux server, you can use the exec module to execute Linux commands. For example, here is a function I use to move old log files:
var exec = require('child_process').exec;
exec('mv ' + __dirname + '/log/*.log ' + __dirname + '/log/archive',
function(err, stdout, stderr) {
if (err) {
console.log('Error archiving log files: ' + stderr);
} else {
console.log('Log files archived to ' + __dirname + '/log/archive');
}
});
You can use any Linux command - so you could use this approach to remove files as well.
I create a "cronjob" function in node.js to remove files in a folder (note child folders will be ignore)
USAGE:
// keep only 5 newest files in `logs` folder
watchAndRemoveOldFiles('logs', 5, function (err, removeFiles) {
console.log('These files has been remove:', removeFiles);
});
Full code (you need npm install async to run the code):
var fs = require('fs');
var path = require('path');
var async = require('async');
function findAndRemoveOldFiles(inputDir, keepCount, callback) {
if(!callback) {
callback = function (err, removeFiles) {
// default callback: doing nothing
};
};
fs.readdir(inputDir, function (err, files) {
if(err) {
return callback(err);
}
fileNames = files.map(function (fileName) {
return path.join(inputDir, fileName);
});
async.map(fileNames, function (fileName, cb) {
fs.stat(fileName, function (err, stat) {
if(err) {
return cb(err);
};
cb(null, {
name: fileName,
isFile: stat.isFile(),
time: stat.mtime,
});
});
}, function (err, files) {
if(err) {
return callback(err);
};
files = files.filter(function (file) {
return file.isFile;
})
files.sort(function (filea, fileb) {
return filea.time < fileb.time;
});
files = files.slice(keepCount);
async.map(files, function (file, cb) {
fs.unlink(file.name, function (err) {
if(err) {
return cb(err);
};
cb(null, file.name);
});
}, function (err, removedFiles) {
if(err) {
return callback(err);
}
callback(null, removedFiles);
});
});
});
}
function watchAndRemoveOldFiles(inputDir, keepCount, callback) {
findAndRemoveOldFiles(inputDir, keepCount, callback);
fs.watch(inputDir, function () {
findAndRemoveOldFiles(inputDir, keepCount, callback);
});
}
// USAGE: watch and remove old files, keep only 5 newest files
watchAndRemoveOldFiles('log', 5, function (err, removeFiles) {
console.log('These files has been remove:', removeFiles);
});
you might consider setting up a kue task:
https://github.com/learnboost/kue
Kue (or a slight wrapper/mod on top of it) is likely to be what makes it into core for our scheduled jobs down the road.

Resources