I am working through nodeschool.io's learnyounode and am on the fourth challenge, which is:
Write a program that uses a single asynchronous filesystem operation to read a file and print the number of newlines it contains to the console (stdout), similar to running cat file | wc -l.
I wrote my own solution, which is different from the one provided, but both seem to work. I am curious which would be better style and how they might function differently.
Here is my solution:
var fs = require('fs');
var fileAsArray = [];

function asyncRead(print) {
    fs.readFile(process.argv[2], 'utf-8', function callback(error, contents) {
        fileAsArray = contents.split('\n');
        print();
    });
}

function printNumberOfLines() {
    console.log(fileAsArray.length - 1);
}

asyncRead(printNumberOfLines);
And here is the solution provided by learnyounode:
var fs = require('fs')
var file = process.argv[2]

fs.readFile(file, function (err, contents) {
    // fs.readFile(file, 'utf8', callback) can also be used
    var lines = contents.toString().split('\n').length - 1
    console.log(lines)
})
I also noticed that the learnyounode code lacks semicolons. I thought they were strongly recommended/required?
I'm trying to do the following: read the content of a directory to find all the .xml files (I'm using glob, but I'd like to use something like fs.readdir from fs), then read every file using fs.readFile and convert each XML file to a JSON object. I'm using xml2json for this purpose.
Once I have the JSON objects, I'd like to iterate over every one of them, get one property out of each and push it to an array. Eventually, all the code is wrapped in a function that logs the content of the array (once it is complete). This code currently works fine, but I'm running into the famous callback hell.
const fs = require('fs');
const glob = require('glob');
const parser = require('xml2json');

let connectors = []

function getNames() {
    glob(__dirname + '/configs/*.xml', {}, (err, files) => {
        for (let j = 0; j < files.length; j++) {
            fs.readFile(files[j], function (err, data) {
                try {
                    let json = parser.toJson(data, {object: true, alternateTextNode: true, sanitize: true})
                    for (let i = 0; i < json.properties.length; i++) {
                        connectors.push(json.properties[i].name)
                        if (connectors.length === files.length) { return console.log(connectors) }
                    }
                }
                catch (e) {
                    console.log(e)
                }
            });
        }
    })
}

getNames()
However, I'd like to move to a cleaner and more elegant solution (using promises). I've been reading around and found some ideas in similar posts here or here.
I'd like to have your opinion on how I should proceed in this kind of situation. Should I go for a sync version of readFile instead? Should I use promisifyAll to refactor my code and use promises everywhere? If so, could you please elaborate on what my code should look like?
I've also learned that there's a promise-based version of fs from Node v10.0.0 onwards. Should I go for that option? If so, how should I proceed with the parser.toJson() part? I've also seen that there's another promise-based version called xml-to-json-promise.
I'd really appreciate your insights, as I'm not very familiar with promises when several asynchronous operations and loops are involved, so I end up with dirty solutions for situations like this one.
Regards,
J
I would indeed suggest that you use the promise versions of glob and fs, and then use async, await and Promise.all to get it all done.
NB: I don't see the logic behind the connectors.length === files.length check, as in theory the number of connectors (properties) can be greater than the number of files. I assume you want to collect all of them, irrespective of their number.
So here is how the code could look (untested):
const fs = require('fs').promises; // Promise-version (node 10+)
const glob = require('glob-promise'); // Promise-version
const parser = require('xml2json');

async function getNames() {
    let files = await glob(__dirname + '/configs/*.xml');
    let promises = files.map(fileName => fs.readFile(fileName).then(data =>
        parser.toJson(data, {object: true, alternateTextNode: true, sanitize: true})
            .properties.map(prop => prop.name)
    ));
    return (await Promise.all(promises)).flat();
}

getNames().then(connectors => {
    // rest of your processing that needs access to connectors...
});
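One caveat: Array.prototype.flat is only available from Node 11 onwards. If you are on Node 10, the return line could be written with concat instead, which is equivalent here since there is only one level of nesting:

    // same as (await Promise.all(promises)).flat(), but works on Node 10
    return [].concat(...(await Promise.all(promises)));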
Since in the comments you write that you have problems accessing properties.map, you can add some validation and skip the cases where there is no properties property:
const fs = require('fs').promises; // Promise-version (node 10+)
const glob = require('glob-promise'); // Promise-version
const parser = require('xml2json');

async function getNames() {
    let files = await glob(__dirname + '/configs/*.xml');
    let promises = files.map(fileName => fs.readFile(fileName).then(data =>
        (parser.toJson(data, {object: true, alternateTextNode: true, sanitize: true})
            .properties || []).map(prop => prop.name)
    ));
    return (await Promise.all(promises)).flat();
}

getNames().then(connectors => {
    // rest of your processing that needs access to connectors...
});
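And if you'd rather not add the glob-promise dependency, the same idea works with util.promisify (available since Node 8). This is just a sketch of the swap; the rest of getNames() stays exactly as above:

const util = require('util');
const glob = util.promisify(require('glob')); // wrap the callback-style glob in a promise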
I use node.js to read a file, save its contents to a variable, then replace a regex match with another string and write the result back to the file.
The weirdest thing happened. I used Regex101 (with the JS engine selected and the same flags as in my code) and it matched my desired text.
Link: https://regex101.com/r/WbmOLw/1
Implementing the exact same regex in my code, it fails to match!
I created the most minimal version representing my code:
tst.txt (the target file):
# Direct Methods
.method public constructor <init>()V
.locals 2
This seems to be the raw string (according to CyberChef):
# direct methods\n.method public constructor <init>()V\n .locals 2
test.js (the code):
var fs = require('fs');
var mainDir = 'tst.txt'

function start() {
    fs.readFile(mainDir, "utf-8", function (err, data) {
        data = data.replace(/(constructor \<init\>[(][)]V. \.locals )(\d+)/gms, 'BLABLAIDONTWORK')
        console.log(data) // not changed
        fs.writeFile(mainDir, data, 'utf8', function (err) {
            if (err) return console.log(err);
        })
    });
}

start()
Whatever is written back to the file isn't different at all. I suspect it's a strange side effect of newline handling, but I can't figure out what's causing this. Any help will be highly appreciated.
The file you've saved very likely has \r\n as a newline terminator, rather than just \n, so the V. pattern does not match. I can reproduce the problem on Windows, when the file was saved with Notepad++ with the \r\n setting enabled. Using \r\n in the pattern instead worked for me:
data = data.replace(/(constructor \<init\>[(][)]V\r\n \.locals )(\d+)/g, 'BLABLAIDONTWORK')
// ^^^^
To make the pattern more flexible, so that it matches a newline on any platform, alternate between \r\n, \n and (for rare cases) a lone \r:
data = data.replace(/(constructor \<init\>[(][)]V(?:\r\n|\r|\n) \.locals )(\d+)/g, 'BLABLAIDONTWORK')
// ^^^^^^^^^^^^^^
(no need for the s modifier anymore, or the m modifier at all)
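If you want to confirm which line endings a file actually uses, a quick debugging sketch (using the same data variable from the readFile callback above) is to print a slice of it with escapes made visible:

// \r\n vs \n shows up immediately in the escaped output
console.log(JSON.stringify(data.slice(0, 80)));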
You can try using the following regex:
var fs = require('fs');
var mainDir = 'tst.txt'

function start() {
    fs.readFile(mainDir, "utf-8", function (err, data) {
        data = data.replace(/(constructor \<init\>[(][)]V\n*\s*\.locals\s)(\d+)/gms, 'BLABLAIDONTWORK')
        console.log(data) // not changed
        fs.writeFile(mainDir, data, 'utf8', function (err) {
            if (err) return console.log(err);
        })
    });
}

start()
Classic embarrassing newbie question. Why doesn't my store variable contain any results? I know it is accumulating results along the way. I also know enough about nodejs to know it has to do with promises, single-threadedness, etc.
var readline = require('readline');
var fs = require("fs");

var pathToFile = '/scratch/myData.csv';

var rd = readline.createInterface({
    input: fs.createReadStream(pathToFile),
    // output: process.stdout,
    console: false
});

var store = {};

rd.on('line', function(line) {
    store[line] = 1;
    // console.log (`store is now: ${JSON.stringify (store)}`);
});

console.log (`store is now: ${JSON.stringify (store)}`);
This has nothing to do with Promises (although you can promisify it if you like).
As you said, it is accumulating the results line by line, but this is happening inside the scope of the callback function.
If you want to make use of the data, you have to call another function from inside the callback when the last line has been read, or, better, listen for a different event.
Something like the following:
var store = {};

rd.on('line', function(line) {
    store[line] = 1;
    // console.log (`store is now: ${JSON.stringify (store)}`);
})
.on('close', function() {
    myFunc(store);
});

function myFunc(store) {
    console.log (`store is now: ${JSON.stringify (store)}`);
}
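And if you do want the promisified variant mentioned above, a minimal sketch (reusing the same rd interface; error handling on the underlying stream is omitted) could look like this:

function collectLines(rd) {
    return new Promise(function (resolve) {
        var store = {};
        rd.on('line', function (line) {
            store[line] = 1;
        });
        // resolve once readline has consumed the whole input stream
        rd.on('close', function () {
            resolve(store);
        });
    });
}

collectLines(rd).then(function (store) {
    console.log(`store is now: ${JSON.stringify(store)}`);
});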
How to reset nodejs stream?
How to read stream again in nodejs?
Thanks in advance!
var fs = require('fs');
var lineReader = require('line-reader');

// proxy.txt = only 3 lines
var readStream = fs.createReadStream('proxy.txt');

lineReader.open(readStream, function (err, reader) {
    for (var i = 0; i < 6; i++) {
        reader.nextLine(function(err, line) {
            if (err) {
                readStream.reset(); // ???
            } else {
                console.log(line);
            }
        });
    }
});
There are two ways of solving your problem. As someone commented before, you could simply wrap all that in a function and, instead of resetting the stream, just read the file again.
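A minimal sketch of that first idea, assuming the same proxy.txt and the line-reader module from your snippet (eachLine is its documented convenience helper):

var lineReader = require('line-reader');

function readAllLines(path) {
    // open the file fresh on every call instead of trying to reset an old stream
    lineReader.eachLine(path, function (line) {
        console.log(line);
    });
}

readAllLines('proxy.txt');
// "resetting" is then just calling it again:
readAllLines('proxy.txt');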
Of course this won't work well with HTTP requests, for example. The other way, provided you take the much bigger memory usage into account, is to simply accumulate your data.
What you'd need is to implement some sort of "rewindable stream": essentially a Transform stream that keeps a list of all the buffers it has seen and writes them to a piped stream in a rewind method.
Take a look at the Node API for streams here; the methods should look somewhat like this:
const { Transform, PassThrough } = require('stream');

class Rewindable extends Transform {
    constructor() {
        super();
        this.accumulator = [];
    }
    _transform(buf, enc, cb) {
        // keep a copy of every chunk that flows through
        this.accumulator.push(buf);
        cb();
    }
    rewind() {
        // replay everything accumulated so far into a fresh stream
        var stream = new PassThrough();
        this.accumulator.forEach((chunk) => stream.write(chunk));
        return stream;
    }
}
And you would use it like this:
var readStream = fs.createReadStream('proxy.txt');
var rewindableStream = readStream.pipe(new Rewindable());

(...).on("whenever-you-want-to-reset", () => {
    var rewound = rewindableStream.rewind();
    // ...and do whatever you like with your stream.
});
Actually I think I'll add this to my scramjet. :)
Edit
I released the logic above in the rereadable-stream npm package. The advantage over the stream depicted here is that you can control the buffer length and get rid of the already-read data.
At the same time you can keep a window of count items and tail a number of chunks backwards.
tail -f logfile.txt outputs the last 10 lines of logfile.txt, and then continues to output appended data as the file grows.
What's the recommended way of doing the -f part in node.js?
The following outputs the entire file (ignoring the "show the last 10 lines") and then exits.
var fs = require('fs');
var rs = fs.createReadStream('logfile.txt', { flags: 'r', encoding: 'utf8' });

rs.on('data', function(data) {
    console.log(data);
});
I understand the event loop is exiting because after the stream's end and close events there are no more events; I'm curious about the best way of continuing to monitor the stream.
The canonical way to do this is with fs.watchFile.
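As a minimal sketch of that approach (the file name, polling interval and starting offset here are just assumptions): watch the file's stats and, whenever it grows, read only the newly appended bytes.

var fs = require('fs');

var filename = 'logfile.txt';
// start at the current end of the file, like tail -f (minus the initial 10 lines)
var position = fs.statSync(filename).size;

fs.watchFile(filename, { interval: 1000 }, function (curr) {
    if (curr.size < position) {
        // file was truncated (e.g. log rotation): start over from the beginning
        position = 0;
    }
    if (curr.size === position) return; // nothing new
    // read only the bytes appended since the last check
    fs.createReadStream(filename, { start: position, end: curr.size - 1, encoding: 'utf8' })
        .on('data', function (chunk) {
            process.stdout.write(chunk);
        });
    position = curr.size;
});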
Alternatively, you could just use the node-tail module, which uses fs.watchFile internally and has already done the work for you. Here is an example of using it straight from the documentation:
Tail = require('tail').Tail;

tail = new Tail("fileToTail");

tail.on("line", function(data) {
    console.log(data);
});
The Node.js API documentation on fs.watchFile states:
Stability: 2 - Unstable. Use fs.watch instead, if available.
Funnily enough, it says almost the exact same thing for fs.watch:
Stability: 2 - Unstable. Not available on all platforms.
In any case, I went ahead and wrote yet another small webapp, TailGate, that will tail your files using the fs.watch variant.
Feel free to check it out here:
TailGate on github.
You can try to use fs.read instead of a ReadStream:
var fs = require('fs')

var buf = Buffer.alloc(16); // zero-filled buffer (Buffer.alloc replaces the deprecated new Buffer)

function read(fd) {
    fs.read(fd, buf, 0, buf.length, null, function(err, bytesRead, buf1) {
        if (bytesRead != 0) {
            // only print the bytes that were actually read, not the whole 16-byte buffer
            console.log(buf1.toString('utf8', 0, bytesRead));
            read(fd);
        } else {
            // reached the current end of the file: poll again in a second
            setTimeout(function() {
                read(fd);
            }, 1000);
        }
    });
}

fs.open('logfile', 'r', function(err, fd) {
    read(fd);
});
Note that fs.read calls the callback even if there is no data and it has just reached the end of the file; without the timeout you'd get 100% CPU usage here. You could try to use fs.watchFile to get new data immediately instead of polling.
Substack has a file slice module that behaves exactly like tail -f: slice-file can stream updates after the initial slice of 10 lines.
var sf = require('slice-file');
var xs = sf('/var/log/mylogfile.txt');
xs.follow(-10).pipe(process.stdout);
Source: https://github.com/substack/slice-file#follow
https://github.com/jandre/always-tail seems a great option if you have to worry about log rotation; example from the readme:
var Tail = require('always-tail');
var fs = require('fs');

var filename = "/tmp/testlog";
if (!fs.existsSync(filename)) fs.writeFileSync(filename, "");

var tail = new Tail(filename, '\n');

tail.on('line', function(data) {
    console.log("got line:", data);
});

tail.on('error', function(data) {
    console.log("error:", data);
});

tail.watch();