This question already has answers here:
How to use JavaScript regex over multiple lines?
(8 answers)
Closed 4 years ago.
Looking to scrape the comments out of a JS file. Was thinking I can create a function to input a .js file, perform a RegExp match, and output an array of strings using fs.readFile() and string.match();
Here's an over-simplified example:
I have two files class.js (to read) and parse.js (to perform the text parsing)
class.js:
/*
by: Mike Freudiger
*/
/**
* one
* #returns 'Hello World'
*/
function one () {
return 'Hello World';
}
alert();
/* end of file */
parse.js:
var fs = require('fs');
// Reads class.js as UTF-8 text and prints every /** ... */ block found in it.
// NOTE: fs.readFile hands its result to the callback only, so `file` never
// holds the file contents. `err` is not checked here; if the read fails,
// `doc` is undefined and the `.match` call below throws.
var file = fs.readFile('C:\\Users\\mikef\\Desktop\\node_regex_test\\class.js', 'utf8', function(err, doc) {
// `.` never matches a line terminator and the alternation adds back only \n,
// so a comment body that contains \r (CRLF or CR line endings) cannot match.
// String.prototype.match returns null when there is no match at all.
var comments = doc.match(/(\/\*\*(.|\n)+?\*\/)/g);
console.log(comments);
});
when I run node parse.js the console output is null.
However when I run the regex match on a multiline string, I get the expected output:
// Same text as class.js, embedded as a template literal. Line breaks inside a
// template literal are always read as \n, however this source file is saved.
var doc = `/*
by: Mike Freudiger
*/
/**
* one
* #returns 'Hello World'
*/
function one () {
return 'Hello World';
}
alert();
/* end of file */`
Any idea why the readFile() string would behave differently than a string literal?
...Also, I realize there may be a better way to get these comments out, with another npm package or something, but now I really just want to know why these two strings are different.
As mentioned by vsemozhetbyt, it seems that newlines used in class.js file are either \r\n or \r.
One of the simplest (and fastest) ways to match these newlines is to use [\s\S] instead of (.|\n) in your regex.
Thus you get:
var fs = require('fs');
/**
 * Collects every JSDoc-style block comment (one opening with a slash and two
 * asterisks) from a source string.
 *
 * @param {string} source - Text of a JavaScript file.
 * @returns {string[]} The matched comments in order of appearance; an empty
 *   array when the text contains none.
 */
function extractDocComments(source) {
  // [\s\S] matches any character, including \r and \n, so files saved with
  // CRLF or CR line endings are handled the same as LF files.
  // match() yields null when nothing matches; normalise that to [].
  return source.match(/(\/\*\*[\s\S]+?\*\/)/g) || [];
}

// fs.readFile delivers its result only through the callback, so its return
// value is not captured.
fs.readFile('C:\\Users\\mikef\\Desktop\\node_regex_test\\class.js', 'utf8', function(err, doc) {
  if (err) {
    // Without this guard a failed read leaves `doc` undefined and the
    // extraction below would throw.
    console.error(err.message);
    return;
  }
  var comments = extractDocComments(doc);
  console.log(comments);
});
Related
I'm trying to write a node script that identifies unused translation strings in my React project.
First, I want to get a list of all the translations that are used. To do this, I am getting a list of each JS file in my /src/components folder and then reading the file.
My translation strings look like this: t('some.translation.key'), so basically, I want to identify each instance of t('...') using RegEx and then get the key in between those parentheses (i.e. "some.translation.key"). From there, I should be able to compare the keys to the ones in my translation JSON file and remove the ones that aren't being used.
unused.js
const path = require('path');
const fs = require('fs');
// Extensions that count as JavaScript source files.
const SOURCE_EXTENSIONS = ['.js', '.jsx'];

// https://stackoverflow.com/a/63111390/2262604
/**
 * Recursively lists the JavaScript source files beneath a directory.
 *
 * Each call builds and returns its own array, so calling the function more
 * than once never yields duplicated entries.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths (each built by joining onto `dir`) of every
 *   `.js` / `.jsx` file found, in traversal order.
 */
function getFiles(dir) {
  const found = [];

  const visit = (current) => {
    for (const entry of fs.readdirSync(current)) {
      const absolute = path.join(current, entry);
      if (fs.statSync(absolute).isDirectory()) {
        visit(absolute);
      } else if (SOURCE_EXTENSIONS.includes(path.extname(absolute))) {
        // Compare the real extension: a substring test for ".js" would also
        // accept ".json", ".js.map" and files under a folder named "x.js".
        found.push(absolute);
      }
    }
  };

  visit(dir);
  return found;
}
/**
 * Walks the components directory and loads the text of each JS file.
 * The key extraction itself is still to be written inside the loop.
 */
function getTranslations() {
  // get all js files in components directory
  const componentsDir = path.join(__dirname, '../../src/components');
  const files = getFiles(componentsDir);
  const translationKeys = [];

  // for each js file
  for (const file of files) {
    // read contents of file
    const contents = fs.readFileSync(file, 'utf8');
    // search contents for all instances of t('...')
    // and get the key between the parentheses
  }
}

getTranslations();
How can I use RegEx to find all instances of t('...') in contents and then extract the ... string between the parentheses?
Yes, you could use a regular expression:
// Matches t('...') or t("..."); capture group 1 is the text between the quotes.
const translationCall = /\bt\(['"](.*?)['"]\)/g
for (const match of contents.matchAll(translationCall)) {
  const str = match[1]
  console.log('t called with string argument:', str)
}
However, with regular expressions the problem will be that they don't understand the code and would cause trouble with matching strings that contain ( ) or \' themselves, have issues with concatenated strings or extra whitespace, etc., and you'd then also get the contents literally, including possible escape sequences.
A more robust way would be to create an AST (abstract syntax tree) from the code and look for calls to t in it.
A popular AST parser would be acorn. There is also the supplementary module acorn-walk that helps walking through the whole syntax tree without building your own recursive algorithm.
import acorn from 'acorn'
import walk from 'acorn-walk'
// Example
const contents = "function a () { if (123) { t('hello') } return t('world') }"

// The arguments to acorn.parse would have to be adjusted based
// on what kind of syntax your files can use.
const result = acorn.parse(contents, {ecmaVersion: 2020})

// True for a call whose callee is the bare identifier `t`.
const isCallToT = node =>
  node.type === 'CallExpression' &&
  node.callee.type === 'Identifier' &&
  node.callee.name === 't'

// True when the call has exactly one argument and it is a string literal.
const hasSingleStringArgument = ({ arguments: args }) =>
  args.length === 1 &&
  args[0].type === 'Literal' &&
  typeof args[0].value === 'string'

walk.full(result, node => {
  if (!isCallToT(node)) return

  if (hasSingleStringArgument(node)) {
    // This is for the case `t` is called with a single string
    // literal as argument.
    console.log('t called with string argument:', node.arguments[0].value)
    return
  }

  // In case you have things like template literals as well,
  // or multiple arguments, you'd need to handle them here too.
  console.log('t called with unknown arguments:', node.arguments)
})

// Will output:
// t called with string argument: hello
// t called with string argument: world
I'm trying to escape quotes in txt file using node.js and regex.
My code looks like this:
const fs = require("fs");
const utf8 = require("utf8");
var dirname = ".\\f\\";
// Targets a double quote that has another quote earlier on the same line and
// a line-ending quote later on it, i.e. the inner quotes of a quoted line.
const regex = /(?<=".*)"(?=.*"$)/gm;

fs.readFile(`${dirname}test.txt`, (error, data) => {
  if (error) {
    throw error;
  }
  var d = utf8.encode(data.toString());
  console.log(`File: ${typeof d}`); //string
  // d = `Another string\n"Test "here"."\n"Another "here"."\n"And last one here."`;
  console.log(`Text: ${typeof d}`); //string
  const escaped = d.replace(regex, '\\"');
  console.log(`Result:\n${escaped}`);
  /* Another string
  "Test \"here\"."
  "Another \"here\"."
  "And last one here."
  */
});
The problem is:
When I uncomment that line, everything works fine. But if I read the text from the file, the replacement doesn't work.
Thanks for any comments on this.
Well.. turns out the problem was in file encoding. The file was encoded in UTF-16, not in UTF-8. Node.js wasn't giving me any signs of wrong encoding, so well, nice.
I would like to add a string at the top of my JS file. Currently, it is added at the end:
// Target bundle and the text to add; appendFileSync writes at the end of the file.
const file = './public/js/app.bundleES6.js';
const string = '// My string';

fs.appendFileSync(file, string);
Do you have any idea how to add my string on the first line instead?
Thank you !
I think there is no built-in way to insert at the beginning of a file in Node.js.
But you can use the readFileSync and writeFile methods of fs to work around this:
read the existing content, put your string in front of it, and write the result back.
Try this
Method#1
var fs = require('fs');
// Read the current content as text, put the new first line in front of it,
// and write the combined text back to the same file.
const data = fs.readFileSync('./example.js', 'utf8');
const text = 'Append the string whatever you want at top' + '\n' + data;
fs.writeFile('./example.js', text, function (err) {
  // A value returned from this callback is discarded by Node, so a failed
  // write has to be reported here or it goes unnoticed.
  if (err) {
    console.error(err.message);
  }
});
Method#2
If you are willing to rely on a third-party module, you can use the prepend module to add the string at the top, as suggested by @robertklep.
var prepend = require('prepend');
// Prints the failure message to stderr; a successful prepend needs no action.
const reportFailure = (error) => {
  if (error) {
    console.error(error.message);
  }
};

prepend(FileName, 'String to be appended', reportFailure);
I wanted to ask if anyone knows of a good solution for how to use node.js to look in a .scss file and grab all the classes listed and to then put them in either an object or an array?
The catch is that the sass folder needs to be available to your server. This is not recommended practice: you normally publish only the compiled CSS file, so there is no need to publish the dev assets as well.
However if you do so, you will need to read .scss file using node and from there use a regex to match the .class strings inside the file.
This will make the reading of the file:
var fs = require('fs');
/**
 * Loads the SCSS partial as UTF-8 text and hands it to regexArray().
 * A read error is logged and nothing further happens.
 */
function readSassFile () {
  const onRead = (err, data) => {
    if (!err) {
      regexArray(data);
      return;
    }
    console.log(err);
  };

  fs.readFile('./public/scss/components/_styles.scss', 'utf8', onRead);
}
As you can see, at the end if the readFile retrieves the file with success, I'm calling a function regexArray() and sending the data of the file loaded.
In the regexArray function you need to define a regex to evaluate the string of the file loaded.
/**
 * Collects every dot-prefixed token from a stylesheet's text.
 *
 * NOTE: the pattern takes a "." plus all following non-whitespace characters,
 * so besides class names it also picks up tokens such as ".5em" and keeps
 * punctuation that is glued to a name (".foo{", ".a.b").
 *
 * @param {string} data - Contents of the stylesheet.
 * @returns {string[]} The matched tokens in order of appearance (also logged).
 */
function regexArray (data) {
  var re = /\.\S*/g;
  var m;
  var classArray = [];
  // Every match starts with a literal ".", so it is never empty and exec()
  // always moves lastIndex forward by itself; no manual bump is needed.
  while ((m = re.exec(data)) !== null) {
    classArray.push(m[0]);
  }
  console.log(classArray);
  // Hand the result back so callers can use it, not just read it in the log.
  return classArray;
}
The variable re is a regular expression that matches a . followed by any run of non-whitespace characters, which picks up your CSS class names (along with any other dot-prefixed token in the file).
then we evaluate the m variable when is different from null and store the results in the array classArray, then you can log it to see the results.
I tested this with the path shown in the fs.readFile call; you can change it to your own path.
I have several arrays that contain data that I would like to export, each array to a txt file, in order to be analyzed using MATLAB.
Let's say my array is:
var xPosition = [];
// some algorithm that adds content to xPosition
// TODO: export array into a txt file let's call it x_n.txt
It would be great to store each element of an array per line.
I have found a guide for the solution to my question in this post. The following code is what I ended up using:
var fs = require('fs');
const xPosition = [];
// some algorithm that adds content to xPosition

// Stream the values to the output file, one element per line.
const file = fs.createWriteStream('./positions/x_n.txt');
const handleError = (err) => { /* error handling */ };
const writeLine = (value) => { file.write(value + '\n'); };

file.on('error', handleError);
xPosition.forEach(writeLine);
file.end();
The solution you found works, but here's how I'd have done it:
var fs = require('fs');
const xPosition = [1, 2, 3]; // Generate this
const fileName = './positions/x_n.txt';

// One element per line, written in a single synchronous call.
const contents = xPosition.join('\n');
fs.writeFileSync(fileName, contents);
This uses node's synchronous file writing capability, which is ideal for your purposes. You don't have to open or close file handles, etc. I'd use streams only if I had gigabytes of data to write out.