Japanese Transliteration in Node.js and Kakasi - node.js

I have written a little wrapper for Kakasi
that is like the following:
/**
 * Pipe `data` through the external kakasi binary and collect what it prints.
 *
 * The binary path is read from `this._options.bin`. When `this._options.debug`
 * is truthy the child's stdout is additionally mirrored to this process's
 * stdout.
 *
 * @param {string} data - Text written to kakasi's stdin. A trailing newline is
 *   appended, which is what the previous `echo`-based pipeline sent.
 * @returns {Promise<string>} Resolves with kakasi's complete stdout decoded as
 *   UTF-8 once the stream ends; rejects if the child process or its stdin
 *   emits an `error` event.
 */
Kakasi.prototype.transliterate = function (data) {
  return new Promise((resolve, reject) => {
    // Command-line flags handed to kakasi verbatim.
    const args = [
      '-i',
      'euc',
      '-Ha',
      '-Ka',
      '-Ja',
      '-Ea',
      '-ka',
      '-s',
      '-iutf8',
      '-outf8'
    ];
    const kakasi = spawn(this._options.bin, args, {});

    // Log the equivalent shell pipeline so the call can be reproduced by hand.
    console.log("echo \"" + data + "\" | " + kakasi.spawnargs.join(' '));

    // Keep the raw chunks and decode once at the end: a multi-byte UTF-8
    // character may be split across two chunks, and decoding each chunk on
    // its own would corrupt it.
    const chunks = [];
    kakasi.stdout.on('data', (chunk) => {
      chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, 'utf8'));
    });
    kakasi.stdout.on('end', () => {
      resolve(Buffer.concat(chunks).toString('utf8'));
    });

    // Spawn failures surface on the child; a child that exits early surfaces
    // as a write error on its stdin. Either one rejects the promise instead
    // of crashing the process with an unhandled 'error' event.
    kakasi.on('error', reject);
    kakasi.stdin.on('error', reject);

    if (this._options.debug) kakasi.stdout.pipe(process.stdout);

    // Feed the input directly rather than through a separate `echo` process:
    // no second child to fail, and input that looks like an echo option
    // (e.g. "-n") is passed through untouched.
    kakasi.stdin.end(data + '\n');
  });
}; // transliterate
This code basically does the following command
echo "退屈であくびばっかしていた毎日" | kakasi -i euc -Ha -Ka -Ja -Ea -ka -s -iutf8 -outf8
that outputs taikutsu deakubibakkashiteita mainichi
The problem is that the JavaScript version is missing some of the output. In fact:
$ node transliterate.js
echo "退屈であくびばっかしていた毎日" | kakasi -i euc -Ha -Ka -Ja -Ea -ka -s -iutf8 -outf8
----------
deakubibakkashiteita
The input parameters are the same, but for some reason (encoding?) the child output to stdout is different.
The kakasi.js code is available here.

I think your dictionary loader causes this problem. You should flip the dictionaries as follows.
process.env.KANWADICTPATH = resolve('./data/kanwadict');
process.env.ITAIJIDICTPATH = resolve('./data/itaijidict');
instead of
process.env.KANWADICTPATH = resolve('./data/itaijidict');
process.env.ITAIJIDICTPATH = resolve('./data/kanwadict');

Related

node exec awk command quotations

Using this pattern to pass a parameter in exec command:
const the_xml_file = 'pubmed22n1171.xml';
const the_command = "echo " + the_xml_file;
/**
 * Run `the_command` through `exec` and print what it wrote to stdout.
 *
 * Any error thrown while running or printing is reported with
 * `console.error` instead of being propagated.
 *
 * @returns {Promise<void>}
 */
async function myBash() {
  try {
    const outcome = await exec(the_command);
    console.log(outcome.stdout);
  } catch (failure) {
    console.error(failure);
  }
}
However when the command is
const the_command = "awk 'BEGIN{splitno=15000; sn=splitno+1; out=FILENAME\"_\"int(c++/sn)+1\".xml\"} /<PubmedArticle/{f=1} f{print > out} /<\/PubmedArticle>/&&c%sn==0{close(out); f=0}' " + the_xml_file;
I get an error:
Error: Command failed: awk 'BEGIN{splitno=15000; sn=splitno+1; out=FILENAME"_"int(c++/sn)+1".xml"} /<PubmedArticle/{f=1} f{print > out} /</PubmedArticle>/&&c%sn==0{close(out); f=0}' pubmed22n1171.xml
awk: non-terminated regular expression &&c%sn==0{... at source line 1
context is
BEGIN{splitno=15000; sn=splitno+1; out=FILENAME"_"int(c++/sn)+1".xml"} /<PubmedArticle/{f=1} f{print > out} /</PubmedArticle>/&&c%sn==0{close(out); >>> f=0} <<<
I've tested the awk command and it works from the console, so I'm assuming this is a quotation mark issue. Any suggestions would be appreciated.

Spawn command with redirection

Let's say I have this command
somecli -s < "/path/to/file.txt"
How can I convert the above command to NodeJS spawn command ? I did something like this, but seems like it didn't pass the input.
spawn('somecli', ['-s', '<', '"/path/to/file.txt"'], { stdio: 'inherit'}).on('error', function (error) {
// something
});
I can use the exec command below and it's working, but I prefer if we can see the live output.
exec('somecli -s < "/path/to/file.txt"', (e, stdout, stderr) => {
// something
})
something like this should help
const { spawn } = require('child_process');
const fs = require('fs');
const writeStream = fs.createWriteStream("/path/to/file.txt");
const shell = spawn('somecli', ['-s']);
shell.stdout.pipe(writeStream);
To pass file input to command ( STDIN redirection )
$ somecli -s < /path/to/file.txt
We can do it something like this
spawn('somecli', ['-s'], {stdio: [fs.openSync('/path/to/file.txt', 'r'), process.stdout, process.stderr]});
To pass command output to file ( STDOUT redirection )
$ somecli -s > /path/to/file.txt
You may follow Ashish's answer
// Spawn the CLI and stream its stdout into the target file (STDOUT redirection).
const s = spawn('somecli', ['-s']);
s.stdout.pipe(fs.createWriteStream('/path/to/file.txt'));

how to use require('typescript').transform?

const ts = require('typescript');
let template = `
let hello: string = 'hello,world';
`
ts.transform
How to convert strings in the above operation?
You can use the transpile function. This will allow you to compile an arbitrary string:
// Example: compile a TypeScript source string with typescript.transpile.
import * as typescript from 'typescript';
// TypeScript source to compile, held in memory as a plain string.
let template = `
let hello: string = 'hello,world';
class test{}
`
// Array handed to transpile as its diagnostics argument; printed below.
let errors : typescript.Diagnostic[] = []
let result = typescript.transpile(template, {}, undefined, errors);
// The result
console.log(result);
// The errors
for(let err of errors) console.log(err.messageText);
Edit
The solution above works, but it only checks for syntactical errors not for semantic errors. A version which does module resolution and will check for semantic errors would be:
/**
 * Compile a TypeScript source string through a full `Program` and collect
 * syntactic, semantic and option diagnostics along the way.
 *
 * @param input - TypeScript source text to compile.
 * @param compilerOptions - Options forwarded to the compiler host and the
 *   program; treated as `{}` when omitted.
 * @param fileName - Virtual file name under which `input` is presented to the
 *   compiler.
 * @param diagnostics - Optional array; when provided, the diagnostics are
 *   appended to it.
 * @returns The text of the emitted `.js` file, or `undefined` if the compiler
 *   wrote no `.js` output.
 */
function transpileWithAllErrors(input: string, compilerOptions?: typescript.CompilerOptions, fileName: string = "dynamic.ts", diagnostics?: typescript.Diagnostic[]): string {
    let result: string;
    // Honor the caller's options instead of silently compiling with `{}`.
    const options: typescript.CompilerOptions = compilerOptions || {};
    const host = typescript.createCompilerHost(options);

    // Build the in-memory source file from the `input` argument (not from any
    // outer variable) so the function compiles what it was asked to compile.
    const sourceFile = typescript.createSourceFile(fileName, input, typescript.ScriptTarget.ES5);

    // Serve the in-memory file for `fileName`; defer to the stock host for
    // every other file the compiler asks for.
    const originalGetSourceFile = host.getSourceFile;
    host.getSourceFile = (file, languageVersion, onError, shouldCreateNewSourceFile) =>
        file === fileName ?
            sourceFile :
            originalGetSourceFile(file, languageVersion, onError, shouldCreateNewSourceFile);

    host.getCurrentDirectory = () => "";
    host.getDefaultLibLocation = () => "node_modules/typescript/lib";
    host.getDefaultLibFileName = () => "node_modules/typescript/lib/lib.d.ts";

    // Capture the emitted JavaScript in memory instead of writing it out.
    host.writeFile = (wFileName, data) => {
        if (wFileName.endsWith(".js")) {
            result = data;
        }
    };

    const program = typescript.createProgram([fileName], options, host);

    if (diagnostics != null) {
        diagnostics.push(...program.getSyntacticDiagnostics(sourceFile));
        diagnostics.push(...program.getSemanticDiagnostics(sourceFile));
        diagnostics.push(...program.getOptionsDiagnostics());
    }
    program.emit(sourceFile);
    return result;
}
Usage:
let diagnostics: typescript.Diagnostic[] = []
let result = transpileWithAllErrors(template, {}, undefined, diagnostics);
for (let err of diagnostics) console.log(err.messageText);
console.log(result);
Note: This method does module resolution relative to the current path so the script has access to any modules installed in the current path. Also I did not do extensive testing on the code, but it should work.

Send the stdout of spawnSync to another spawnSync stdin

How do I emulate linux's | (pipe) in a node.js app to pipe the stdout of a command to the stdin of the next command. Both commands are being spawned with spawnSync.
This (pseudo code) works as expected in the commandline:
$ command1 -arg1 file | command2 arg2
> someoutput
But this does not:
const spawnSync = require('child_process').spawnSync;
const c1Spawn = spawnSync('command1', ['arg1', 'file']);
const c2Spawn = spawnSync('command2', ['arg2'], { input: c1Spawn.output });
const someoutput = c2Spawn.output;
I believe I found the answer by using input: c1Spawn.stdout instead of output as the in for the second command.
const spawnSync = require('child_process').spawnSync;
const c1Spawn = spawnSync('command1', ['arg1', 'file']);
const c2Spawn = spawnSync('command2', ['arg2'], { input: c1Spawn.stdout });
const someoutput = c2Spawn.output;

Chrome Native Messaging - Hanging Child Process

I'm trying to make an extension that uses chrome native messaging to communicate with youtube-dl using a node.js host script. I've been able to successfully parse the stdin from the extension & also been able to run a child process (i.e. touch file.dat), but when I try to exec/spawn youtube-dl it hangs on the command. I've tried the host script independently of chrome native input and it works fine. I think the problem may have something to do with 1MB limitations on buffer size of chrome native messaging. Is there a way around reading the buffer?
#! /usr/bin/env node
"use strict";
const fs = require('fs');
const exec = require('child_process').execSync;
const dlPath = '/home/toughluck/Music';
let first = true;
let buffers = [];
// Accumulate everything arriving on stdin into `buffers`.
process.stdin.on('readable', () => {
let chunk = process.stdin.read();
if (chunk !== null) {
if (first) {
// Drop the first 4 bytes of the very first chunk only.
// NOTE(review): presumably the native-messaging length header — confirm.
chunk = chunk.slice(4);
first = false;
}
buffers.push(chunk);
}
});
// Once stdin closes: parse the collected bytes as JSON, read its `url` field
// and run youtube-dl on it.
process.stdin.on('end', () => {
const res = Buffer.concat(buffers);
const url = JSON.parse(res).url;
// Output path template handed to youtube-dl via -o.
const outTemplate = `${dlPath}/%(title)s.%(ext)s`;
const cmdOptions = {
shell: '/bin/bash'
};
// NOTE(review): `url` is interpolated into a shell command line unescaped;
// it comes from the incoming message, so treat it as untrusted input.
const cmd = `youtube-dl --extract-audio --audio-format mp3 -o \"${outTemplate}\" ${url}`;
// const args = ['--extract-audio', '--audio-format', 'mp3', '-o', outTemplate, url];
// const cmd2 = 'youtube-dl';
process.stderr.write('Suck it chrome');
process.stderr.write('stderr doesnt stop host');
// NOTE(review): `exec` is bound to child_process.execSync in this script.
// Its documented signature is (command[, options]) and it returns stdout, so
// this callback is presumably never invoked — confirm.
exec(cmd, cmdOptions, (err, stdout, stderr) => {
if (err) throw err;
process.stderr.write(stdout);
process.stderr.write(stderr);
});
process.stderr.write('\n Okay....');
});
The full codebase can be found at https://github.com/wrleskovec/chrome-youtube-mp3-dl
So I was right about what was causing the problem. It had to do with 1 MB limitation on host to chrome message. You can avoid this by redirecting the stdout/stderr to a file.
const cmd = `youtube-dl --extract-audio --audio-format mp3 -o \"${outTemplate}\" ${url} &> d.txt`;
This worked for me. To be honest, I'm not entirely sure why the message is considered > 1 MB, and if someone can give a better explanation that would be great.

Resources