How to pipe to function in node.js? - node.js

I want to read from file to stream, pipe the output to a function that will upperCase the content and then write to file. This is my attempt. What am I doing wrong?
const fs = require('fs')
const fred = q => {
return q.toUpperCase()
}
fs.createReadStream('input.txt')
.pipe(fred)
.pipe(fs.createWriteStream('output.txt'))
Currently the error is:
dest.on is not a function

Based on answer from Marco but tidied up:
const fs = require('fs')
const {Transform} = require('stream')
const upperCaseTransform = new Transform({
transform: (chunk, encoding, done) => {
const result = chunk.toString().toUpperCase()
done(null, result)
}
})
fs.createReadStream('input.txt')
.pipe(upperCaseTransform)
.pipe(fs.createWriteStream('output.txt'))

You have to use Transform if you want to "transform" streams. I recommend you to read: https://community.risingstack.com/the-definitive-guide-to-object-streams-in-node-js/
const fs = require('fs')
const Transform = require('stream').Transform;
/// Create the transform stream:
var uppercase = new Transform({
decodeStrings: false
});
uppercase._transform = function(chunk, encoding, done) {
done(null, chunk.toString().toUpperCase());
};
fs.createReadStream('input.txt')
.pipe(uppercase)
.pipe(fs.createWriteStream('output.txt'))
EDIT: You need to call .toString() in chunk because it's a buffer! :)

Using async iteration is the cleaner new way to transform streams:
const fs = require('fs');
(async () => {
const out = fs.createWriteStream('output.txt');
for await (const chunk of fs.createReadStream('input.txt', 'utf8')) {
out.write(chunk.toUpperCase());
}
})();
As you can see, this way is a lot more terse and readable if you already are working in an async function context.

Related

Add transform before write stream

I have a function e.g.
function writeToStream(writeStream) {
writeStream.write("test\n");
}
How can I make this apply a transform before writing to the final destination? Note: I don't have control over the writeToStream function, I just want to apply a transformation to what it is writing out
const fs = require("fs");
const { Transform } = require("stream");
const upperCaseTr = new Transform({
transform(chunk, encoding, callback) {
//this is never called???
callback(null, chunk.toString().toUpperCase());
},
});
function writeToStream(writeStream) {
writeStream.write("test\n");
}
const r = fs.createWriteStream("test.txt");
writeToStream(upperCaseTr.pipe(r));
With the above code, my custom Transform upperCaseTr is never called
This answer was very helpful in solving this https://stackoverflow.com/a/50537771/2129219
Specifically, upperCaseTr.pipe(r) just returns r, so upperCaseTr is never called.
Here is an answer that uses through2, similar to the linked answer
const fs = require("fs");
const through2 = require("through2");
function writeToStream(writeStream) {
writeStream.write("test test test test test\n");
}
const stream = through2.obj((chunk, enc, callback) => {
callback(null, chunk.toString().toUpperCase());
});
const out = fs.createWriteStream("test.txt");
stream.pipe(out);
writeToStream(stream);
Here is one using a Transform more similar to my original question
const fs = require("fs");
const { Transform } = require("stream");
function writeToStream(writeStream) {
writeStream.write("test test test test test\n");
}
const upperCaseTr = new Transform({
transform(chunk, encoding, callback) {
callback(null, chunk.toString().toUpperCase());
},
});
const out = fs.createWriteStream("test.txt");
upperCaseTr.pipe(out);
writeToStream(upperCaseTr);

nodeJS async function parse csv return data to other file

I'm creating a small tool for internal user with puppeteer.
Basically I got a csv file with some data i "read" and fill form with.
As I try to cleanup my project to be reusable i'm struggle a little bit:
I create a file name parsecsv.js
const config = require('../config.json');
const parse = require('csv-parse');
const fs = require('fs');
const processFile = async () => {
records = []
const parser = fs
.createReadStream(config.sourceFile)
.pipe(parse({
// CSV options
from_line: 1,
delimiter: ";",
}));
let i =1;
for await (const record of parser) {
records.push(record)
i++;
}
return records
}
const processFileData = async () => {
const records = await processFile()
console.info(records);
return records
}
module.exports ={
processFile, processFileData
}
in an other Js file i made
const parseCSV = require('./src/ParseCsv');
const records = parseCSV.processFileData();
const data = parseCSV.processFile();
console.log(typeof records);
console.table(records);
console.log(typeof data);
console.table(data);
But I never get my data only an empty oject.
How I can get my data to be able to "share" it with other function ?
thanks
as your functions are async ones and they return a promises, you can do something like
const parseCSV = require('./src/ParseCsv');
(async () => {
const records = await parseCSV.processFileData();
const data = await parseCSV.processFile();
console.log(typeof records);
console.table(records);
console.log(typeof data);
console.table(data);
})()

Node.js copy a stream into a file without consuming

Given a function parses incoming streams:
async onData(stream, callback) {
const parsed = await simpleParser(stream)
// Code handling parsed stream here
// ...
return callback()
}
I'm looking for a simple and safe way to 'clone' that stream, so I can save it to a file for debugging purposes, without affecting the code. Is this possible?
Same question in fake code: I'm trying to do something like this. Obviously, this is a made up example and doesn't work.
const fs = require('fs')
const wstream = fs.createWriteStream('debug.log')
async onData(stream, callback) {
const debugStream = stream.clone(stream) // Fake code
wstream.write(debugStream)
const parsed = await simpleParser(stream)
// Code handling parsed stream here
// ...
wstream.end()
return callback()
}
No you can't clone a readable stream without consuming. However, you can pipe it twice, one for creating file and the other for 'clone'.
Code is below:
let Readable = require('stream').Readable;
var stream = require('stream')
var s = new Readable()
s.push('beep')
s.push(null)
var stream1 = s.pipe(new stream.PassThrough())
var stream2 = s.pipe(new stream.PassThrough())
// here use stream1 for creating file, and use stream2 just like s' clone stream
// I just print them out for a quick show
stream1.pipe(process.stdout)
stream2.pipe(process.stdout)
I've tried to implement the solution provided by #jiajianrong but was struggling to get it work with a createReadStream, because the Readable throws an error when I try to push the createReadStream directly. Like:
s.push(createReadStream())
To solve this issue I have used a helper function to transform the stream into a buffer.
function streamToBuffer (stream: any) {
const chunks: Buffer[] = []
return new Promise((resolve, reject) => {
stream.on('data', (chunk: any) => chunks.push(Buffer.from(chunk)))
stream.on('error', (err: any) => reject(err))
stream.on('end', () => resolve(Buffer.concat(chunks)))
})
}
Below the solution I have found using one pipe to generate a hash of the stream and the other pipe to upload the stream to a cloud storage.
import stream from 'stream'
const Readable = require('stream').Readable
const s = new Readable()
s.push(await streamToBuffer(createReadStream()))
s.push(null)
const fileStreamForHash = s.pipe(new stream.PassThrough())
const fileStreamForUpload = s.pipe(new stream.PassThrough())
// Generating file hash
const fileHash = await getHashFromStream(fileStreamForHash)
// Uploading stream to cloud storage
await BlobStorage.upload(fileName, fileStreamForUpload)
My answer is mostly based on the answer of jiajianrong.

Reading a csv file async - NodeJS

I am trying to create a function where I can pass file path and the read the file in async way. What I found out was that it supports streams()
const fs = require('fs');
var parse = require('csv-parse');
var async = require('async');
readCSVData = async (filePath): Promise<any> => {
let csvString = '';
var parser = parse({delimiter: ','}, function (err, data) {
async.eachSeries(data, function (line, callback) {
csvString = csvString + line.join(',')+'\n';
console.log(csvString) // I can see this value getting populated
})
});
fs.createReadStream(filePath).pipe(parser);
}
I got this code from here. but I am new to node js so I am not getting how to use await to get the data once all lines are parsed.
const csvData = await this.util.readCSVData(path)
My best workaround for this task is:
const csv = require('csvtojson')
const csvFilePath = 'data.csv'
const array = await csv().fromFile(csvFilePath);
This answer provides legacy code that uses async library. Promise-based control flow with async doesn't need this library. Asynchronous processing with async.eachSeries doesn't serve a good purpose inside csv-parse callback because a callback waits for data to be filled with all collected data.
If reading all data into memory is not an issue, CSV stream can be converted to a promise:
const fs = require('fs');
const getStream = require('get-stream');
const parse = require('csv-parse');
readCSVData = async (filePath): Promise<any> => {
const parseStream = parse({delimiter: ','});
const data = await getStream.array(fs.createReadStream(filePath).pipe(parseStream));
return data.map(line => line.join(',')).join('\n');
}

What is the "reactive" way to read file line-by-line

I'm learning reactive programming using RxJS and encounter a case when I need to read a file line-by-line. Actually I solved it using a solution likes:
https://gist.github.com/yvele/447555b1c5060952a279
It works, but I need to use some normal JS code to transform the stream of Buffers to stream of lines. (use "readline" module in example above)
I wonder if there are other ways to transform an Observable of Buffer to Observable of line, using RxJS operators, likes example below.
var Rx = require('rx');
var fs = require('fs');
var lines = Rx.Observable
.fromEvent(rl, 'data') // emits buffers overtime
// some transforms ...
.subscribe(
(line) => console.log(line), // emit string line by line
err => console.log("Error: %s", err),
() => console.log("Completed")
);
You can probably achieve something pretty close to what you want with scan and concatMap.
Something like:
bufferSource
.concat(Rx.Observable.of("\n")) // parens was missing // to make sure we don't miss the last line!
.scan(({ buffer }, b) => {
const splitted = buffer.concat(b).split("\n");
const rest = splitted.pop();
return { buffer: rest, items: splitted };
}, { buffer: "", items: [] })
// Each item here is a pair { buffer: string, items: string[] }
// such that buffer contains the remaining input text that has no newline
// and items contains the lines that have been produced by the last buffer
.concatMap(({ items }) => items)
// we flatten this into a sequence of items (strings)
.subscribe(
item => console.log(item),
err => console.log(err),
() => console.log("Done with this buffer source"),
);
You can use following class
'use strict'
const lineReader = require('line-reader');
const Rx = require('rxjs');
const RxOp = require('rxjs/operators');
class CSVReader {
constructor(filepath {
this.filepath = filepath;
}
readByLines()
{
const source = new Rx.Subject();
lineReader.open(this.filepath, (err, reader)=> {
Rx.of(0).pipe(
RxOp.expand(val => {
reader.nextLine((err2, line) => source.next(line));
return Rx.of(1 + val);
}),
RxOp.takeWhile(_=> {
let has = reader.hasNextLine();
if(!has) source.complete();
return has;
})
).subscribe(_=>_);
})
return source;
}
}
module.exports = CSVReader
and use it as follows
const { bufferCount } = require('rxjs/operators');
let reader = new CSVReader('path/to/file');
reader.readByLines()
.pipe(bufferCount(2)) // chunk size
.subscribe(chunk=> {
console.log({chunk});
});
I would say like this:
const readline = require('readline');
const fs = require('fs');
const path = require('path');
const {fromEvent, race, Observable} = require('rxjs');
const {tap, takeUntil, take, map} = require('rxjs/operators');
const rl = readline.createInterface({
input: fs.createReadStream(path.resolve('./', 'myfile'))
});
let obs = new Observable(observer=>{
rl.on('line', val => observer.next(val)),
rl.on('error', err => observer.error(err)),
rl.on('close', complete => observer.complete(complete))
})
.pipe(tap(line=>console.log(`line: ${line}`)))
obs.subscribe(()=>{},
(e)=>console.log(`Error reading file: ${e}`),
()=>console.log("Read complete"))
An alternative for creating the observable could be:
let obs = fromEvent(rl, 'line')
.pipe(
takeUntil(race(
fromEvent(rl, 'close').pipe(take(1)) ,
fromEvent(rl, 'error').pipe(map((err)=>{throw err}))
)))
Ideally, rxjs could have provided an operator like: fromEvent(emitter, nextEvent, errorEvent, completeEvent ) to help keep the above code even simpler.
I tried a bunch of the above answers and built my own ugly version. Then, I poked around the code on GitHub and found that RxJS handles stream like objects - there's no point in mucking around with events. Just pass a ReadStream to from and it tests it for ReadableStreamLike and then turns it into an AsyncGenerator.
import * as readline from 'node:readline';
import { from } from 'rxjs';
const file = fs.createReadStream(fileName);
const line = readline.createInterface({ input: file });
const line$ = from(line).subscribe({
next: (dat) => { ... },
error: (err) => { ... },
complete: () => { ... }
});

Resources