http://chucklets.no/getOnlineTime
When I click this I can see the JSON and it looks fine.
But when using fetch it
API Node.js code:
app.get('/getOnlineTime', (req, res) => {
console.log("Reading rows from the Table...");
const arr = [];
connection.execSql(new Request('SELECT * FROM OnlineTime', function (err, rowCount, rows) {
if (err) {
console.error(err);
}
})
.on('doneInProc', function (rowCount, more, rows) {
var row = {};
for (let i = 0; i < rows.length; i++) {
var row = {};
for (let j = 0; j < rows[i].length; j++) {
row[rows[i][j].metadata.colName] = rows[i][j].value;
}
arr.push(row);
}
res.json(arr)
})
);
});
React code:
const [playerD, setPlayerData] = useState([]);
useEffect(() => {
const fetchData = async () => {
const response = await fetch('http://chucklets.no/getOnlineTime', {method:'GET'})
if (!response.ok) {
throw new Error(response.status);
}
const data = await response.text()
setPlayerData(data)
}
fetchData()
}, [])
When using fetch on https://nba-players.herokuapp.com/players-stats I get a nice JSON. Any input would be greatly appreciated.
Because it works when you open the link in the browser and doesn't work per JS fetch, I suspect there might be some CORS issues and the Node.js isn't even responding. That's why I suspect the default value of the playerD variable in react is never updated and stay an empty array.
Try using the cors middleware:
Express CORS Middleware
I need to get all data from API. The data is served in batches (pages). Every batch has its page number.
My solution:
GET page 1, save it to the file
Loop by adding +1 to page number and append to the file the result from GET request
Continue while no error
Currently the file is created and then I get: FATAL ERROR: Ineffective mark-compacts near heap limit Allocation failed - JavaScript heap out of memory
So I used --max-old-space-size=8192 no errors since then. It's just keeps working with no result. File stays empty.
Please help!
const fs = require('fs');
const axios = require('axios');
const { response } = require('express');
var myWriteStream = fs.createWriteStream(
'../dev-data/file.json',
{ flags: 'a' },
{ encoding: 'utf8' },
err => {}
);
let pageNumber = 1;
// Getting initial batch on Page 1
axios
.get(`https://api.example.com/?page=${pageNumber}`)
.then(function (response) {
var json = JSON.stringify(response.data);
// Saving result to the file
fs.writeFile('../dev-data/declarations_list.json', json, 'utf-8', err => {
});
// Looping GET + save to the file by adding + 1 to currentPage
do {
pageNumber = response.data.page.currentPage + 1;
axios
.get(
`https://api.example.com/?page=${pageNumber}`
)
.then(function (response) {
console.log(`Current page: ${response.data.page.currentPage}`);
pageNumber = response.data.page.currentPage;
var json = JSON.stringify(response.data);
myWriteStream.write(json);
})
.catch(function (error) {
console.log(error);
});
// Do while currentPage (no 'error')
} while (response.data.page.currentPage);
});
UPDATE
const fs = require('fs');
const axios = require('axios');
const { response } = require('express');
let pageNumber = 0;
do {
pageNumber = pageNumber + 1;
console.log(pageNumber);
axios
.get(`https://public-api.nazk.gov.ua/v1/declaration/?page=${pageNumber}`)
.then(function (response) {
console.log(response);
console.log(`Current page: ${response.data.page.currentPage}`);
pageNumber = response.data.page.currentPage;
var json = JSON.stringify(response.data);
fs.appendFileSync('../dev-data/declarations_list.json', json);
})
.catch(function (error) {
console.log(error);
});
} while (pageNumber < 15000);
This is not tested (because of lacking api access), but i would try to write to the file every time a new page is loaded, basically like so:
const fs = require('fs');
const axios = require('axios');
const { response } = require('express');
let pageNumber = 0;
var stream = fs.createWriteStream('../dev-data/declarations_list.json', {flags:'a'});
do {
pageNumber = ++;
axios
.get(
`https://api.example.com/?page=${pageNumber}`
)
.then(function (response) {
console.log(`Current page: ${response.data.page.currentPage}`);
pageNumber = response.data.page.currentPage;
var json = JSON.stringify(response.data);
stream.write(json);
})
.catch(function (error) {
console.log(error);
});
// Do while currentPage (no 'error')
} while (pageNumber < <total_number_of_pages>);
stream.end();
Also do not nest these axios calls. There is no need to and several problems might arise from that.
However I think the biggest problem was the way you wrote to that stream.
Apart from that your loop never ends if there is no error. You will need to provide the total number of pages you would like to retrieve.
It seems to me if you are not so experienced with this, so you might look up something like "Nodejs and Express save JSON response to file" first, bevore going any further...
Here is my code:
const fs = require('fs');
const screenshot = require('screenshot-stream');
const urlp = require('url');
var urls=[
'https://archive.org/details/8bitrecs',
'http://hackaday.com/',
'http://techcrunch.com/2012/02/16/auraslate-is-an-open-source-android-tablet-for-hackers/',
'http://www.english.illinois.edu/-people-/faculty/debaron/482/482readings/greenfield.html',
'http://sustain.rca.ac.uk/Sustain-Talks'];
urls.forEach(function(url){
const stream = screenshot(url, '1024x768', {crop: true});
stream.pipe(fs.createWriteStream(urlp.parse(url).hostname + 'test-1024x768.png'));
});
It only screenshots the last item in the url. Rhe others are images with zero bytes. I think I need to do the operation asynchronously so it doesn't overwrite the stream each time.
How would I do this?
UPDATE:
I want the screenshot to work, but catch errors and not block if a url is not accessible
UPDATE:
https://www.npmjs.com/package/screenshot-promise worked better although this code below still slows my computer down a lot!
const screenshotPromise = require('screenshot-promise');
...
urls.forEach(function(url) {
const promise = screenshotPromise(url, '1024x768', {crop: true}).then(buf => {
fs.writeFileSync(urlp.parse(url).hostname + 'test-1024x768.png', buf);
});
promise.then((value) => {
// value is whatever we passed in the resolve(...) function above.
// It doesn't have to be a string, but if it is only a succeed message, it probably will be.
console.log(value);
});
What you failed to add here is the error :
events.js:160
throw er; // Unhandled 'error' event
^
Error: Couldn't load url: http://techcrunch.com/2012/02/16/auraslate-is-an-open-source-android-tablet-for-hackers/
at LineStream.byline.on.data
(e:---\node_modules\screenshot-stream\index.js:77:16)
at emitOne (events.js:96:13)
at LineStream.emit (events.js:188:7)
The issue is that the module screenshot-stream is using PhantomJS, and phantomJS is unable to get to the page that outputs the error.
This error seems related to this issue : https://github.com/ariya/phantomjs/issues/10460.
Techcrunch.com and Aol.com seem to use web fonts (e.g. "BebasNeue-webfont.ttf") which Qt loads as application fonts. Something may be going wrong there.
My suggestion is using Google's Puppeteer that includes a built-in screenshot method : https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md#pagescreenshotoptions
Code I did in the end that worked:
const puppeteer = require('puppeteer');
const urlp = require('url');
var URL = require('url-parse');
var urls = [
'https://archive.org/details/8bitrecs',
'http://hackaday.com/',
'http://techcrunch.com/2012/02/16/auraslate-is-an-open-source-android-tablet-for-hackers/',
'http://www.english.illinois.edu/-people-/faculty/debaron/482/482readings/greenfield.html',
'http://sustain.rca.ac.uk/Sustain-Talks',
'https://www.quintessentially.com/',
'https://www.producthunt.com/tech/ux-project-checklist',
'https://freedom.press/',
'http://issuu.com/search?q=vintage+motorcycle',
'http://www.pocketmod.com/v2/',
'https://www.metamind.io/',
'http://nautil.us/blog/chernobyls-hot-mess-the-elephants-foot-is-still-lethal',
'https://www.instructables.com/id/Tool-Storage-Hacks-or-How-to-Hang-Those-Black-Frid/',
'https://www.zippi.co.uk/framed-photo-print'
];
var getLocation = function(href) {
var l = document.createElement("a");
l.href = href;
return l;
};
(async() => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
page.waitForNavigation({
timeout: 40000
});
for (let i = 0; i < urls.length; i++) {
const url = urls[i];
var url1 = new URL(url);
try {
await page.goto(`${url}`);
await page.screenshot({
path: 'images/' + url1.hostname + '.png'
});
} catch (error) {
console.log(error.message);
// await page.close();
// await browser.close();
// process.exit(1);
continue;
}
}
})();
I need to inject a png into a PDF using HummusJS.
In the production version of my API, I'll be receiving a post request containing a base64 that I'll convert to binary reader stream. (In the example below, I'm using a test read from a local file.)
Here's my test input stream (which is defined in a "create" object factory):
hummusReadStreamObject (filePath) {
let fileData = fs.readFileSync(filePath).toString('hex');
let result = []
for (var i = 0; i < fileData.length; i+=2) {
result.push('0x'+fileData[i]+''+fileData[i+1])
}
return new hummus.PDFRStreamForBuffer(result)
},
I generate a PNG (I have confirmed, the PNG is written and valid) and then perform modification during the stream's "finally" event
png.on('finish', function () {
create.modifiedPDFResponseStream({
pngFileName,
readStream: create.hummusReadStreamObject('/foo/file/path'),
res
})
})
the modifiedPDFResponseStream now pulls in the png and is supposed to append it to the file:
modifiedPDFResponseStream ({ pngFileName, readStream, res }) {
const writeStream = new hummus.PDFStreamForResponse(res)
const pdfWriter = hummus.createWriterToModify(
readStream,
writeStream,
{ log: `path/to/error.txt` })
debugger
const pageModifier = new hummus.PDFPageModifier(pdfWriter,0);
if (pngFileName) {
const ctx = pageModifier.startContext().getContext()
ctx.drawImage(2, 2, `./path/to/${pngFileName}`)
pageModifier.endContext().writePage()
pdfWriter.end()
}
}
I sense that I'm close to a solution, the error logs do not report any problems, but i receive the following exception when debugging via Chrome:
events.js:183 Uncaught Error: write after end
at write_ (_http_outgoing.js:622:15)
at ServerResponse.write (_http_outgoing.js:617:10)
at PDFStreamForResponse.write
is the issue something to do with the fact that the stream is being populated during the .png's .on('finish', ...) event? If so, is there a synchronous approach I could take that would mitigate this problem?
inspired by the solution from #JAM in "wait for all streams to finish - stream a directory of files"
I solved this issue by having the .pngWriteStream method return a promise and having the writestream resolve on finish:
return new Promise((resolve, reject) => {
try {
writeStream.on('finish', resolve)
} catch (e) {
$.logger.error(e.message)
reject()
}
so instead of:
const png = create.pngWriteStream({ ... })
png.on('finish', function () {
create.modifiedPDFResponseStream({
pngFileName,
readStream: create.hummusReadStreamObject('/foo/file/path'),
res
})
})
I await the resolution of the promise
const png = await create.pngWriteStream({
fileName: pngFileName,
value: '123456789' })
and the img file is available for hummus.js to write to the pdf!
create.modifiedPDFResponseStream({
pngFileName,
readStream: create.hummusReadStreamObject('/foo/file/path'),
writeStream: new hummus.PDFStreamForResponse(res)
})
I need to do some parsing of large (5-10 Gb)logfiles in Javascript/Node.js (I'm using Cube).
The logline looks something like:
10:00:43.343423 I'm a friendly log message. There are 5 cats, and 7 dogs. We are in state "SUCCESS".
We need to read each line, do some parsing (e.g. strip out 5, 7 and SUCCESS), then pump this data into Cube (https://github.com/square/cube) using their JS client.
Firstly, what is the canonical way in Node to read in a file, line by line?
It seems to be fairly common question online:
http://www.quora.com/What-is-the-best-way-to-read-a-file-line-by-line-in-node-js
Read a file one line at a time in node.js?
A lot of the answers seem to point to a bunch of third-party modules:
https://github.com/nickewing/line-reader
https://github.com/jahewson/node-byline
https://github.com/pkrumins/node-lazy
https://github.com/Gagle/Node-BufferedReader
However, this seems like a fairly basic task - surely, there's a simple way within the stdlib to read in a textfile, line-by-line?
Secondly, I then need to process each line (e.g. convert the timestamp into a Date object, and extract useful fields).
What's the best way to do this, maximising throughput? Is there some way that won't block on either reading in each line, or on sending it to Cube?
Thirdly - I'm guessing using string splits, and the JS equivalent of contains (IndexOf != -1?) will be a lot faster than regexes? Has anybody had much experience in parsing massive amounts of text data in Node.js?
I searched for a solution to parse very large files (gbs) line by line using a stream. All the third-party libraries and examples did not suit my needs since they processed the files not line by line (like 1 , 2 , 3 , 4 ..) or read the entire file to memory
The following solution can parse very large files, line by line using stream & pipe. For testing I used a 2.1 gb file with 17.000.000 records. Ram usage did not exceed 60 mb.
First, install the event-stream package:
npm install event-stream
Then:
var fs = require('fs')
, es = require('event-stream');
var lineNr = 0;
var s = fs.createReadStream('very-large-file.csv')
.pipe(es.split())
.pipe(es.mapSync(function(line){
// pause the readstream
s.pause();
lineNr += 1;
// process line here and call s.resume() when rdy
// function below was for logging memory usage
logMemoryUsage(lineNr);
// resume the readstream, possibly from a callback
s.resume();
})
.on('error', function(err){
console.log('Error while reading file.', err);
})
.on('end', function(){
console.log('Read entire file.')
})
);
Please let me know how it goes!
You can use the inbuilt readline package, see docs here. I use stream to create a new output stream.
var fs = require('fs'),
readline = require('readline'),
stream = require('stream');
var instream = fs.createReadStream('/path/to/file');
var outstream = new stream;
outstream.readable = true;
outstream.writable = true;
var rl = readline.createInterface({
input: instream,
output: outstream,
terminal: false
});
rl.on('line', function(line) {
console.log(line);
//Do your stuff ...
//Then write to output stream
rl.write(line);
});
Large files will take some time to process. Do tell if it works.
I really liked #gerard answer which is actually deserves to be the correct answer here. I made some improvements:
Code is in a class (modular)
Parsing is included
Ability to resume is given to the outside in case there is an asynchronous job is chained to reading the CSV like inserting to DB, or a HTTP request
Reading in chunks/batche sizes that
user can declare. I took care of encoding in the stream too, in case
you have files in different encoding.
Here's the code:
'use strict'
const fs = require('fs'),
util = require('util'),
stream = require('stream'),
es = require('event-stream'),
parse = require("csv-parse"),
iconv = require('iconv-lite');
class CSVReader {
constructor(filename, batchSize, columns) {
this.reader = fs.createReadStream(filename).pipe(iconv.decodeStream('utf8'))
this.batchSize = batchSize || 1000
this.lineNumber = 0
this.data = []
this.parseOptions = {delimiter: '\t', columns: true, escape: '/', relax: true}
}
read(callback) {
this.reader
.pipe(es.split())
.pipe(es.mapSync(line => {
++this.lineNumber
parse(line, this.parseOptions, (err, d) => {
this.data.push(d[0])
})
if (this.lineNumber % this.batchSize === 0) {
callback(this.data)
}
})
.on('error', function(){
console.log('Error while reading file.')
})
.on('end', function(){
console.log('Read entirefile.')
}))
}
continue () {
this.data = []
this.reader.resume()
}
}
module.exports = CSVReader
So basically, here is how you will use it:
let reader = CSVReader('path_to_file.csv')
reader.read(() => reader.continue())
I tested this with a 35GB CSV file and it worked for me and that's why I chose to build it on #gerard's answer, feedbacks are welcomed.
I used https://www.npmjs.com/package/line-by-line for reading more than 1 000 000 lines from a text file. In this case, an occupied capacity of RAM was about 50-60 megabyte.
const LineByLineReader = require('line-by-line'),
lr = new LineByLineReader('big_file.txt');
lr.on('error', function (err) {
// 'err' contains error object
});
lr.on('line', function (line) {
// pause emitting of lines...
lr.pause();
// ...do your asynchronous line processing..
setTimeout(function () {
// ...and continue emitting lines.
lr.resume();
}, 100);
});
lr.on('end', function () {
// All lines are read, file is closed now.
});
The Node.js Documentation offers a very elegant example using the Readline module.
Example: Read File Stream Line-by-Line
const { once } = require('node:events');
const fs = require('fs');
const readline = require('readline');
const rl = readline.createInterface({
input: fs.createReadStream('sample.txt'),
crlfDelay: Infinity
});
rl.on('line', (line) => {
console.log(`Line from file: ${line}`);
});
await once(rl, 'close');
Note: we use the crlfDelay option to recognize all instances of CR LF ('\r\n') as a single line break.
Apart from read the big file line by line, you also can read it chunk by chunk. For more refer to this article
var offset = 0;
var chunkSize = 2048;
var chunkBuffer = new Buffer(chunkSize);
var fp = fs.openSync('filepath', 'r');
var bytesRead = 0;
while(bytesRead = fs.readSync(fp, chunkBuffer, 0, chunkSize, offset)) {
offset += bytesRead;
var str = chunkBuffer.slice(0, bytesRead).toString();
var arr = str.split('\n');
if(bytesRead = chunkSize) {
// the last item of the arr may be not a full line, leave it to the next chunk
offset -= arr.pop().length;
}
lines.push(arr);
}
console.log(lines);
I had the same problem yet. After comparing several modules that seem to have this feature, I decided to do it myself, it's simpler than I thought.
gist: https://gist.github.com/deemstone/8279565
var fetchBlock = lineByline(filepath, onEnd);
fetchBlock(function(lines, start){ ... }); //lines{array} start{int} lines[0] No.
It cover the file opened in a closure, that fetchBlock() returned will fetch a block from the file, end split to array (will deal the segment from last fetch).
I've set the block size to 1024 for each read operation. This may have bugs, but code logic is obvious, try it yourself.
Reading / Writing files using stream with the native nodejs modules (fs, readline):
const fs = require('fs');
const readline = require('readline');
const rl = readline.createInterface({
input: fs.createReadStream('input.json'),
output: fs.createWriteStream('output.json')
});
rl.on('line', function(line) {
console.log(line);
// Do any 'line' processing if you want and then write to the output file
this.output.write(`${line}\n`);
});
rl.on('close', function() {
console.log(`Created "${this.output.path}"`);
});
Based on this questions answer I implemented a class you can use to read a file synchronously line-by-line with fs.readSync(). You can make this "pause" and "resume" by using a Q promise (jQuery seems to require a DOM so cant run it with nodejs):
var fs = require('fs');
var Q = require('q');
var lr = new LineReader(filenameToLoad);
lr.open();
var promise;
workOnLine = function () {
var line = lr.readNextLine();
promise = complexLineTransformation(line).then(
function() {console.log('ok');workOnLine();},
function() {console.log('error');}
);
}
workOnLine();
complexLineTransformation = function (line) {
var deferred = Q.defer();
// ... async call goes here, in callback: deferred.resolve('done ok'); or deferred.reject(new Error(error));
return deferred.promise;
}
function LineReader (filename) {
this.moreLinesAvailable = true;
this.fd = undefined;
this.bufferSize = 1024*1024;
this.buffer = new Buffer(this.bufferSize);
this.leftOver = '';
this.read = undefined;
this.idxStart = undefined;
this.idx = undefined;
this.lineNumber = 0;
this._bundleOfLines = [];
this.open = function() {
this.fd = fs.openSync(filename, 'r');
};
this.readNextLine = function () {
if (this._bundleOfLines.length === 0) {
this._readNextBundleOfLines();
}
this.lineNumber++;
var lineToReturn = this._bundleOfLines[0];
this._bundleOfLines.splice(0, 1); // remove first element (pos, howmany)
return lineToReturn;
};
this.getLineNumber = function() {
return this.lineNumber;
};
this._readNextBundleOfLines = function() {
var line = "";
while ((this.read = fs.readSync(this.fd, this.buffer, 0, this.bufferSize, null)) !== 0) { // read next bytes until end of file
this.leftOver += this.buffer.toString('utf8', 0, this.read); // append to leftOver
this.idxStart = 0
while ((this.idx = this.leftOver.indexOf("\n", this.idxStart)) !== -1) { // as long as there is a newline-char in leftOver
line = this.leftOver.substring(this.idxStart, this.idx);
this._bundleOfLines.push(line);
this.idxStart = this.idx + 1;
}
this.leftOver = this.leftOver.substring(this.idxStart);
if (line !== "") {
break;
}
}
};
}
node-byline uses streams, so i would prefer that one for your huge files.
for your date-conversions i would use moment.js.
for maximising your throughput you could think about using a software-cluster. there are some nice-modules which wrap the node-native cluster-module quite well. i like cluster-master from isaacs. e.g. you could create a cluster of x workers which all compute a file.
for benchmarking splits vs regexes use benchmark.js. i havent tested it until now. benchmark.js is available as a node-module
import * as csv from 'fast-csv';
import * as fs from 'fs';
interface Row {
[s: string]: string;
}
type RowCallBack = (data: Row, index: number) => object;
export class CSVReader {
protected file: string;
protected csvOptions = {
delimiter: ',',
headers: true,
ignoreEmpty: true,
trim: true
};
constructor(file: string, csvOptions = {}) {
if (!fs.existsSync(file)) {
throw new Error(`File ${file} not found.`);
}
this.file = file;
this.csvOptions = Object.assign({}, this.csvOptions, csvOptions);
}
public read(callback: RowCallBack): Promise < Array < object >> {
return new Promise < Array < object >> (resolve => {
const readStream = fs.createReadStream(this.file);
const results: Array < any > = [];
let index = 0;
const csvStream = csv.parse(this.csvOptions).on('data', async (data: Row) => {
index++;
results.push(await callback(data, index));
}).on('error', (err: Error) => {
console.error(err.message);
throw err;
}).on('end', () => {
resolve(results);
});
readStream.pipe(csvStream);
});
}
}
import { CSVReader } from '../src/helpers/CSVReader';
(async () => {
const reader = new CSVReader('./database/migrations/csv/users.csv');
const users = await reader.read(async data => {
return {
username: data.username,
name: data.name,
email: data.email,
cellPhone: data.cell_phone,
homePhone: data.home_phone,
roleId: data.role_id,
description: data.description,
state: data.state,
};
});
console.log(users);
})();
I have made a node module to read large file asynchronously text or JSON.
Tested on large files.
var fs = require('fs')
, util = require('util')
, stream = require('stream')
, es = require('event-stream');
module.exports = FileReader;
function FileReader(){
}
FileReader.prototype.read = function(pathToFile, callback){
var returnTxt = '';
var s = fs.createReadStream(pathToFile)
.pipe(es.split())
.pipe(es.mapSync(function(line){
// pause the readstream
s.pause();
//console.log('reading line: '+line);
returnTxt += line;
// resume the readstream, possibly from a callback
s.resume();
})
.on('error', function(){
console.log('Error while reading file.');
})
.on('end', function(){
console.log('Read entire file.');
callback(returnTxt);
})
);
};
FileReader.prototype.readJSON = function(pathToFile, callback){
try{
this.read(pathToFile, function(txt){callback(JSON.parse(txt));});
}
catch(err){
throw new Error('json file is not valid! '+err.stack);
}
};
Just save the file as file-reader.js, and use it like this:
var FileReader = require('./file-reader');
var fileReader = new FileReader();
fileReader.readJSON(__dirname + '/largeFile.json', function(jsonObj){/*callback logic here*/});