upload to s3 with fileS buffer - node.js

I am trying to upload to s3 with bulk files.
Somehow if I am uploading with callback, it'll work properly but I want to push all the return data into an array then do something after. But it doesn't work.
I looked online, I was saw answers such as using async await or recurssive would work but still it's not working though. I even tried using reduce but no luck too
example of my reduce
return files.reduce((accumulator, current) => {
const {path, buffer} = current;
const s3 = new AWS.S3();
// https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#putObject-property
s3.putObject(awsS3sdkParams(path, buffer), function ( err, data ) {
const { protocol, host } = this.request.httpRequest.endpoint;
data.params = this.request.params;
data.params.url = `${protocol}//${host}/${data.params.Key}`;
return [...accumulator, data];
});
}, []);
example using recurrsive
const result = [];
const helper = (files) => {
const {path, buffer} = files[0];
const s3 = new AWS.S3();
// https://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#putObject-property
s3.putObject(UploadService.awsS3sdkParams(path, buffer), function (err, data){
const { protocol, host } = this.request.httpRequest.endpoint;
data.params = this.request.params;
data.params.url = `${protocol}//${host}/${data.params.Key}`;
UtilsService.clDebug(data, 'data');
result.push(data);
files.shift();
if(files.length > 0) return helper(files);
});
};
helper(files);
return results;
example using promise
const result = [];
for(let {path, buffer} of files){
const s3 = new AWS.S3();
s3.putObject(awsS3sdkParams(path, buffer)).promise()
.then(file => {
result.push(file);
})
.catch(err => {
console.log(err, 'errs');
});
}
I can pretty much understand why result is always [] but how can I make it work though?
Reason why I cannot use async await is because I tried but then somehow after files are either uploaded with bad data that I cannot even open the file, or keys would be the same...
Does anyone has any other suggestions or advice?
Thanks in advance for any

Related

Downloading many images with node.js + axios stops downloading suddenly after a while with no errors

its that time again when I'm clueless & come humbly to ask for help!
I am trying to download 4500 images at once, average 1mb size, all the images get created & download starts, after about 2gb downloaded (so half) some images are complete, some partial, some empty, task manager confirms the download stops suddenly.
What could possibly be the issue? No matter how much I wait, nothing happens, at least if I got an error I would try something else...
Please advice if possible, thank you!
//get all json files from a folder
const fs = require("fs");
const path = require("path");
const axios = require("axios");
let urlsArray = [];
const collection = "rebels";
const folder = collection + "_json";
const getFiles = (folder) => {
const directoryPath = path.join(__dirname, folder);
return fs.readdirSync(directoryPath);
};
const files = getFiles(folder);
//inside the folder there are json files with metadata
//for each json file parse it and get the image url
files.forEach((file) => {
const filePath = path.join(__dirname, folder, file);
const fileContent = fs.readFileSync(filePath, "utf8");
const parsedJson = JSON.parse(fileContent);
const imageFromMetadata = parsedJson.image;
const url = imageFromMetadata.replace("ipfs://", "https://ipfs.io/ipfs/");
let nr = file.replace(".json", "");
urlsArray.push({ url, nr });
});
//foreach url create a promise to download with axios
const downloadImage = (url, nr) => {
const writer = fs.createWriteStream(
process.cwd() + `/${collection}_images2/${nr}.png`
);
return axios({
url,
method: "GET",
responseType: "stream",
}).then((response) => {
return new Promise((resolve, reject) => {
response.data.pipe(writer);
writer.on("finish", resolve);
writer.on("error", reject);
});
});
};
const promiseAll = async () => {
const promises = urlsArray.map((data) => {
console.log(`trying to download image nr ${data.nr} from ${data.url}`);
return downloadImage(data.url, data.nr);
});
await Promise.allSettled(promises);
};
promiseAll();
//download all
Since Promise.allSettled() never rejects, nothing in your code will report on any rejected promises that it sees. So, I'd suggest you iterate its results and see if you have any rejected promises there.
You can do that like this:
const results = await Promise.allSettled(promises);
console.log(`results.length = ${results.length}`);
for (const r of results) {
if (r.status === "rejected") {
console.log(r.reason);
}
}
console.log("all done");
This will verify that you got through the end of the Promise.allSettled(promises) and will verify that you got non-zero results and will log any rejected promises you got.

generated sitemaps are corrupted using sitemap library for node/js

I'm using a library called sitemap to generate files from an array of objects constructed during runtime. My goal is to upload these generated sitemaps to an S3 bucket.
So far, the function is hosted on AWS lambda and uploading generated files correctly to the bucket.
My problem is that, the generated sitemaps are corrupted. When I run the function locally, they get generated correctly without any issues.
Here's my handler:
module.exports.handler = async () => {
try {
console.log("inside handler....");
await clearGeneratedSitemapsFromTmpDir();
const sms = new SitemapAndIndexStream({
limit: 10000,
getSitemapStream: (i) => {
const sitemapStream = new SitemapStream({
lastmodDateOnly: true,
});
const linkPath = `/sitemap-${i + 1}.xml`;
const writePath = `/tmp/${linkPath}`;
sitemapStream.pipe(createWriteStream(resolve(writePath)));
return [new URL(linkPath, hostName).toString(), sitemapStream];
},
});
const data = await generateSiteMap();
sms.pipe(createWriteStream(resolve("/tmp/sitemap-index.xml")));
// data.forEach((item) => sms.write(item));
Readable.from(data).pipe(sms);
sms.end();
await uploadToS3();
await clearGeneratedSitemapsFromTmpDir();
} catch (error) {
console.log("🚀 ~ file: index.js ~ line 228 ~ exec ~ error", error);
Sentry.captureException(error);
}
};
The data variable has an array of around 11k items, so according to the code above, two sitemap files would be generated(first 10k, rest to second sitemap) in addition to a sitemap index where it lists the two generated sitemaps.
Here's my uploadToS3 function:
const uploadToS3 = async () => {
try {
console.log("uploading to s3....");
const files = await getGeneratedXmlFilesNames();
for (let i = 0; i < files.length; i += 1) {
const file = files[i];
const filePath = `/tmp/${file}`;
// const stream = createReadStream(resolve(filePath));
const fileRead = await readFileAsync(filePath, { encoding: "utf-8" });
const params = {
Body: fileRead,
Key: `${file}`,
ACL: "public-read",
ContentType: "application/xml",
ContentDisposition: "inline",
};
// const result = await s3Client.upload(params).promise();
const result = await s3Client.putObject(params).promise();
console.log(
"🚀 ~ file: index.js ~ line 228 ~ uploadToS3 ~ result",
result
);
}
} catch (error) {
console.log("uploadToS3 => error", error);
// Sentry.captureException(error);
}
};
And here's the function that cleans up the generated files from lambda's /tmp directory after upload to S3:
const clearGeneratedSitemapsFromTmpDir = async () => {
try {
console.log("cleaning up....");
const readLocalTempDirDir = await readDirAsync("/tmp");
const xmlFiles = readLocalTempDirDir.filter((file) =>
file.includes(".xml")
);
for (const file of xmlFiles) {
await unlinkAsync(`/tmp/${file}`);
console.log("deleting file....");
}
} catch (error) {
console.log(
"🚀 ~ file: index.js ~ line 207 ~ clearGeneratedSitemapsFromTmpDir ~ error",
error
);
}
};
My hunch is that the issue is related to streams as I haven't fully understood them yet.
Any help here is highly appreciated.
Side note: I tried to sleep for 10s before uploading, but that didn't work either.
As a workaround, I did this:
const data = await generateSiteMap();
const logger = createWriteStream(resolve("/tmp/all-urls.json.txt"), {
flags: "a",
});
data.forEach((el) => {
logger.write(JSON.stringify(el));
logger.write("\n");
});
logger.end();
const stream = lineSeparatedURLsToSitemapOptions(
createReadStream(resolve("/tmp/all-urls.json.txt"))
)
.pipe(sms)
.pipe(createWriteStream(resolve("/tmp/sitemap-index.xml")));
await new Promise((fulfill) => stream.on("finish", fulfill));
await uploadToS3();
await clearGeneratedSitemapsFromTmpDir();
Will keep question open in case somebody answers it correctly.

Retrieve the attributes from a file using nodejs?

Hi I'm looking for an nodejs code which would probably return the attributes of each file in a folder. I developed the code to retrieve all the file name in a folder and another code to list data's of filename provide by us. But actually I need to return all the files names in a folder with its column name. I'm new to nodejs so someone help me please.
LISTING DATA CODE:
const AWS = require('aws-sdk');
const neatCsv = require('neat-csv');
var s3 = new AWS.S3({});
exports.handler = (event,context,callback)=>{
const params = {
Bucket:'ml-framework-api',
Key: wavicle.csv
};
s3.getObject(params,async(err, result) => {
if (err){
return console.error(err);
}
neatCsv(result.Body).then((parsedData) => {
callback(null,parsedData);
})
})
}
LISTING FILE IN S3:
const AWS = require('aws-sdk')
const s3 = new AWS.S3({
accessKeyId:'-------------',
secretAccessKey:'-------------------',
region:'ap-south-1'
})
const params = {
Bucket:'wavicle'
}
s3.listObjects(params,(err,data)=>{
if(err){
return console.log(err)
}
console.log(data)
})
It's best to start with node's file system api documentation.
Here is a simple example of how to get information about files of a folder (there are many ways, this is quickly from the example in the documentation above):
const fsp = require("fs/promises");
async function dirFilesInfo(path) {
const dir = await fsp.opendir(path);
for await (const dirEntry of dir) {
const fileInfo = await fsp.stat("./" + dirEntry.name);
console.log(dirEntry.name, fileInfo);
}
}
dirFilesInfo("./").catch(console.error);

How to download much files in a loop in node js?

i take a first steps with node.js and i don't understand async code.
I would to make a code who downloads 1k files from links.
In my code i used package "node-downloader-helper".
and this package work to one or 2 files.
when loop is bigger downloads max 70 files and all don't working correctly
all are downloaded incorrectly.
Its my code:
let uniq = [array with 1000 links];
async.forEachOf(uniq, function (value, key, callback) {
const dl = new DownloaderHelper(value, 'E:/XAMPP/htdocs/mydownloads/pdf/',{fileName: key+".pdf"});
dl.on('end', () => console.log('Download Completed'))
dl.start();
}, function (err) {
if (err) console.error(err.message);
});
enter image description here
solution
const save = async (link, index) => {
const dl = new DownloaderHelper(link, linkToDirectory)
await dl.start();
}
const forLoop = async _ => {
console.log('Start')
for (let index = 0; index < uniq.length; index++) {
const link = uniq[index]
const response = await save(link, index)
console.log(response)
}
console.log('End')
}
forLoop();

How to read a txt file, stored in a google storage bucket, from a cloud function, in node

I've written some node.js code which is sitting in the source of a Cloud Function. When it runs I want to read a text file from a google storage bucket, and process it.
The Code runs fine when running locally, but for some reason doesn't work when running in the Cloud Function. I would expect something written out from the console logs.
I can't see any errors, as I thought it might be a permissions problem (might be looking in the wrong place though).
Any ideas?
The awaits and async's were just because I wanted it to wait for the response before continuing, but that seems to have no effect on it either.
const fileName = 'testData.txt';
const {Storage} = require('#google-cloud/storage');
const storage = new Storage();
const bucket = storage.bucket('my_bucket_name');
const remoteFile = bucket.file(fileName);
await remoteFile.download(async function(err, contents) {
console.log("file err: "+err);
console.log("file data: "+contents);
});
What you can do is to verify that the runtime account for the function has the necessary permissions to access the bucket. In general the runtime account is PROJECT_ID#appspot.gserviceaccount.com and add at least the Storage Object Viewer (you can check more roles here).
Then, test the function again. If something goes wrong, please check the logs of the function.
EDIT
Not sure, but maybe seems to be something with the code. I've used the following to test the function and works perfect:
index.js:
const {Storage} = require('#google-cloud/storage');
const storage = new Storage();
const bucket = storage.bucket('bucket_name');
const fileName = 'test.txt';
const remoteFile = bucket.file(fileName);
exports.helloWorld = (req, res) => {
console.log('Reading File');
var archivo = remoteFile.createReadStream();
console.log('Concat Data');
var buf = '';
archivo.on('data', function(d) {
buf += d;
}).on('end', function() {
console.log(buf);
console.log("End");
res.send(buf);
});
};
package.json:
{
"name": "sample-http",
"version": "0.0.1",
"dependencies": {
"#google-cloud/storage": "^4.7.0"
}
}
async function readStorageFile(obj){
try{
obj.Result = ''
if (obj.filename===undefined) return
bucket = gcs.bucket('gs:yourinfo');
//Get File
await bucket.file(obj.filename).download()
.then(async data => {
obj.data = data
obj.Result = 'OK'
return
})
.catch(err => {
return
})
return
}
catch (err){
return
}
}
obj = {filename:'TEST1.json'}
await readStorageFile(obj,'testing')
if (obj.Result==='OK') { console.log('obj.data='+obj.data) }
else { console.log('Not Found')}
return

Resources