Firebase function always timeout on large files? - node.js

I have created a Firebase function that is triggered when a video is uploaded to Firebase Storage and uses ffmpeg to add a watermark to it. It works fine with small videos, but it always times out on large ones. Any idea how I can overcome these limits?
const functions = require('firebase-functions');
const { Storage } = require('@google-cloud/storage');
const projectId = 'video-sharing-a57fa';
const admin = require('firebase-admin');
admin.initializeApp();
let gcs = new Storage({
projectId
});
const os = require('os');
const path = require('path');
const spawn = require('child-process-promise').spawn;
exports.addLogo = functions.runWith({ memory: '4GB', timeoutSeconds: 540 }).storage.object().onFinalize(async event => {
const bucket = event.bucket;
const contentType = event.contentType;
const filePath = event.name;
console.log('File change detected, function execution started');
if (path.basename(filePath).startsWith('resized-')) {
console.log('We already renamed that file!');
return;
}
const destBucket = gcs.bucket(bucket);
const tmpFilePath = path.join(os.tmpdir(), path.basename(filePath));
const metadata = { contentType: contentType };
const tmpLogoPath = path.join(os.tmpdir(), 'watermark.png');
await destBucket.file('watermark.png').download({
destination: tmpLogoPath
})
const newPath = path.join(os.tmpdir(), 'output.mp4')
return destBucket.file(filePath).download({
destination: tmpFilePath
}).then(() => {
console.log('entered spawn');
var str = "overlay=10:10"
return spawn('ffmpeg', ['-i', tmpFilePath, '-i', tmpLogoPath, '-filter_complex', str, newPath]);
}).then(() => {
console.log('changing the name');
return destBucket.upload(newPath, {
destination: path.dirname(filePath) + '/resized-' + path.basename(filePath),
metadata: metadata
})
});
})

Cloud Functions have a limited execution time of at most 9 minutes. More information here. Most likely the problem is that ffmpeg does not manage to add the watermark within that time. Your options are:
Check the function's log to confirm that this is exactly the error: firebase functions:log --only <FUNCTION_NAME>
Consider a different architecture for processing really large files:
a. Limit the amount of data ffmpeg processes per invocation, e.g. with -ss 50 -t 10 (see the sketch after this list). In this scenario the architecture would be: a) one function that reads files and puts them into a queue, b) one function that reads the size of each file and puts segment descriptions into another queue, e.g. {name: "file1.mp4", start: 10, duration: 15}
b. Use an on-demand container such as Cloud Run
c. Use App Engine if you are constantly processing files
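A minimal sketch of option (a), assuming the segment boundaries arrive in a queue message like the one above (the helper name below is made up for illustration):
const spawn = require('child-process-promise').spawn;

// Watermark only a slice of the input video; start/duration would come from
// a queue message such as {name: "file1.mp4", start: 10, duration: 15}.
function watermarkSegment(inputPath, logoPath, outputPath, start, duration) {
  return spawn('ffmpeg', [
    '-ss', String(start),   // seek to the segment start before reading the input
    '-t', String(duration), // limit how much of the video is processed
    '-i', inputPath,
    '-i', logoPath,
    '-filter_complex', 'overlay=10:10',
    outputPath,
  ]);
}
The resulting segments would still have to be concatenated afterwards, which is why this option is paired with a queue-based pipeline.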

Related

Firebase function: Error: unable to open for write

So I was trying to implement a Firebase function. I went to the Firebase functions examples repository and copied one. Everything deploys properly ("Deploy complete!") with no sign of an error. However, when I try to upload an image to Firebase Storage, the function can't open it?
Here is the code that I used:
const functions = require('firebase-functions');
const admin = require('firebase-admin');
admin.initializeApp()
const {Storage} = require("@google-cloud/storage");
const gcs = new Storage();
const path = require('path');
const os = require('os');
const fs = require('fs');
const sharp = require("sharp");
exports.generateThumbnail = functions.storage.object().onFinalize(async (object) => {
const fileBucket = object.bucket; // The Storage bucket that contains the file.
const filePath = object.name; // File path in the bucket.
const contentType = object.contentType; // File content type.
const metageneration = object.metageneration; // Number of times metadata has been generated. New objects have a value of 1.
if (!contentType.startsWith('image/')) {
return console.log('This is not an image.');
}
const fileName = path.basename(filePath);
if (fileName.startsWith('thumb_')) {
return console.log('Already a Thumbnail.');
}
const bucket = admin.storage().bucket(fileBucket);
const tempFilePath = path.join(os.tmpdir(), fileName);
console.log('Created temporary path',tempFilePath);
const metadata = {
contentType: contentType,
};
await bucket.file(filePath).download({destination: tempFilePath});
console.log('Image downloaded locally to', tempFilePath);
const thumbFileName = `thumb_${fileName}`;
const thumbFilePath = path.join(path.dirname(filePath), thumbFileName);
console.log('Created thumb path',tempFilePath);
const size = 200;
/*await*/ sharp(tempFilePath).resize(size,size).toFile(thumbFilePath);
await bucket.upload(tempFilePath, {
destination: filePath,
metadata: metadata,
});
return fs.unlinkSync(tempFilePath);
});
Error: unable to open for write
Cloud Functions has a read-only filesystem except for the /tmp directory. You have to make sure you are writing your data to a path /tmp/your-file
The only writeable part of the filesystem is the /tmp directory, which
you can use to store temporary files in a function instance. This is a
local disk mount point known as a "tmpfs" volume in which data written
to the volume is stored in memory. Note that it will consume memory
resources provisioned for the function.
Cloud Functions Execution Environment
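Applied to the code above, the fix is to write the resized image to a path under os.tmpdir() first and then upload that file; a minimal sketch reusing the names from the question:
// Write the thumbnail to /tmp first, then upload it to the target bucket path.
const thumbTempPath = path.join(os.tmpdir(), thumbFileName);
await sharp(tempFilePath).resize(size, size).toFile(thumbTempPath);
await bucket.upload(thumbTempPath, {
  destination: thumbFilePath, // e.g. images/thumb_photo.png inside the bucket
  metadata: metadata,
});
fs.unlinkSync(thumbTempPath);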
I guess that this will also work in Firebase, else please comment:
gcsfs
If you put gcsfs in the requirements.txt and import gcsfs in the Python code, you can use the module like this (an example for saving a CSV):
fs = gcsfs.GCSFileSystem(project=MY_PROJECT)
fs.ls(BUCKET_NAME)
# or choose 'w' here:
with fs.open(filepath, 'wb') as outcsv:
...
Further links:
How to open a file from google cloud storage into a cloud function
https://gcsfs.readthedocs.io/en/latest/index.html#examples
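Since the function in this question is written in Node.js, a rough equivalent of writing directly to the bucket (without going through /tmp) is a write stream on the File object from @google-cloud/storage; a sketch with placeholder bucket and object names:
const { Storage } = require('@google-cloud/storage');
const storage = new Storage();

// Stream data straight into an object instead of writing a local file first.
const file = storage.bucket('your-bucket-name').file('exports/data.csv');
const writeStream = file.createWriteStream({
  metadata: { contentType: 'text/csv' },
});
writeStream.on('error', (err) => console.error(err));
writeStream.on('finish', () => console.log('Upload finished'));
writeStream.end('col1,col2\n1,2\n');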

Upload and unzip .zip file from E-Mail in Google Cloud Functions deleting .zip after completion nodeJS

I am receiving an e-mail containing a single .zip file. Right now I'm trying to upload it to GCS and bulk-decompress the file into its own folder. Running a Bulk Decompress Dataflow job for such a small action seems like overkill to me.
I was thinking about using the "unzip-stream" package, but so far I have not come up with an efficient solution to my problem.
Is it even possible to process a file like this in a Cloud Function? Or is there no way around a dedicated server handling decompression and then uploading the content to GCS?
Here's my code:
const os = require('os');
const fs = require('fs');
const path = require('path');
const { Storage } = require('@google-cloud/storage');
const storage = new Storage();
// Node.js doesn't have a built-in multipart/form-data parsing library.
// Instead, we can use the 'busboy' library from NPM to parse these requests.
const Busboy = require('busboy');
exports.uploadZIPFile = (req, res) => {
if (req.method !== 'POST') {
// Return a "method not allowed" error
return res.status(405).end();
}
const busboy = new Busboy({ headers: req.headers });
const tmpdir = os.tmpdir();
for (const a in req.headers) {
console.log(`Header: ${a}`);
}
// This object will accumulate all the fields, keyed by their name
const fields = {};
// This code will process each non-file field in the form.
busboy.on('field', (fieldname, val) => {
// TODO(developer): Process submitted field values here
console.log(`Processed field ${fieldname}: ${val}.`);
fields[fieldname] = val;
});
// This object will accumulate all the uploaded files, keyed by their name.
const uploads = {};
const fileWrites = [];
// This code will process each file uploaded.
busboy.on('file', (fieldname, file, filename) => {
// Note: os.tmpdir() points to an in-memory file system on GCF
// Thus, any files in it must fit in the instance's memory.
console.log(`Processed file ${filename} - ${fieldname} - ${file}`);
const filepath = path.join(tmpdir, filename);
uploads[fieldname] = filepath;
const writeStream = fs.createWriteStream(filepath);
file.pipe(writeStream);
// File was processed by Busboy; wait for it to be written to disk.
const promise = new Promise((resolve, reject) => {
file.on('end', () => {
writeStream.end();
});
writeStream.on('finish', resolve);
writeStream.on('error', reject);
});
fileWrites.push(promise);
});
// Triggered once all uploaded files are processed by Busboy.
// We still need to wait for the disk writes (saves) to complete.
busboy.on('finish', async () => {
await Promise.all(fileWrites);
// TODO(developer): Process saved files here
for (const file in uploads) {
async function upload2bucket() {
// Uploads a local file to the bucket
const bucketName = 'myBucket';
const todayDate = new Date().toISOString().slice(0, 10);
await storage.bucket(bucketName).upload(uploads[file], {
// Support for HTTP requests made with `Accept-Encoding: gzip`
gzip: true,
// By setting the option `destination`, you can change the name of the
// object you are uploading to a bucket.
destination:
'zip-inbox/' + todayDate + '_' + uploads[file].substring(5),
metadata: {
// Enable long-lived HTTP caching headers
// Use only if the contents of the file will never change
// (If the contents will change, use cacheControl: 'no-cache')
cacheControl: 'no-cache',
},
});
console.log(`${uploads[file]} uploaded to ${bucketName}.`);
}
if (uploads[file].endsWith('.zip')) {
await upload2bucket();
}
console.log(`${file}: ${uploads[file]}`);
//fs.unlinkSync(file);
}
res.status(200).send('Success');
});
busboy.end(req.rawBody);
};
Cloud Functions are great for small tasks that take little time. When you have a requirement like this one, where you want to spin up an instance for a unit of work only when needed but cannot predict how long it will take, I suggest using Cloud Run; have a look at this use case.
You may replace your "do-everything" Cloud Function with several functions and create a pipeline with Pub/Sub messages or with Google Workflows (released in January 2021).
I also recommend using the firebase-functions package to simplify the function definitions.
The pipeline could look like this:
Create and schedule a function which gets attachments from the mailbox and uploads them to Cloud Storage. After each attachment has been uploaded, it sends a Pub/Sub message with the file path as payload to the next step.
Create another function which executes on the Pub/Sub message and unpacks the previously uploaded file.
Note: a function can execute for no more than 9 minutes. Split the work further if it takes longer than the limit.
Here is a pseudo code for the proposed pipeline:
import * as functions from 'firebase-functions';
import {PubSub} from '@google-cloud/pubsub';
const REGION = 'your-google-cloud-region'; // e.g. europe-west1
const PUBSUB_TOPIC_UNPACK_ZIP = 'your-topic-name-for-step-2';
/**
* Step 1.
*/
exports.getFromEmailAndUploadToStorage = functions
.region(REGION)
.runWith({
memory: '256MB',
timeoutSeconds: 540, // Max execution duration is 9 minutes
maxInstances: 1
})
.pubsub
.schedule('every 15 minutes')
.timeZone('Asia/Vladivostok')
.onRun(async _ => {
const filepath = await yourLogicToFetchEmailAndUploadToStorage();
if (filepath) {
const messagePayload = {filepath};
console.log('Publishing message to the topic:', PUBSUB_TOPIC_UNPACK_ZIP);
console.log('Payload:', JSON.stringify(messagePayload));
await new PubSub().topic(PUBSUB_TOPIC_UNPACK_ZIP).publishMessage({
json: messagePayload
});
}
});
/**
* Step 2.
*/
exports.unzipFilesInStorage = functions
.region(REGION)
.runWith({
memory: '256MB',
timeoutSeconds: 540, // Max execution duration is 9 minutes
maxInstances: 1
})
.pubsub
.topic(PUBSUB_TOPIC_UNPACK_ZIP)
.onPublish(async (message, context) => {
const {filepath} = message?.json;
if (!filepath) {
throw new Error('filepath is not set.');
}
// your unzip logic here.
// after unzipping you can pipe the results further via PubSub messages.
});
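For the "your unzip logic here" part of step 2, one possible sketch uses the unzip-stream package mentioned in the question and streams entries straight between Cloud Storage objects, so nothing has to be staged in /tmp (bucket name and target prefix are placeholders):
const unzip = require('unzip-stream');
const { Storage } = require('@google-cloud/storage');
const storage = new Storage();

// Read the archive from the bucket, unpack it on the fly, and stream every
// file entry back into the bucket under a target prefix.
function unzipObject(bucketName, zipPath, targetPrefix) {
  const bucket = storage.bucket(bucketName);
  const entryUploads = [];
  return new Promise((resolve, reject) => {
    bucket
      .file(zipPath)
      .createReadStream()
      .pipe(unzip.Parse())
      .on('entry', (entry) => {
        if (entry.type === 'File') {
          entryUploads.push(new Promise((res, rej) => {
            entry
              .pipe(bucket.file(`${targetPrefix}/${entry.path}`).createWriteStream())
              .on('finish', res)
              .on('error', rej);
          }));
        } else {
          entry.autodrain(); // nothing to store for directory entries
        }
      })
      .on('error', reject)
      .on('finish', () => resolve(Promise.all(entryUploads))); // archive fully read
  });
}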

Error when trying to count number of files in a specific directory using event driven Google Cloud Functions Node JS 10

I get the following error when using files.length in a Google Cloud Function running Node.js 10:
textPayload: "TypeError [ERR_INVALID_ARG_TYPE]: The "path" argument
must be one of type string, Buffer, or URL. Received type object"
My current code is as follows:
const {Storage} = require('@google-cloud/storage');
const path = require('path');
var fs = require('fs');
exports.copyRenders = (event, context) => {
const gcsEvent = event;
const sourcePathOnly = gcsEvent.name
const sourceFolder = sourcePathOnly.split('/').slice(-2)
fs.readdir(sourceFolder, (err, files) => {
console.log(files.length);
//console.log(`Files are: ${sourceFolder}`);
});
}
What am I doing wrong here?
Thanks
Try adding .join("") to make your sourceFolder constant a string; it seems to me you made it an array with .slice:
exports.copyRenders = (event, context) => {
const gcsEvent = event;
const sourcePathOnly = gcsEvent.name
const sourceFolder = sourcePathOnly.split('/').slice(-2)
.join("") // <- here
fs.readdir(sourceFolder, (err, files) => {
//console.log(files.length);
console.log(`Files are: ${sourceFolder}`);
});
}
In Cloud Storage there are no folders; here it is explained how directories work: they are only longer object names of the form directory/subdirectory/objectName.
Therefore the usual ways of getting all the elements in a directory don't work. Here is an example of listing objects in a bucket, and the way to list objects that share a prefix (i.e. that are in the same "directory").
Here is how you can count the files that share a prefix in a bucket:
async function countFilesByPrefix(bucketName, prefix) {
// Imports the Google Cloud client library
const { Storage } = require('@google-cloud/storage');
const storage = new Storage();
const options = {
prefix: prefix,
};
const [files] = await storage.bucket(bucketName).getFiles(options);
console.log(`Number of files: ${files.length}`);
}
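Applied to the original trigger, the prefix can be derived from the event's object name instead of reading a local directory with fs.readdir; a minimal sketch:
exports.copyRenders = async (event, context) => {
  const gcsEvent = event;
  // e.g. "renders/scene01/frame_0001.png" -> prefix "renders/scene01/"
  const prefix = gcsEvent.name.split('/').slice(0, -1).join('/') + '/';
  await countFilesByPrefix(gcsEvent.bucket, prefix);
};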

Check if image exists at storage with firebase cloud function

I need your help with a function that I created to manipulate the images that my users send to my app.
What I need is to get the image the user sent, resize it, and check whether the image has already been processed so the function doesn't do all the work again. The examples I saw change the image name and check whether the name starts with the prefix they set, but in my case I need to keep the original name of the picture. So, how can I do that? Or is there a better way to solve this problem?
My function code is:
import * as functions from 'firebase-functions';
import { Storage } from '@google-cloud/storage';
const gcs = new Storage();
import { tmpdir } from 'os';
import { join, dirname } from 'path';
import * as sharp from 'sharp';
import * as fs from 'fs-extra';
export const generateThumbs = functions.storage
.object()
.onFinalize(async object => {
const bucket = gcs.bucket(object.bucket);
const filePath = object.name;
const fileName = filePath.split('/').pop();
const bucketDir = dirname(filePath);
const workingDir = join(tmpdir(), 'thumbs');
const tmpFilePath = join(workingDir, 'source.png');
if (!object.contentType.includes('image')) {
console.log('exiting function');
return false;
}
// 1. Ensure thumbnail dir exists
await fs.ensureDir(workingDir);
// 2. Download Source File
await bucket.file(filePath).download({
destination: tmpFilePath
});
// 3. Resize the images and define an array of upload promises
const sizes = [64, 128, 256];
const uploadPromises = sizes.map(async size => {
const thumbName = `thumb#${size}_${fileName}`;
const thumbPath = join(workingDir, thumbName);
// Resize source image
await sharp(tmpFilePath)
.resize(size, size)
.toFile(thumbPath);
// Upload to GCS
return bucket.upload(thumbPath, {
destination: join(bucketDir, thumbName)
});
});
// 4. Run the upload operations
await Promise.all(uploadPromises);
// 5. Cleanup remove the tmp/thumbs from the filesystem
return fs.remove(workingDir);
});
If you need to overwrite the original file, and you want to avoid an infinite loop with this function, you could attach custom metadata to the file when you upload it back to Cloud Storage. Then, when the function is invoked again for that file, you can check the metadata on the incoming ObjectMetadata object to know when the function should bail out without making any more changes.
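A minimal sketch of that approach, reusing the names from the function above (the resized key is an arbitrary choice):
// At the top of the handler: bail out if this file was already processed.
if (object.metadata && object.metadata.resized === 'true') {
  console.log('Already processed, exiting.');
  return false;
}
// ...
// When uploading the processed file back, attach the custom metadata flag.
await bucket.upload(thumbPath, {
  destination: filePath, // overwrite the original object, keeping its name
  metadata: {
    metadata: { resized: 'true' }, // custom key/value pairs go under metadata.metadata
  },
});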

Can I zip files in Firebase Storage via Firebase Cloud Functions?

Is it possible to compress multiple files in Firebase Storage using Cloud Functions?
For example, if users have uploaded 5 images, a Cloud Function would create a zip file containing those 5 images.
I could not find an end-to-end guide for a similar scenario in Cloud Functions myself, so I had to combine solutions for zipping, accessing files in Cloud Storage, etc. See the result below:
import * as functions from 'firebase-functions';
import admin from 'firebase-admin';
import archiver from 'archiver';
import { v4 as uuidv4 } from 'uuid';
export const createZip = functions.https.onCall(async () => {
const storage = admin.storage();
const bucket = storage.bucket('bucket-name');
// generate random name for a file
const filePath = uuidv4();
const file = bucket.file(filePath);
const outputStreamBuffer = file.createWriteStream({
gzip: true,
contentType: 'application/zip',
});
const archive = archiver('zip', {
gzip: true,
zlib: { level: 9 },
});
archive.on('error', (err) => {
throw err;
});
archive.pipe(outputStreamBuffer);
// use firestore, request data etc. to get file names and their full path in storage
// file path can not start with '/'
const userFilePath = 'user-file-path';
const userFileName = 'user-file-name';
const userFile = await bucket.file(userFilePath).download();
archive.append(userFile[0], {
name: userFileName, // if you want to have directory structure inside zip file, add prefix to name -> /folder/ + userFileName
});
archive.on('finish', async () => {
console.log('uploaded zip', filePath);
// get url to download zip file
await bucket
.file(filePath)
.getSignedUrl({ expires: '03-09-2491', action: 'read' })
.then((signedUrls) => console.log(signedUrls[0]));
});
await archive.finalize();
});
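For reference, the callable above could then be invoked from a client roughly like this (shown with the v8 namespaced Firebase JS SDK; adjust for the modular SDK if you use it):
// Client side: ask the function to build the zip and log when the call returns.
const createZip = firebase.functions().httpsCallable('createZip');
createZip()
  .then(() => console.log('zip requested'))
  .catch((err) => console.error(err));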
