how to restrict making http calls from aws lambda - node.js

I am creating an application that takes Node.js code from the user, and I am creating a Lambda function on the fly using that code. For example, the code can be:
var http = require('http');
exports.handler = function(event, context) {
  console.log('start request to ' + event.url);
  http.get('http://##someapi', function(res) {
    console.log("Any Response : " + res.statusCode);
  }).on('error', function(e) {
    console.log("Error from API : " + e.message);
  });
  console.log('end request to ' + event.url);
  context.done(null);
};
But somehow I want to restrict http/https calls made from that code, as I don't have control over what code the user will pass.
So is there any way to restrict that, like some sort of ROLE or POLICY or other configuration?
I am able to restrict DynamoDB access by specifying a policy in the role, so I have control over DB access, but not over HTTP calls.

Simply prepend the user's code with the following:
(function(){
  function onlyAWS (module) {
    // allow only hostnames ending in amazonaws.com (dot escaped)
    var isAWS = /amazonaws\.com$/i
    var orig = module.request
    module.request = function restrictedRequest (opts, done) {
      if (typeof opts === 'string') opts = require('url').parse(opts)
      if (isAWS.test(opts.host || opts.hostname)) {
        return orig.call(module, opts, done)
      } else {
        throw new Error('No HTTP requests allowed')
      }
    }
  }
  // http.get() delegates to http.request(), so patching request covers both
  onlyAWS(require('http'))
  onlyAWS(require('https'))
})()
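For illustration, with the snippet above prepended, the user's code would behave roughly like this (a minimal sketch; example.com and the DynamoDB hostname are just illustrative endpoints):
var http = require('http');

// Allowed: the hostname ends in amazonaws.com
http.get('http://dynamodb.us-east-1.amazonaws.com/', function (res) {
  console.log('AWS endpoint answered: ' + res.statusCode);
});

// Blocked: the patched http.request throws before any connection is made
try {
  http.get('http://example.com/');
} catch (e) {
  console.log(e.message); // "No HTTP requests allowed"
}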

One alternative would be putting these Lambdas in a VPC with restricted outbound access.
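That approach is configuration rather than code, but for illustration, a minimal sketch with the AWS SDK for JavaScript (v2) might look like the following. The security group ID, subnet ID, and function name are hypothetical placeholders, and it assumes the security group still carries the default allow-all egress rule:
// Sketch: lock a Lambda into a VPC security group with no outbound rules
var AWS = require('aws-sdk');
var ec2 = new AWS.EC2({ region: 'us-east-1' });
var lambda = new AWS.Lambda({ region: 'us-east-1' });

// 1. Remove the default allow-all egress rule from the security group
ec2.revokeSecurityGroupEgress({
  GroupId: 'sg-0123456789abcdef0', // placeholder
  IpPermissions: [{ IpProtocol: '-1', IpRanges: [{ CidrIp: '0.0.0.0/0' }] }]
}, function (err) {
  if (err) return console.error(err);

  // 2. Attach the Lambda to the VPC using that locked-down security group
  lambda.updateFunctionConfiguration({
    FunctionName: 'user-code-runner', // placeholder
    VpcConfig: {
      SubnetIds: ['subnet-0123456789abcdef0'], // placeholder
      SecurityGroupIds: ['sg-0123456789abcdef0']
    }
  }, function (err2) {
    if (err2) console.error(err2);
  });
});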

It sounds like a funny solution, but I found a simple fix for my problem. I am adding the code below along with the code entered by the user:
var require = function(){
  return "You are not allowed to do this operation";
};
Now if the user tries to include any third-party library like require('http'), the code will not be able to instantiate the http lib.
Using this solution I am able to block loading all the third-party libraries that I don't want the user to use in the AWS Lambda function.
I am still searching for a proper solution instead of using this hack in the code.
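For illustration, after this override any user code below it breaks as soon as it touches the module (a minimal sketch; example.com is just an illustrative host):
var require = function () {
  return "You are not allowed to do this operation";
};

// User-supplied code appended below the override:
var http = require('http'); // http is now just a string, not the module
http.get('http://example.com'); // TypeError: http.get is not a function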

Related

AWS Cloudfront + lambda#edge modify html content (making all links absolute -> relative)

I (maybe falsely) assumed Lambda@Edge can modify origin response content, so I wrote a Lambda function like this:
/* this does not work. response.Body is not defined */
'use strict';
exports.handler = (event, context, callback) => {
  var response = event.Records[0].cf.response;
  var data = response.Body.replace(/OLDTEXT/g, 'NEWTEXT');
  response.Body = data;
  callback(null, response);
};
This fails because you cannot reference the origin response body with this syntax.
Can I modify this script to make it work as I intended, or should I consider using another AWS service?
My background:
We are trying to set up an AWS CloudFront distribution that consolidates access to several websites, like this:
ttp://foo.com/ -> https:/newsite.com/foo/
ttp://bar.com/ -> https:/newsite.com/bar/
ttp://boo.com/ -> https:/newsite.com/boo/
The sites are currently managed by external parties. We want to disable direct public access to foo/bar/boo and have just newsite.com as the only site visible on the internet.
Mapping the origins into a single CloudFront distribution is relatively simple.
However, doing so will break HTML content that specifies files with absolute URLs once the current domain names are removed from the web:
ttp://foo.com/images/1.jpg
-> (disable foo.com dns)
-> image not found
To benefit from CloudFront caching and other merits, I want to modify/rewrite all absolute file references in HTML files to relative URLs, so
<img src="ttp://foo.com/images/1.jpg">
becomes
<img src="/foo/images/1.jpg">
//(accessed as https:/newsite.com/foo/images/1.jpg from a user)
//(maybe I should make it an absolute url for SEO purposes)
(http is changed to ttp due to the restriction on using the banned domain name foo.com)
(edit)
I found this AWS blog post, which may be a great hint but feels a little more convoluted than I expected (set up a Linux container so I can just use sed to process the HTML files, maybe using S3 as temporary storage). I hope I can find a simpler way:
https://aws.amazon.com/blogs/networking-and-content-delivery/resizing-images-with-amazon-cloudfront-lambdaedge-aws-cdn-blog/
From what I have just learnt myself, you unfortunately cannot modify the response body within Lambda@Edge. You can only wipe out or totally replace the body content. I was hoping to be able to clean all responses from a legacy site, but using a CloudFront Lambda@Edge function will not allow this to be done.
As the AWS documentation states here:
When you’re working with the HTTP response, Lambda#Edge does not expose the body that is returned by the origin server to the origin-response trigger. You can generate a static content body by setting it to the desired value, or remove the body inside the function by setting the value to be empty. If you don’t update the body field in your function, the original body returned by the origin server is returned back to viewer.
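For illustration, "totally replacing" the body in an origin-response trigger looks roughly like this (a minimal sketch; the replacement content is arbitrary):
'use strict';
exports.handler = (event, context, callback) => {
  const response = event.Records[0].cf.response;
  // The original origin body is not exposed here; we can only overwrite it
  response.body = '<html><body>Replaced content</body></html>';
  callback(null, response);
};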
I ran into the same issue, and I have been able to pull some info out of the request headers to piece together a URL from which I can fetch the original body.
Beware: I haven't yet been able to confirm that this is a "safe" method; it may be relying on undocumented behaviour. But for now it DOES fetch the original body properly for me. Of course, it also takes another request/round trip, possibly incurring extra transfer costs, execution time, etc.
const https = require('https');

const fetchOriginalBody = (request) => {
  const host = request['headers']['host'][0]['value']; // xxxx.yyy.com
  const uri = request['uri'];
  const fetchOriginalBodyUrl = 'https://' + host + uri;
  return httpsRequest(fetchOriginalBodyUrl);
};

// Helper that turns https.request into a promise
function httpsRequest(options) {
  return new Promise((resolve, reject) => {
    const req = https.request(options, (res) => {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        return reject(new Error('statusCode=' + res.statusCode));
      }
      var body = [];
      res.on('data', function(chunk) {
        body.push(chunk);
      });
      res.on('end', function() {
        try {
          body = Buffer.concat(body).toString();
          // body = JSON.parse(Buffer.concat(body).toString());
        } catch (e) {
          reject(e);
        }
        resolve(body);
      });
    });
    req.on('error', (e) => {
      reject(e.message);
    });
    req.end();
  });
}

exports.handler = async (event, context, callback) => {
  const records = event.Records;
  if (records && records.length > 0) {
    const request = records[0].cf.request;
    const body = await fetchOriginalBody(request);
  }
  ...
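To connect this back to the original question: a hypothetical origin-response trigger could then rewrite the fetched body and overwrite the response. This is a sketch only, with the same "possibly undocumented behaviour" caveat as above; OLDTEXT/NEWTEXT are the placeholders from the question, and fetchOriginalBody is the helper defined earlier:
// Sketch: origin-response handler that rewrites the body
exports.handler = async (event) => {
  const { request, response } = event.Records[0].cf;
  const body = await fetchOriginalBody(request);
  // Replace the (unexposed) origin body with the rewritten content
  response.body = body.replace(/OLDTEXT/g, 'NEWTEXT');
  return response;
};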

How to use the full request URL in AWS Lambda to execute logic only on certain pages

I have a website running on www.mywebsite.com. The files are hosted in an S3 bucket in combination with CloudFront. Recently I added a new part to the site which is supposed to be for private access only, so I wanted to put some form of protection on it. The rest of the site, however, should remain public. My goal is for the site to be accessible to everyone, but as soon as someone gets to the new part, they should not see any source files and should be prompted for a username/password combination.
The URL of the new part would be, for example, www.mywebsite.com/private/index.html, ...
I found that an AWS Lambda function (with Node.js) is good for this, and it kind of works. I have managed to put authentication on the entire website, but I can't figure out how to make it apply only to pages that contain, for example, '/private/' in the full URL. The Lambda function I wrote looks like this:
'use strict';
exports.handler = (event, context, callback) => {
  // Get request and request headers
  const request = event.Records[0].cf.request;
  const headers = request.headers;

  if (!request.uri.toLowerCase().indexOf("/private/") > -1) {
    // Continue request processing if authentication passed
    callback(null, request);
    return;
  }

  // Configure authentication
  const authUser = 'USER';
  const authPass = 'PASS';

  // Construct the Basic Auth string
  const authString = 'Basic ' + new Buffer(authUser + ':' + authPass).toString('base64');

  // Require Basic authentication
  if (typeof headers.authorization == 'undefined' || headers.authorization[0].value != authString) {
    const body = 'Unauthorized';
    const response = {
      status: '401',
      statusDescription: 'Unauthorized',
      body: body,
      headers: {
        'www-authenticate': [{key: 'WWW-Authenticate', value: 'Basic'}]
      },
    };
    callback(null, response);
  }

  // Continue request processing if authentication passed
  callback(null, request);
};
The part that doesn't work is the following:
if (!request.uri.toLowerCase().indexOf("/private/") > -1) {
  // Continue request processing if authentication passed
  callback(null, request);
  return;
}
My guess is that the request.uri does not contain what I expected it to contain, but I can't seem to figure out what does contain what I need.
If you're using a Lambda@Edge function (it appears you are), then you can view the request event structure here: https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/lambda-event-structure.html#lambda-event-structure-request
You can see the actual value of the request URI field by using console.log and checking the respective logs in CloudWatch.
The problem might be this line:
if (!request.uri.toLowerCase().indexOf("/private/") > -1) {
If you're strictly looking to check whether a JavaScript string contains another string, note that ! binds tighter than the comparison, so the original condition compares a boolean to -1 and is always true. You probably want to do this instead:
if (request.uri.toLowerCase().indexOf("/private/") === -1) {
Or better yet, using more modern JS:
if (!request.uri.toLowerCase().includes("/private/")) {
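For illustration, here is why the original condition passes for every URI (the paths are arbitrary examples):
const publicUri = '/index.html';
const privateUri = '/private/index.html';

// ! applies to the indexOf result first, then the boolean is compared to -1:
// false > -1 coerces to 0 > -1, true > -1 coerces to 1 > -1, both true
console.log(!publicUri.indexOf('/private/') > -1);  // true
console.log(!privateUri.indexOf('/private/') > -1); // true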

How to get external api data using inline editor in dialogflow

I've got a Dialogflow agent for which I'm using the Inline Editor (powered by Cloud Functions for Firebase). When I try to get external API data using request-promise-native, I keep getting "Ignoring exception from a finished function" in my Firebase console.
const request = require('request-promise-native');

function video(agent) {
  agent.add(`You are now being handled by the productivity intent`);
  const url = "https://reqres.in/api/users?page=2";
  return request.get(url)
    .then(jsonBody => {
      var body = JSON.parse(jsonBody);
      agent.add(body.data[0].first_name);
      return Promise.resolve(agent);
    });
}
Your code looks correct. The exception in this case might be that you're not using a paid account, so network access outside Google is blocked. You can probably see the exact exception by adding a catch block:
function video(agent) {
  agent.add(`You are now being handled by the productivity intent`);
  const url = "https://reqres.in/api/users?page=2";
  return request.get(url)
    .then(jsonBody => {
      var body = JSON.parse(jsonBody);
      agent.add(body.data[0].first_name);
      return Promise.resolve(agent);
    })
    .catch(err => {
      console.error('Problem making network call', err);
      agent.add('Unable to get result');
      return Promise.resolve(agent);
    });
}
(If you do this, you may want to update your question with the exact error from the logs.)
Inline Editor uses Firebase. If you do not have a paid account with Firebase, you will not be able to access external APIs.

CKAN resource_create API

I'm trying to read a list of files placed in a location using Node.js and create resources for those files in CKAN.
I'm using the CKAN API v3 (/api/3) to create the resources.
Node.js library used for iterating through files in a location: filehound
CKAN API used: /api/3/action/resource_create
Code to iterate over files (please don't run it; it's only a sample snippet) :
// file operations helper
const fileHound = require("filehound");
// filesystem
const fs = require("fs");
// to make http requests
const superagent = require("superagent");

var basePath = "/test-path",
    packageId = "8bf37d22-0c25-40c0-8faa-7de12ff927f5";

var files = fileHound.create()
  .paths(basePath)
  .find();

files.then(function (fileNames) {
  if (Array.isArray(fileNames)) {
    for (var fileName of fileNames) {
      superagent
        .post("http://test-ckan-domain.com/api/3/action/resource_create")
        .set("Authorization", "78d5d219-37de-41f7-9443-188bc564051e")
        .field("package_id", packageId)
        .field("name", fileName)
        .field("format", "CSV")
        .attach("upload", fs.createReadStream(fileName))
        .end(function (err, res) {
          if (!err) console.log(fileName, "Status Code ", res.statusCode);
          else console.log("Error ", err);
        });
    }
  }
});
After iterating through the files and uploading them to CKAN by creating a new resource, CKAN responds with "success": true and the "id" of the created resource, with HTTP status code 200.
However, only some of the resources are created and a few are not. I'm using CKAN's frontend to verify whether the resource was created under a package/dataset (as the response from the resource creation API is successful).
Is this a known issue with CKAN, or am I going wrong in my code?
You've probably solved this by now, but given that some resources are just missing while you only get 200 success messages, I wonder if your for loop isn't working correctly. Be careful with the syntax: declaring var fileName in the loop means all the asynchronous .end() callbacks share a single binding, so by the time they fire they all see the last file name, which can make successful uploads look like missing ones. Declare it with let so each iteration gets its own binding (see the sketch after the link below).
https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Statements/for...of
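For illustration, here is the same closure pitfall in isolation, with setTimeout standing in for superagent's asynchronous .end() callback (the file names are arbitrary):
var names = ["a.csv", "b.csv", "c.csv"];

// var: one shared binding, so every callback sees the final value
for (var n of names) {
  setTimeout(function () { console.log("var:", n); }, 0); // "c.csv" three times
}

// let: a fresh binding per iteration, so each callback sees its own value
for (let m of names) {
  setTimeout(function () { console.log("let:", m); }, 0); // a.csv, b.csv, c.csv
}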

check on server side if youtube video exist

How to check on the Node.js server side whether a YouTube video exists:
var youtubeId = "adase268_";

// pseudo code
youtubeVideoExist = function (youtubeId) {
  return true; // if youtube video exists
};
You don't need to use the YouTube API per se; you can look for the thumbnail image:
Valid video = 200 - OK:
http://img.youtube.com/vi/gC4j-V585Ug/0.jpg
Invalid video = 404 - Not found:
http://img.youtube.com/vi/gC4j-V58xxx/0.jpg
I thought I could make this work from the browser, since you can load images from a third-party site without security problems. But testing it, it fails to report the 404 as an error, probably because the content body is still a valid image. Since you're using Node, you should be able to look at the HTTP response code directly.
I can't think of an approach that doesn't involve making a separate HTTP request to the video link to see whether it exists, unless you know beforehand of a set of video IDs that are inactive, dead, or wrong.
Here's an example of something that might work for you. I can't readily tell whether you're using this as a standalone script or as part of a web server. The example below assumes the latter: you call a web server on /video?id=123videoId and have it respond or do something depending on whether the video with that ID exists. It uses Node's request library, which you can install with npm install request:
var request = require('request');

// Your route here. Example of what a route may look like if called on /video?id=123videoId
app.get('/video', function(req, response, callback) {
  var videoId = 'adase268_'; // Could change to something like req.query['id']
  request.get('https://www.youtube.com/watch?v=' + videoId, function(error, res, body) {
    if (res.statusCode === 404) {
      // Video doesn't exist. Do what you need to do here.
    } else {
      // Video exists.
      // Can handle other HTTP response codes here if you like.
    }
  });
});
// You could refactor the above to take out the 'request.get()', wrap it in a function
// that takes a callback and re-use it in multiple routes, depending on your problem.
#rodrigomartell is on the right track, in that your check function will need to make an HTTP call; however, just checking the youtube.com URL won't work in most cases. You'll get back a 404 if the videoID is a malformed ID (i.e. less than 11 characters or using characters not valid in their scheme), but if it's a properly formed videoID that just happens to not correspond to a video, you'll still get back a 200. It would be better to use an API request, like this (note that it might be easier to use the request-json library instead of just the request library):
var request = require('request-json');
var client = request.newClient('https://www.googleapis.com/youtube/v3/');

// The API responds asynchronously, so the result has to be delivered
// through a callback; a return inside client.get() would be lost
youtubeVideoExist = function (youtubeId, callback) {
  var apikey = 'YOUR_API_KEY'; // register for an API key at the Google Developers Console ... https://console.developers.google.com/
  client.get('videos/?part=id&id=' + youtubeId + '&key=' + apikey, function (err, res, body) {
    callback(!err && body.items.length > 0); // true if the youtube video exists
  });
};
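For illustration, calling the callback-based version with the ID from the question then looks like this:
youtubeVideoExist('adase268_', function (exists) {
  console.log(exists ? 'video exists' : 'video not found');
});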
Using the youtube-feeds module. Works fast (~200 ms), and no API key is needed.
var youtube = require("youtube-feeds");

var existsFunc = function(youtubeId, callback) {
  youtube.video(youtubeId, function(err, result) {
    var exists = result.id === youtubeId;
    console.log("youtubeId");
    console.log(youtubeId);
    console.log("exists");
    console.log(exists);
    callback(exists);
  });
};

var notExistentYoutubeId = "y0srjasdkfjcKC4eY";
existsFunc(notExistentYoutubeId, console.log);

var existentYoutubeId = "y0srjcKC4eY";
existsFunc(existentYoutubeId, console.log);
output:
❯ node /pathToFileWithCodeAbove/FileWithCodeAbove.js
youtubeId
y0srjcKC4eY
exists
true
true
youtubeId
y0srjasdkfjcKC4eY
exists
false
false
All you need is to look for the thumbnail image. In Node.js it would be something like:
var http = require('http');

function isValidYoutubeID(youtubeID) {
  var options = {
    method: 'HEAD',
    host: 'img.youtube.com',
    path: '/vi/' + youtubeID + '/0.jpg'
  };
  var req = http.request(options, function(res) {
    if (res.statusCode == 200) {
      console.log("Valid Youtube ID");
    } else {
      console.log("Invalid Youtube ID");
    }
  });
  req.end();
}
No API key is needed. It is quite fast because only the headers are checked for status code 200/404; the image itself is not downloaded.
