Amazon sp-API aws signature issue - signature

AMAZON SP-API AWS SIGNATURE The request signature we calculated does not match the signature you provided. Check your AWS Secret Access Key and signing method. Consult the service documentation for details
public function getOrdersFromApi(){
// Configuration values
$host = 'sellingpartnerapi-eu.amazon.com';
$accessKey = 'XXXXX';
$secretKey = 'XXXXXX';
$region = 'eu-west-1';
$service = 'execute-api';
/**
* You should modify the script
* for
* 1. full request url
* 2. uri for AWS signature
* 3. request method GET / POST / PUT
* 4. actual data of the request
* and call the above functions
*/
$requestUrl = 'https://sellingpartnerapi-eu.amazon.com/orders/v0/orders?MarketplaceIds=A21TJRUUN4KGV&CreatedAfter=2021-01-15T05:26:31.308992';
$uri = '?MarketplaceIds=A21TJRUUN4KGV&CreatedAfter=2021-01-15T05:26:31.308992';
$httpRequestMethod = 'GET';
$data = json_encode(array());
$headers = $this->calcualteAwsSignatureAndReturnHeaders($host, $uri, $requestUrl,
$accessKey, $secretKey, $region, $service,
$httpRequestMethod, $data, TRUE);
$result = $this->callToAPI($requestUrl, $httpRequestMethod, $headers, $data, TRUE);
print_r($result);
}
public function calcualteAwsSignatureAndReturnHeaders($host, $uri, $requestUrl,
$accessKey, $secretKey, $region, $service,
$httpRequestMethod, $data, $debug = TRUE){
$terminationString = 'aws4_request';
$algorithm = 'AWS4-HMAC-SHA256';
$phpAlgorithm = 'sha256';
$canonicalURI = $uri;
$canonicalQueryString = '';
$signedHeaders = 'content-type;host;x-amz-date'; $signedHeaders = "content-type;host;user-agent;x-amz-access-token;x-amz-date;x-amz-security-token";
$currentDateTime = new DateTime('UTC');
$reqDate = $currentDateTime->format('Ymd');
$reqDateTime = $currentDateTime->format('Ymd\THis\Z');
// Create signing key
$kSecret = $secretKey;
$kDate = hash_hmac($phpAlgorithm, $reqDate, 'AWS4' . $kSecret, true);
$kRegion = hash_hmac($phpAlgorithm, $region, $kDate, true);
$kService = hash_hmac($phpAlgorithm, $service, $kRegion, true);
$kSigning = hash_hmac($phpAlgorithm, $terminationString, $kService, true);
// Create canonical headers
$canonicalHeaders = array();
$canonicalHeaders[] = 'content-type:application/json';
$canonicalHeaders[] = 'host:' . $host;
$canonicalHeaders[] = 'x-amz-date:' . $reqDateTime;
$canonicalHeadersStr = implode("\n", $canonicalHeaders);
// Create request payload
$requestHasedPayload = hash($phpAlgorithm, $data);
// Create canonical request
$canonicalRequest = array();
$canonicalRequest[] = $httpRequestMethod;
$canonicalRequest[] = $canonicalURI;
$canonicalRequest[] = $canonicalQueryString;
$canonicalRequest[] = $canonicalHeadersStr . "\n";
$canonicalRequest[] = $signedHeaders;
$canonicalRequest[] = $requestHasedPayload;
$requestCanonicalRequest = implode("\n", $canonicalRequest);
$requestHasedCanonicalRequest = hash($phpAlgorithm, utf8_encode($requestCanonicalRequest));
if($debug){
echo "<h5>Canonical to string</h5>";
echo "<pre>";
echo $requestCanonicalRequest;
echo "</pre>";
}
// Create scope
$credentialScope = array();
$credentialScope[] = $reqDate;
$credentialScope[] = $region;
$credentialScope[] = $service;
$credentialScope[] = $terminationString;
$credentialScopeStr = implode('/', $credentialScope);
// Create string to signing
$stringToSign = array();
$stringToSign[] = $algorithm;
$stringToSign[] = $reqDateTime;
$stringToSign[] = $credentialScopeStr;
$stringToSign[] = $requestHasedCanonicalRequest;
$stringToSignStr = implode("\n", $stringToSign);
if($debug){
echo "<h5>String to Sign</h5>";
echo "<pre>";
echo $stringToSignStr;
echo "</pre>";
}
// Create signature
$signature = hash_hmac($phpAlgorithm, $stringToSignStr, $kSigning);
// Create authorization header
$authorizationHeader = array();
$authorizationHeader[] = 'Credential=' . $accessKey . '/' . $credentialScopeStr;
$authorizationHeader[] = 'SignedHeaders=' . $signedHeaders;
$authorizationHeader[] = 'Signature=' . ($signature);
$authorizationHeaderStr = $algorithm . ' ' . implode(', ', $authorizationHeader);
// Request headers
$headers = array();
$headers[] = 'authorization:'.$authorizationHeaderStr;
$headers[] = 'content-length:'.strlen($data);
$headers[] = 'content-type: application/json';
$headers[] = 'user-agent:sellergeni/1.0 (Php=7.2)';
$headers[] = 'x-amz-access-token:Atza|IwEBII0yZlQvoo95puL9xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx';
$headers[] = 'host: ' . $host;
$headers[] = 'x-amz-date: ' . $reqDateTime;
return $headers;
}
public function callToAPI($requestUrl, $httpRequestMethod, $headers, $data, $debug=TRUE)
{
// Execute the call
$curl = curl_init();
curl_setopt_array($curl, array(
CURLOPT_URL => $requestUrl,
CURLOPT_RETURNTRANSFER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_TIMEOUT => 30,
CURLOPT_POST => true,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => $httpRequestMethod,
CURLOPT_POSTFIELDS => $data,
CURLOPT_VERBOSE => 0,
CURLOPT_SSL_VERIFYHOST => 0,
CURLOPT_SSL_VERIFYPEER => 0,
CURLOPT_HEADER => false,
CURLINFO_HEADER_OUT=>true,
CURLOPT_HTTPHEADER => $headers,
));
$response = curl_exec($curl);
$err = curl_error($curl);
$responseCode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
if($debug){
$headers = curl_getinfo($curl, CURLINFO_HEADER_OUT);
echo "<h5>Request</h5>";
echo "<pre>";
echo $headers;
echo "</pre>";
}
curl_close($curl);
if ($err) {
if($debug){
echo "<h5>Error:" . $responseCode . "</h5>";
echo "<pre>";
echo $err;
echo "</pre>";
}
} else {
if($debug){
echo "<h5>Response:" . $responseCode . "</h5>";
echo "<pre>";
echo $response;
echo "</pre>";
}
}
return array(
"responseCode" => $responseCode,
"response" => $response,
"error" => $err
);
}
Response of this code like this
Canonical to string
GET
?MarketplaceIds=A21TJRUUN4KGV&CreatedAfter=2021-01-15T05:26:31.308992
content-type:application/json
host:sellingpartnerapi-eu.amazon.com
x-amz-date:20210204T060039Z
content-type;host;user-agent;x-amz-access-token;x-amz-date;x-amz-security-token
4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945
String to Sign
AWS4-HMAC-SHA256
20210204T060039Z
20210204/eu-west-1/execute-api/aws4_request
f11a59afb96cf62bbdd17e44405da8c5567178531f839826e89e9b1fcb77a24a
Request
GET /orders/v0/orders?MarketplaceIds=A21TJRUUN4KGV&CreatedAfter=2021-01-15T05:26:31.308992 HTTP/1.1
Host: sellingpartnerapi-eu.amazon.com
Accept: */*
authorization:AWS4-HMAC-SHA256 Credential=xxxxxxxxxxxxxx/20210204/eu-west-1/execute-api/aws4_request, SignedHeaders=content-type;host;user-agent;x-amz-access-token;x-amz-date;x-amz-security-token, Signature=3ddc540c9a1e38ea0466e4d4570a5ea8a1bc7274cfbc2fffd50609c7f379c495
content-length:2
content-type: application/json
user-agent:sellergeni/1.0 (Php=7.2)
x-amz-access-token:Atza|xxxxxxxxxxxxxxxxxxxxxxxxx
x-amz-date: 20210204T060039Z
Response:403
{
"errors": [
{
"message": "The request signature we calculated does not match the signature you provided. Check your AWS Secret Access Key and signing method. Consult the service documentation for details.
The Canonical String for this request should have been
'GET
/orders/v0/orders
CreatedAfter=2021-01-15T05%3A26%3A31.308992&MarketplaceIds=A21TJRUUN4KGV
content-type:application/json
host:sellingpartnerapi-eu.amazon.com
user-agent:sellergeni/1.0 (Php=7.2)
x-amz-access-token:Atza|IwEBII0yZlQvoo95puL9xxxxxxxxxxxxxxxxxx
x-amz-date:20210204T060039Z
x-amz-security-token:
content-type;host;user-agent;x-amz-access-token;x-amz-date;x-amz-security-token
4f53cda18c2baa0c0354bb5f9a3ecbe5ed12ab4d8e11ba873c2f11161202b945'
The String-to-Sign should have been
'AWS4-HMAC-SHA256
20210204T060039Z
20210204/eu-west-1/execute-api/aws4_request
e995c02098c1a2d5d775380f1fc49804b32af9534b5e330145ca125fe098f0af'
",
"code": "InvalidSignature"
}
]
}

Run:
sign pathparam/resource
Replace:
/messaging/v1/orders/{orderId}
with:
/messaging/v1/orders/xxx-xxxxxxx-xxxxxxx

Related

Get post count from instagram api

I am trying to get the post count of an instagram account, I have managed to do followers and following but cant get to get the post count right.
<?php
$username = 'instagram';
$response = #file_get_contents( "https://www.instagram.com/$username/?__a=1" );
if ( $response !== false ) {
$data = json_decode( $response, true );
if ( $data !== null ) {
$full_name = $data['graphql']['user']['full_name'];
$follower = $data['graphql']['user']['edge_followed_by']['count'];
$follows = $data['graphql']['user']['edge_follow']['count'];
echo "<p>{$full_name}</p> <p>{$follower} followers {$follows} following.</p>";
}
} else {
echo 'Username not found.';
}
?>
If anyone ever needs the answer, I managed to pull it through...
<?php
$username = 'instagram';
$response = #file_get_contents( "https://www.instagram.com/$username/?__a=1" );
if ( $response !== false ) {
$data = json_decode( $response, true );
if ( $data !== null ) {
$full_name = $data['graphql']['user']['full_name'];
$follower = $data['graphql']['user']['edge_followed_by']['count'];
$follows = $data['graphql']['user']['edge_follow']['count'];
$posts = $data['graphql']['user']['edge_owner_to_timeline_media']['count'];
echo "<h2><a href='https://www.instagram.com/{$username}'>{$full_name}</a></h2>
<p><span>{$posts} posts</span> <span>{$follower} followers</span> <span>{$follows} following</span></p>";
}
} else {
echo 'Username not found.';
}
?>
You can get many options by just going to
https://www.instagram.com/$username/?__a=1
and changing the $username to the account you need to see

file_get_contents in wordpress function

This code works correctly and saves a remote image to localhost (this wordpress plugin save tmdb cast image to local address):
function dt_cast_2($id, $type, $limit = false)
{
$name = get_post_meta($id, "dt_cast", $single = true);
if ($type == "img") {
if ($limit) {
$val = explode("]", $name);
$passer = $newvalor = array();
foreach ($val as $valor) {
if (!empty($valor)) {
$passer[] = substr($valor, 1);
}
}
for ($h = 0; $h <= 500; $h++) {
$newval = explode(";", $passer[$h]);
$fotoor = $newval[0];
$actorpapel = explode(",", $newval[1]);
if (!empty($actorpapel[0])) {
if ($newval[0] == "null") {
$fotoor = DT_DIR_URI . '/assets/img/no_foto_cast.png';
} else {
$fotoor = 'https://image.tmdb.org/t/p/w90' . $newval[0];
$uploaddir = wp_upload_dir();
$uploadfile = $uploaddir['basedir'] . $newval[0];
if(!file_exists($uploadfile))
{
$contents= file_get_contents($fotoor);
$savefile = fopen($uploadfile, 'w');
fwrite($savefile, $contents);
fclose($savefile);
}
$fotoor = $uploaddir['baseurl'] . $newval[0];
}}}}}}
I have a problem in this function, the image is not saved to local.
Can someone give me the correct code?
function dt_image($name, $id, $size, $type = false, $return = false, $gtsml = false) {
$img = get_post_meta($id, $name, $single = true);
$val = explode("\n", $img);
$mgsl = array();
$count = 0;
foreach ($val as $valor) {
if (!empty($valor)) {
if (substr($valor, 0, 1) == "/") {
$mgsl[] = 'https://image.tmdb.org/t/p/' . $size . '' . $valor . '';
} else {
$mgsl[] = $valor;
}
$count++;
} else {
if ($name == "dt_poster" && $img == NULL) {
$mgsl[] = esc_url( DT_DIR_URI ) . '/assets/img/no_poster.png';
}
}
}
$fotoor = 'https://image.tmdb.org/t/p/w90' . $newval[0];
i replaced this code and work it.
$fotoor = 'https://image.tmdb.org/t/p/w90' . $newval[0];
$uploaddir = wp_upload_dir();
$uploadfile = $uploaddir['basedir'] . $newval[0];
if(!file_exists($uploadfile))
{
$contents= file_get_contents($fotoor);
$savefile = fopen($uploadfile, 'w');
fwrite($savefile, $contents);
fclose($savefile);
}
$fotoor = $uploaddir['baseurl'] . $newval[0];

Paypal Success transation return values empty-- sometimes

There is a problem with my paypal Integration in live site.
For 90% of transactions, I didn't get any problem. Some times only I am facing this below problem.
Problem: After successful transaction, I am not getting Pay pal values like Transaction Id, Message etc. All values shows empty. It comes for 10% of transactions only. please tell me what may be the problem. Can't able to find out the reason
Thanks in Advance..
$paypal_email = '5345345345345f';
$cur_dir='http://'.$_SERVER['SERVER_NAME'] . dirname($_SERVER['REQUEST_URI']);
$return_url = $cur_dir.'/paymentsucess.php';
$cancel_url = $cur_dir.'/payment-not-sucess.php';
$notify_url = $cur_dir.'/payments.php';
extract($_POST);
$item_name = $exam_name;
$item_amount = $amount;
$userid=$userid;
if (!isset($_POST["txn_id"]) && !isset($_POST["txn_type"])){
$querystring = "?business=".urlencode($paypal_email)."&";
$querystring .= "item_name=".urlencode($item_name)."&";
$querystring .= "amount=".urlencode($item_amount)."&";
//loop for posted values and append to querystring
foreach($_POST as $key => $value){
$value = urlencode(stripslashes($value));
$querystring .= "$key=$value&";
}
$querystring .= "return=".urlencode(stripslashes($return_url))."&";
$querystring .= "cancel_return=".urlencode(stripslashes($cancel_url))."&";
$querystring .= "notify_url=".urlencode($notify_url);
header('location:https://www.paypal.com/cgi-bin/webscr'.$querystring);
exit();
}else{
$req = 'cmd=_notify-validate';
foreach ($_POST as $key => $value) {
$value = urlencode(stripslashes($value));
$value = preg_replace('/(.*[^%^0^D])(%0A) (.*)/i','${1}%0D%0A${3}',$value);// IPN fix
$req .= "&$key=$value";
}
$data['item_name'] = $_POST['item_name'];
$data['item_number'] = $_POST['item_number'];
$data['payment_status'] = $_POST['payment_status'];
$data['payment_amount'] = $_POST['mc_gross'];
$data['payment_currency'] = $_POST['mc_currency'];
$data['txn_id'] = $_POST['txn_id'];
$data['receiver_email'] = $_POST['receiver_email'];
$data['payer_email'] = $_POST['payer_email'];
$data['custom'] = $_POST['custom'];
$data['userid'] = $_POST['userid'];
// post back to PayPal system to validate
$header = "POST /cgi-bin/webscr HTTP/1.0\r\n";
$header .= "Content-Type: application/x-www-form-urlencoded\r\n";
$header .= "Content-Length: " . strlen($req) . "\r\n\r\n";
$fp = fsockopen ('ssl://www.paypal.com', 443, $errno, $errstr, 30);
if (!$fp) {
// HTTP ERROR
} else {
fputs ($fp, $header . $req);
while (!feof($fp)) {
$res = fgets ($fp, 1024);
if (strcmp ($res, "VERIFIED") == 0) {
// Validate payment (Check unique txnid & correct price)
$valid_txnid = check_txnid($data['txn_id']);
$valid_price = check_price($data['payment_amount'], $data['item_number']);
// PAYMENT VALIDATED & VERIFIED!
if($valid_txnid && $valid_price){
$orderid = updatePayments($data);
if($orderid){
}else{
}
}else{
}
}else if (strcmp ($res, "INVALID") == 0) {
}
}
fclose ($fp);
}
}

why php mailer not send attachement?

I m using php mailer to send mail with attachment but i don't know why attachment not send
else msg in body send properly and no error show plz give best solution for this
$to = 'ok123#gmail.com';
$msg = "This a body of a mail";
require_once("functions/class.phpmailer.php");
$mailer = new PHPMailer();
$mailer->From = "noreply#gmail.com";
$mailer->Subject = "attachment file";
$mailer->AddAddress($to);
$mailer->ContentType = 'text/html';
$mailer->CharSet = "UTF-8";
$mailer->Body = $msg;
$mailer->IsHTML(true);
$mailer->AddAttachment("images/20130319182911.zip","20130319182911.zip");
$mailer->Send();
echo "Message Sent OK<p></p>\n";
This is my favorite mail attachment class. Use it if you like it.
class AttachmentEmail {
private $from = 'yours#email.com';
private $from_name = 'Your Name';
private $reply_to = 'yours#email.com';
private $to = '';
private $subject = '';
private $message = '';
private $attachment = '';
private $attachment_filename = '';
public function __construct($to, $subject, $message, $attachment = '', $attachment_filename = '') {
$this -> to = $to;
$this -> subject = $subject;
$this -> message = $message;
$this -> attachment = $attachment;
$this -> attachment_filename = $attachment_filename;
}
public function mail() {
if (!empty($this -> attachment)) {
$filename = empty($this -> attachment_filename) ? basename($this -> attachment) : $this -> attachment_filename ;
$path = dirname($this -> attachment);
$mailto = $this -> to;
$from_mail = $this -> from;
$from_name = $this -> from_name;
$replyto = $this -> reply_to;
$subject = $this -> subject;
$message = $this -> message;
$file = $path.'/'.$filename;
$file_size = filesize($file);
$handle = fopen($file, "r");
$content = fread($handle, $file_size);
fclose($handle);
$content = chunk_split(base64_encode($content));
$uid = md5(uniqid(time()));
$name = basename($file);
$header = "From: ".$from_name." <".$from_mail.">\r\n";
$header .= "Reply-To: ".$replyto."\r\n";
$header .= "MIME-Version: 1.0\r\n";
$header .= "Content-Type: multipart/mixed; boundary=\"".$uid."\"\r\n\r\n";
$header .= "This is a multi-part message in MIME format.\r\n";
$header .= "--".$uid."\r\n";
$header .= "Content-type:text/plain; charset=iso-8859-1\r\n";
$header .= "Content-Transfer-Encoding: 7bit\r\n\r\n";
$header .= $message."\r\n\r\n";
$header .= "--".$uid."\r\n";
$header .= "Content-Type: application/octet-stream; name=\"".$filename."\"\r\n"; // use diff. tyoes here
$header .= "Content-Transfer-Encoding: base64\r\n";
$header .= "Content-Disposition: attachment; filename=\"".$filename."\"\r\n\r\n";
$header .= $content."\r\n\r\n";
$header .= "--".$uid."--";
if (mail($mailto, $subject, "", $header)) {
return true;
} else {
return false;
}
} else {
$header = "From: ".($this -> from_name)." <".($this -> from).">\r\n";
$header .= "Reply-To: ".($this -> reply_to)."\r\n";
if (mail($this -> to, $this -> subject, $this -> message, $header)) {
return true;
} else {
return false;
}
}
}
}
And use it like
$sendit = new AttachmentEmail('test#example.com', 'Testing attachment!', 'Hi', '/home/test/test.jpg');
$sendit -> mail();
Gmail removes .zip attachment when it detects particular files inside, like EXEs. Add zip attachment as 20130319182911.piz and it should arrive.
It's not a phpmailer problem, but a Gmail policy.

How to retrieve all the reviews publicly available for an extension in the Google Chrome webstore - JSON & cross-domain issue

I'm interested in gathering/scraping data about the reviews earned by popular extensions available in the Chrome Webstore.
In particular, I need to retrieve the number of total reviews left for a specific extension and then retrieve all the reviews publicly available for this addon. My problem is the following: I cannot write a standard PHP Curl scraper since the data I'm interested in is available through json requests, in particular, I need to call:
https://chrome.google.com/reviews/components for the number of
reviews ('numRatings')
https://chrome.google.com/reviews/json/search
for the reviews ("comment")
I tried to write this:
<script src="http://code.jquery.com/jquery-latest.js"></script>
<script type="text/javascript">
function getReviews(extensionId, callback) {
var entities = [{'url' : 'http://chrome.google.com/extensions/permalink?id=' + extensionId}];
var param = {"searchSpecs":[{"requireComment":true,"entities": entities,"groups":["public_comment"],"matchExtraGroups":true,"sortBy":"quality","startIndex":10,"numResults":10,"includeNickNames":true}],"applicationId":94};
$.ajax({
type: 'POST',
url: 'https://chrome.google.com/reviews/json/search',
contentType: 'application/xml',
xhrFields: {withCredentials: true },
dataType: 'json',
data: 'req=' + JSON.stringify(param) + '&requestSource=widget'
}).success(callback);
}
</script>
<script type="text/javascript">
$(document).ready(getReviews('gighmmpiobklfepjocnamgkkbiglidom', function(reviews) { console.log(reviews); }));
</script>
I'm not very keen in jQuery/JSON(-P) and the code above is certainly wrong.
My questions are as follows:
How to bypass the same-domain policy? I tried YQL without success...
How to format my url/'data' to only retrieve the number of
reviews ('numRatings') on chrome.google.com/reviews/components and the reviews ('comments') on chrome.google.com/reviews/json/search for a specific extension identified by its id, e.g. gighmmpiobklfepjocnamgkkbiglidom?
I already accomplished this kind of scraping for popular Mozilla Addons using PHP and gathered the data I needed using a standard curl/XPath.
Thanks for your help!
1) The easiest way would be to create a Chrome extension;
2) See https://github.com/xpressyoo/MyExtensions
[...]
getComments : function() {
var entities = [];
//each(Ext.extensions, function(data, id) {
entities.push({'url' : 'http://chrome.google.com/extensions/permalink?id=' + this.hash});
//});
Ext.XHR['comments'] = new Ajax({
'method' : 'POST',
'encodeURI' : false, // Needed
'url' : 'https://chrome.google.com/reviews/json/search',
'headers' : {
'Content-type' : 'application/xml'
},
'parameters' : {
'req' : JSON.stringify({'searchSpecs' : [{'entities' : entities, 'groups' : ['public_comment'], 'matchExtraGroups' : true,"sortBy":"quality", 'startIndex' : 0, 'numResults' : 80, 'includeNickNames' : true}], 'applicationId' : 94 }) + '&requestSource=widget'
},
'onSuccess' : function(xhr) {
var json = xhr.responseJSON;
if(json && json.searchResults ) {
this.comments = {
'total' : Number(json.searchResults[0].numAnnotations.toString().replace(/,/, '').toInt()),
'latest' : json.searchResults[0].annotations ? json.searchResults[0].annotations[0] :{},
'previous' : this.comments.total || null,
'latestPrevious' : $merge(this.comments.latest) || null,
'new' : this.comments['new'] || false
}
Ext.XHR['comments'] = null;
}
}.bind(this)
}).send();
return this;
},
[...]
and
var nbreviews = this.comments.total; //The number of reviews
var latestcomment = (this.comments.latest0 && this.comments.latest0.comment ? this.comments.latest0.comment.replace(/\n/gi, '') : '');// get the latest comment
var nthcomment = (this.comments.latestn && this.comments.latestn.comment ? this.comments.latestn.comment.replace(/\n/gi, '') : '');//Get the nth comment
where:
'latestn' : json.searchResults[0].annotations ? json.searchResults[0].annotations[n] :{},
Here is a way of doing it in PHP with parallel cURL. This script scrapes all the extensions present in the Chrome webstore (ranked by popularity) and retrieves information such as:
Number of users
Number of star-ratings
Number of text reviews
Number of characters for each text-review (max 100 reviews scraped for each extension)
//GET URL
$url0 = "https://chrome.google.com/";
//AUTO LOOP
foreach(range(0, 705, 5) as $x) {
//Nb PAGES TO DOWNLOAD
$frompge = $x+1;
$topge = $x+5;
$nbpages = ($topge - $frompge)+1;
$zitems = $nbpages*20;
//MULTI cURL INIT
$mh = curl_multi_init();
$running = null;
//GENERATE URLs ARRAY
$urls = array();
for ($a = $frompge; $a <= $topge; $a++){
$aa = $url0 . 'webstore/list/most_popular/'. $a .'?category=ext';
$urls[] = $aa;
}
foreach ($urls as $name => $url)
{
$c[$name]=curl_init($url);
curl_setopt($c[$name], CURLOPT_HEADER, false);
curl_setopt($c[$name], CURLOPT_FAILONERROR, true);
curl_setopt($c[$name], CURLOPT_FOLLOWLOCATION, true);
curl_setopt($c[$name], CURLOPT_AUTOREFERER, true);
curl_setopt($c[$name], CURLOPT_RETURNTRANSFER, true);
curl_setopt($c[$name], CURLOPT_TIMEOUT, 10);
curl_multi_add_handle ($mh,$c[$name]);
}
// execute all queries simultaneously, and continue when all are complete
do {
curl_multi_exec($mh, $running);
} while ($running >0);
$html = array();
foreach ($urls as $name => $url)
{
$html[]=curl_multi_getcontent($c[$name]);
curl_multi_remove_handle($mh,$c[$name]);
curl_close($c[$name]);
}
curl_multi_close($mh);
for ($b = 0; $b <= $nbpages-1; $b++) {
// Parse the HTML information and return the results.
$dom = new DOMDocument();
#$dom->loadHtml($html[$b]);
$xpath = new DOMXPath($dom);
$links = $xpath->query("//a[contains(#class, 'title-a')]");
$result = array();
foreach ( $links as $item ) {
$newDom = new DOMDocument;
$newDom->appendChild($newDom->importNode($item,true));
$xpath = new DOMXPath( $newDom );
$cleaner = array(" users", " user", "(", ")", ","," ");
$data = str_replace($cleaner,"",trim($xpath->query("//script")->item(0)->nodeValue));
list($b1,$id,$b2,$b3,$b4,$name,$b5,$b6,$b7,$b8,$b9,$b10,$b11,$b12,$b13,$nbusers) = explode("\"", $data);
$label = str_replace(" ", "", strtolower(ereg_replace("[^A-Za-z0-9 ]", "", $name)));
//CATEGORIES (based on nb of users)
if($nbusers<100){$category = '1';$color = 'inherit';}
else if($nbusers>=100 && $nbusers<1000){$category = '2';$color = '#E6EEEE';}
else if($nbusers>=1000 && $nbusers<10000){$category = '3';$color = '#CDDEDE';}
else if($nbusers>=10000 && $nbusers<100000){$category = '4';$color = '#B5CDCD';}
else if($nbusers>=100000 && $nbusers<1000000){$category = '5';$color = '#9CBDBD';}
else if($nbusers == '1000000+'){$category = '6';$color = '#83ACAC';}
else{$category = '-9';}
/////////////////////////////////////////////LOOP REVIEWS
$extURL = 'http://chrome.google.com/extensions/permalink?id='.$id;
$c1 = curl_init('https://chrome.google.com/reviews/json/search');
$c1a = curl_init('https://chrome.google.com/reviews/json/search');
$c2 = curl_init('https://chrome.google.com/reviews/json/lookup');
$fields1 = http_build_query(array(
'req' => '{"searchSpecs":[{"requireComment":true,"entities":[{"url":"'.$extURL.'"}],"groups":["public_comment"],"matchExtraGroups":true,"sortBy":"quality","startIndex":0,"numResults":100,"includeNickNames":false}],"applicationId":94}',
));
$options1 = array(
CURLOPT_RETURNTRANSFER => true,
CURLOPT_SSL_VERIFYPEER => false,
CURLOPT_POST => true,
CURLOPT_TIMEOUT => 10,
CURLOPT_POSTFIELDS => $fields1,
);
$fields1a = http_build_query(array(
'req' => '{"searchSpecs":[{"requireComment":true,"entities":[{"url":"'.$extURL.'"}],"groups":["public_comment"],"matchExtraGroups":true,"startIndex":0,"numResults":100,"includeNickNames":false}],"applicationId":94}',
));
$options1a = array(
CURLOPT_RETURNTRANSFER => true,
CURLOPT_SSL_VERIFYPEER => false,
CURLOPT_POST => true,
CURLOPT_TIMEOUT => 10,
CURLOPT_POSTFIELDS => $fields1a,
);
$fields2 = http_build_query(array(
'req' => '{"entities":[{"url" : "'.$extURL.'", "includeAggregateInfo" : true}],"applicationId":94}',
));
$options2 = array(
CURLOPT_RETURNTRANSFER => true,
CURLOPT_SSL_VERIFYPEER => false,
CURLOPT_POST => true,
CURLOPT_TIMEOUT => 10,
CURLOPT_POSTFIELDS => $fields2,
);
curl_setopt_array($c1, $options1);
curl_setopt_array($c1a, $options1a);
curl_setopt_array($c2, $options2);
$mh2 = curl_multi_init();
curl_multi_add_handle($mh2,$c1);
curl_multi_add_handle($mh2,$c1a);
curl_multi_add_handle($mh2,$c2);
$active = null;
do {
curl_multi_exec($mh2, $active);
} while ($active >0);
//close the handles$c1 = curl_init('https://chrome.google.com/reviews/json/search');
$json1=curl_multi_getcontent($c1);
$json1a=curl_multi_getcontent($c1a);
$json2=curl_multi_getcontent($c2);
curl_multi_remove_handle($mh2, $c1);
curl_multi_remove_handle($mh2, $c1a);
curl_multi_remove_handle($mh2, $c2);
curl_multi_close($mh2);
$data1 = json_decode(utf8_encode($json1), true);
$data1a = json_decode(utf8_encode($json1a), true);
$data2 = json_decode(utf8_encode($json2), true);
if ($data1['channelHeader']['errorCode']) return;
$nbreviews = $data1['searchResults'][0]['numAnnotations'];
if ($nbreviews > 100){$nbreviews2=100;}
else{$nbreviews2=$nbreviews;}
//Sum strings
$comments = $data1['searchResults'][0]['annotations'];
$sum =0;
foreach($comments as $comment){
$msg = preg_replace('/[\n\r\t]/', ' ', htmlspecialchars($comment['comment']));
$msg = str_replace(">", "", $msg);
$msg = str_replace(" ", "", $msg);
$strlen = strlen($msg);
$sum += $strlen;
}
$add = $sum;
$final = $add/$nbreviews2;
//Sum strings A
if ($data1a['channelHeader']['errorCode']) return;
$nbreviewsa = $data1a['searchResults'][0]['numAnnotations'];
$commentsa = $data1a['searchResults'][0]['annotations'];
$suma =0;
foreach($commentsa as $commenta){
$msga = preg_replace('/[\n\r\t]/', ' ', htmlspecialchars($commenta['comment']));
$msga = str_replace(">", "", $msga);
$msga = str_replace(" ", "", $msga);
$strlena = strlen($msga);
$suma += $strlena;
}
$adda = $suma;
$finala = $adda/$nbreviews2;
//Ratings
if ($data2['channelHeader']['errorCode']) return;
$nbratings = $data2['annotations'][0]['aggregateInfo']['numRatings'];
$nbstars = $data2['annotations'][0]['aggregateInfo']['averageRating'];
$delta = $nbratings - $nbreviews;
$ratio = $nbratings/$nbusers;
$ratio2 = $nbreviews/$nbusers;
////////////////////////////////////////////END LOOP REVIEWS
//PUT VALUES TOGETHER
$result[] = array($name,$label,$id,$category,$nbusers,$nbratings,$nbreviews,$nbreviewsa,$delta,$ratio,$ratio2,$nbstars,$nbreviews2,$add,$final,$adda,$finala);
}//END FOREACH
//print_r($result,false);
//DISPLAY RESULTS
for ($z = 0; $z <= 20; $z++) {
echo "<tr><td class=\"non\">" .$result[$z][0] . "</td><td class=\"non\">" .$result[$z][1] . "</td><td>" .$result[$z][3] . "</td><td>" .$result[$z][4] . "</td><td>" .$result[$z][5] . "</td><td>" .$result[$z][6] . "</td><td>" .$result[$z][7] . "</td><td>" .$result[$z][8] . "</td><td>" .$result[$z][9] . "</td><td>" .$result[$z][10] . "</td><td>" .$result[$z][11] . "</td><td>" .$result[$z][12] . "</td><td>" .$result[$z][13] . "</td><td>" .$result[$z][14] . "</td><td>" .$result[$z][15] . "</td><td>" .$result[$z][16] . "</td></tr>";
ob_flush();
flush();
}
}
}//END FOREACH

Resources