How to make my program asynchronous with asyncio in Python 3.7?

My code searches for data in an ES (Elasticsearch) database and adds some new data to it. It currently runs in a single process, which is too inefficient, so I want to add asyncio to my code. How can I do it? I removed the URL of my ES instance for security reasons. Can anyone help me?
import json
import requests
from elasticsearch import Elasticsearch
import asyncio

class Cited:
    def __init__(self):
        self.es = Elasticsearch(
            [''],
        )

    async def get_es_item(self):
        query_body = {
            "from": 0,
            "size": 10000,
            "query": {
                "bool": {
                    "must": {
                        "exists": {
                            "field": "extra.S2PaperId"
                        }
                    },
                    "must_not": {
                        "exists": {
                            "field": "extra.citations"
                        }
                    }
                }
            }
        }
        items = self.es.search(index='item', body=query_body, doc_type=None, request_timeout=6000)
        items = items['hits']['hits']
        for item in items:
            item_type = item['_type']
            item_id = item['_id']
            S2PaperId = item['_source']['extra']['S2PaperId']
            self.search_ss(item_id=item_id, paperId=S2PaperId, item_type=item_type)

    def search_ss(self, item_id, paperId, item_type):
        headers = {
            'user-agent': 'Mozilla/5.0 (Macintosh Intel Mac OS X 10_13_4) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
        }
        req = requests.get('https://api.semanticscholar.org/v1/paper/' + paperId, headers=headers, timeout=100)
        # logging.info(req.url)
        if req.status_code == 200:
            req = json.loads(req.text)
            citations = len(req['citations'])
            citationVelocity = req['citationVelocity']
            influentialCitationCount = req['influentialCitationCount']
            self.es.update(index='item', doc_type=item_type, id=item_id,
                           body={'doc': {'extra': {'citations': citations, 'citationVelocity': citationVelocity,
                                                   'influentialCitationCount': influentialCitationCount}}},
                           request_timeout=6000)
            print(item_id, item_type, citations, citationVelocity, influentialCitationCount)
        else:
            print('s2paper error, defaulting to 0: ' + item_id, item_type)
            self.es.update(index='item', doc_type=item_type, id=item_id,
                           body={'doc': {'extra': {'citations': 0, 'citationVelocity': 0,
                                                   'influentialCitationCount': 0}}},
                           request_timeout=6000)

cited = Cited()
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.gather(cited.get_es_item(), cited.get_es_item(), cited.get_es_item(), cited.get_es_item()))
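For reference, one common approach (a minimal sketch, not from the original post): requests and the elasticsearch client are blocking, so simply declaring get_es_item as async and gathering it several times will not overlap any work. Offloading each blocking HTTP call to the default thread pool with loop.run_in_executor lets asyncio.gather actually run the Semantic Scholar requests concurrently. fetch_paper and fetch_all below are hypothetical helpers:

import asyncio
import requests

# Hypothetical helper: run one blocking Semantic Scholar request in the
# default thread pool so it does not stall the event loop.
async def fetch_paper(loop, paper_id):
    url = 'https://api.semanticscholar.org/v1/paper/' + paper_id
    return await loop.run_in_executor(None, requests.get, url)

# Hypothetical helper: overlap many such requests with asyncio.gather.
async def fetch_all(paper_ids):
    loop = asyncio.get_event_loop()
    tasks = [fetch_paper(loop, pid) for pid in paper_ids]
    return await asyncio.gather(*tasks)

# Usage (Python 3.7):
# responses = asyncio.get_event_loop().run_until_complete(fetch_all(ids))

The es.update calls can be offloaded the same way, or the whole pipeline moved to async-native clients such as aiohttp and elasticsearch-py's AsyncElasticsearch.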

Related

writeJSON groovy writes only first value to file

Below is my Jenkins Groovy script that reads data and writes it out as a JSON file.
import groovy.json.JsonOutput

def arn = ""
def name = ""
def os = ""

pipeline {
    agent any
    stages {
        stage('Hello') {
            steps {
                script {
                    def ret = sh(script: 'aws devicefarm list-devices', returnStdout: true)
                    def jsonObj = readJSON text: ret
                    def currentVersion = "13.0"
                    def values = currentVersion.split('\\.')
                    def json_str = ""
                    for (String item : jsonObj.devices) {
                        os = item.os
                        if (!os.contains("\\.")) {
                            osV = os + ".0"
                        } else {
                            osV = os
                        }
                        def osValues = osV.split('\\.')
                        if (values[0].toInteger() <= osValues[0].toInteger()) {
                            name = item.name
                            def data = [
                                name: "$name",
                                os: "$os",
                            ]
                            json_str += JsonOutput.toJson(data)
                        }
                    }
                    def json_beauty = JsonOutput.prettyPrint(json_str)
                    writeJSON(file: 'message124.json', json: json_beauty)
                }
            }
        }
    }
}
But here it only saves the first value, not all of them. Could you tell me where I am going wrong?
It's not 100% clear what you actually want to end up with, but I think you want a JSON file containing the items whose OS version is at least some magic number. The likely culprit is that json_str += JsonOutput.toJson(data) concatenates separate JSON objects back to back, which is not valid JSON, so only the first object survives the prettyPrint/write step.
It's helpful to provide enough data to duplicate the problem, and to eliminate everything that isn't directly related. I think what you want is something like this:
jsonObj = [
    devices: [
        [os: '3', name: 'Name 1'],
        [os: '10.2', name: 'Name 10.2'],
        [os: '7', name: 'Name 7'],
        [os: '3', name: 'Name 3'],
    ],
]
values = ['5']

def normalizeOs(os) {
    os.contains(".") ? os : "${os}.0"
}

def shouldSkip(normalizedOs) {
    osValues = normalizedOs.split('\\.')
    values[0].toInteger() > osValues[0].toInteger()
}

selected = []
for (item in jsonObj.devices) {
    os = normalizeOs(item.os)
    if (shouldSkip(os)) continue
    selected.push([name: item.name, os: os])
}

json = new groovy.json.JsonBuilder(selected)
println(json)
Outputs:
[{"name":"Name 7","os":"7.0"},{"name":"Name 10.2","os":"10.2"}]

SF OCAPI pagination loop

I am trying to get a pagination loop inside another loop, where I query multiple environments in Salesforce OCAPI. My code goes like this. First I declare some variables to use in the loop:
URLs = ["it", "at", "lv", "ee", "lt"]
base_url = "https://example.net/s/-/dw/data/v22_4/customer_lists/"
start = 0
count = 200
Then I create the body for the requests:
body = """
{
"query": {
"bool_query": {
"must": [
{
"filtered_query": {
"query": {
"match_all_query": {}
},
"filter": {
"range_filter": {
"field": "creation_date",
"from": """ + start_time_formatted + """,
"to": """ + end_time_formatted + """
}
}
}
}
]
}
},
"expand":["primary_address"],
"select": "(**)",
"count": """ + str(count) + """,
"start": """ + str(start) + """
}"""
Based on these variables I create an outer for loop that queries each URL environment. I tried to put a while loop for pagination inside the for loop, but it doesn't give me the expected result. The code is below:
for country in URLs:
    url = base_url + str(country) + "/customer_search"
    response = requests.post(url, headers={'Content-Type': 'application/json', 'Accept': 'application/json', 'Authorization': 'Bearer ' + oauth_token}, data=body)
    data = response.json()
    while (start < total):
        start = start * count
        response = requests.post(url, headers={'Content-Type': 'application/json', 'Accept': 'application/json', 'Authorization': 'Bearer ' + oauth_token}, data=body)
        data = response.json()
        dataframe = pd.json_normalize(data['hits'], max_level=2)
        dfs.append(dataframe)
df = pd.concat(dfs, ignore_index=True)
The JSON output that I usually get from the query looks similar to this (with records inside hits, of course):
{'_v': '22.4',
 '_type': 'customer_search_result',
 'count': 200,
 'expand': ['primary_address'],
 'hits': [],
 'next': {'_type': 'result_page', 'count': 200, 'start': 200},
 'query': {'bool_query': {'_type': 'bool_query',
   'must': [{'filtered_query': {'_type': 'filtered_query',
     'filter': {'range_filter': {'_type': 'range_filter',
       'field': 'creation_date',
       'from': '2022-01-01T00:00:00.000Z',
       'to': '2022-05-09T17:03:16.000Z'}},
     'query': {'match_all_query': {'_type': 'match_all_query'}}}}]}},
 'select': '(**)',
 'start': 0,
 'total': 650}
Now what I need is for the while loop to increase the starting point with each iteration and collect all the records for each URL, then stop at the end and give the results. Do you have any idea how I can create this loop? count is 200 because that's the maximum number of records I'm allowed to query per call. Also, count should then change based on the total number of records: for example, if it goes in batches of 200, then for the last 150 records count should change to 150 instead of 200.
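For reference, a minimal sketch of one way to structure that loop (not from the original post; build_body(start, count) is a hypothetical helper that re-renders the JSON body above with the current offset). The key points are to reset start for each country, read total from the first response, and advance start by count rather than multiplying. The last page typically just returns the remaining records (e.g. 50 when total is 650), so count itself does not need to change:

import requests
import pandas as pd

dfs = []
for country in URLs:
    url = base_url + country + "/customer_search"
    start = 0
    total = None
    while total is None or start < total:
        response = requests.post(
            url,
            headers={'Content-Type': 'application/json',
                     'Accept': 'application/json',
                     'Authorization': 'Bearer ' + oauth_token},
            data=build_body(start, count),  # hypothetical helper, see body above
        )
        data = response.json()
        total = data['total']  # e.g. 650 in the sample response
        if data['hits']:
            dfs.append(pd.json_normalize(data['hits'], max_level=2))
        start += count
df = pd.concat(dfs, ignore_index=True)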

Web scraping- how can i get the price of all the available posts in a web page

This code was provided to me; it returns a lot of info about one post. I want to use it to get the same info for the posts at this URL, and to increase the number of posts on the page, which is now only 20.
import requests

# https://haraj.com.sa/1179070147
def main(url):
    params = {
        'queryName': 'detailsPosts_singlePost',
        'token': '',
        'clientid': '812f41b2-9936-4405-aa9c-378db19b8cc4',
        'version': '8.2.9 , 10 18 - 7 - 21'
    }
    data = {
        "query": "query($ids:[Int]) { posts( id:$ids) {\n\t\titems {\n\t\t\tid status authorUsername title city postDate updateDate hasImage thumbURL authorId bodyHTML bodyTEXT city tags imagesList commentStatus commentCount upRank downRank geoHash\n\t\t}\n\t\tpageInfo {\n\t\t\thasNextPage\n\t\t}\n\t\t} }",
        "variables": {
            "ids": [
                79070147
            ]
        }
    }
    r = requests.post(url, params=params, json=data)
    print(r.json())

main('https://graphql.haraj.com.sa/')
Any help is appreciated, thanks.
Loop over the pages to obtain the desired information.
Note that you can pick up all the information directly from the JSON response, without needing to call the API again for each post.
import requests
from pprint import pp

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0'
}

def main(url):
    with requests.Session() as req:
        req.headers.update(headers)
        for page in range(1, 3):  # increase the number of pages from here
            params = {
                'queryName': 'detailsPosts_tag_page{}'.format(page),
                'token': '',
                'clientid': '812f41b2-9936-4405-aa9c-378db19b8cc4',
                'version': '8.2.9 , 10 18 - 7 - 21'
            }
            data = {
                "query": "query($tag:String,$page:Int) { posts( tag:$tag, page:$page) {\n\t\titems {\n\t\t\tid status authorUsername title city postDate updateDate hasImage thumbURL authorId bodyHTML bodyTEXT city tags imagesList commentStatus commentCount upRank downRank geoHash geoCity geoNeighborhood\n\t\t}\n\t\tpageInfo {\n\t\t\thasNextPage\n\t\t}\n\t\t} }",
                "variables": {
                    "page": page,
                    "tag": "حراج العقار"
                }
            }
            r = req.post(url, params=params, json=data)
            if r.status_code == 200:
                for i in r.json()['data']['posts']['items']:
                    pp(i)
                    # check i.keys()
            else:
                exit(f"Page# {page} does not exist; stopping.")

main('https://graphql.haraj.com.sa/')
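A possible refinement (a sketch, not part of the original answer): the GraphQL query already selects pageInfo { hasNextPage }, so instead of a fixed range(1, 3) you can keep requesting pages until the API reports there is no next page. Here build_request(page) is a hypothetical helper returning the params and data dicts shown above for a given page number:

import requests
from pprint import pp

def scrape_all(url, build_request):
    with requests.Session() as req:
        page = 1
        while True:
            params, data = build_request(page)  # hypothetical helper
            r = req.post(url, params=params, json=data)
            payload = r.json()['data']['posts']
            for item in payload['items']:
                pp(item)
            # stop as soon as the API says there are no more pages
            if not payload['pageInfo']['hasNextPage']:
                break
            page += 1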

How can I include a Count in my Django Rest Framework Pagination to count the total number of rows?

I have written code in Python using Django Rest Framework to fetch data from the database (MySQL) and view it in the browser using pagination (PageNumberPagination). It is working fine, but the problem is that the "count" is not coming through in the view. I want to show the total number of rows in count. I am getting a response like this:
GET /fetchdata/?page=1&per_page=10
HTTP 200 OK
Allow: GET, OPTIONS, POST
Content-Type: application/json
Vary: Accept
[
    {
        "id": 53,
        "first_name": "Rounak"
    },
    {
        "id": 54,
        "first_name": "Rounak"
    }
]
But I want the response to look like this:
HTTP 200 OK
{
    "count": 1023,
    "next": "https://api.example.org/persons/?page=2&per_page=10",
    "previous": null,
    "results": [
        …
    ]
}
Below is my code; please suggest something.
view.py
from rest_framework.pagination import PageNumberPagination
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework import status
from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
from .pagination import StandardResultsSetPagination
# plus your model and serializer imports

@api_view(['GET', 'POST'])
def index(request):
    if request.method == 'GET':
        all_dataobj = fetchdata.objects.all()
        paginator = StandardResultsSetPagination()
        result_page = paginator.paginate_queryset(all_dataobj, request)
        pserializer = fetchdataSerializers(result_page, many=True)
        return Response(pserializer.data, status=status.HTTP_201_CREATED)
    elif request.method == 'POST':
        serializer = fetchdataSerializers(data=request.data)
        if serializer.is_valid():
            serializer.save()
            return Response(serializer.data, status=status.HTTP_201_CREATED)
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
pagination.py
from rest_framework import pagination

class StandardResultsSetPagination(pagination.PageNumberPagination):
    page_size = 10
    page_query_param = 'page'
    page_size_query_param = 'per_page'
    max_page_size = 1000
settings.py
REST_FRAMEWORK = {
    'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.PageNumberPagination',
    'PAGE_SIZE': 10
}
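For reference, the usual fix here (not shown in the post) is to let the paginator build the response envelope: PageNumberPagination provides get_paginated_response(), which wraps the serialized page with count, next, and previous. A minimal sketch of the GET branch:

from rest_framework.decorators import api_view

@api_view(['GET'])
def index(request):
    all_dataobj = fetchdata.objects.all()
    paginator = StandardResultsSetPagination()
    result_page = paginator.paginate_queryset(all_dataobj, request)
    pserializer = fetchdataSerializers(result_page, many=True)
    # get_paginated_response() adds count/next/previous around the results
    return paginator.get_paginated_response(pserializer.data)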

Moosend API for new subscribers in django

I tried to follow this tutorial to create a new subscriber in Moosend from my Django app.
from urllib2 import Request, urlopen

values = """
{
  "Name": "Paul",
  "Email": "someEmail#email.com",
  "HasExternalDoubleOptIn": false,
  "CustomFields": [
    "Age=25",
    "Country=USA"
  ]
}"""
headers = {
    'Content-Type': 'application/json',
    'Accept': 'application/json'
}
request = Request('https://api.moosend.com/v3/subscribers/7cfad3edfd9ad-07d7-4c51-810e-74e27sdsf8505c2e/subscribe.json?apikey=77f6s34dfd8914-4e3c-4d61-8435-9346f5b4adf6&New%20item=', data=values, headers=headers)
response_body = urlopen(request).read()
print response_body
That code works in Python 2 to create a new subscriber in the mailing list. I made some changes so that it should work in Python 3, but it is still not inserting new subscribers into my mailing list in Moosend.
The changes I made for Python 3:
from urllib.request import urlopen
request2 = urllib.request.urlopen('https://api.moosend.com/v3/subscribers/7cfad3edfd9ad-07d7-4c51-810e-74e27sdsf8505c2e/subscribe.json?apikey=77f6s34dfd8914-4e3c-4d61-8435-9346f5b4adf6&New%20item=', data=values, headers=headers)
response_body = request2.read()
print(response_body)
I need help creating a new subscriber on my Moosend mailing list using Python 3.
Below is the updated code, for Python 3.5+, that works and manages to add a new subscriber to the Moosend mailing list:
from urllib.request import urlopen
from urllib.request import Request

values = """
{
  "Name": "Paul",
  "Email": "someEmail#email.com",
  "HasExternalDoubleOptIn": false,
  "CustomFields": [
    "Age=25",
    "Country=USA"
  ]
}"""
headers = {
    'Content-Type': 'application/json',
    'Accept': 'application/json'
}
data = values.encode()  # the request body must be bytes in Python 3
req = Request('https://api.moosend.com/v3/subscribers/7cfad3edfd9ad-07d7-4c51-810e-74e27sdsf8505c2e/subscribe.json?apikey=77f6s34dfd8914-4e3c-4d61-8435-9346f5b4adf6&New%20item=', data=data, headers=headers)
response_body = urlopen(req).read()
print(response_body)
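The key change from the Python 2 version is data = values.encode(): in Python 3, Request and urlopen require the request body as bytes, whereas Python 2 accepted a plain string. The earlier Python 3 attempt also passed headers= directly to urlopen, which urlopen does not accept; headers belong on the Request object.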
