import requests as r
# Post every secret id listed in exmp.txt (one per line) and print the
# "message" field of the JSON reply next to the id that produced it.
with open('exmp.txt', encoding='utf-8') as key_file:
    # Read-only access is sufficient, and the context manager closes the handle.
    keys = key_file.read().splitlines()

for key in keys:
    try:
        first = r.post('https://myurl.com', data={'secret_id': key}, timeout=30)
        print(f"{key} | {first.json()['message']}")
    except (r.RequestException, ValueError, KeyError, TypeError) as exc:
        # Stay best-effort (move on to the next key) but report the failure
        # instead of silently discarding it.
        print(f"{key} | request failed: {exc!r}")
This is just simple code that gets the responses one by one, which takes forever to complete. I want to send 100 requests at once, but I have no idea what I should use.
Related
I am trying to use requests_mock in my unit tests to test API calls that run in a while loop. To end the while loop (that is, to meet its exit condition) I need my API to return a different response on a later call. My URL remains the same and only the params change, but requests_mock doesn't really care about that.
My function goes like this :
def func():
response = requests.get(
url=<url>
params={"limit": 1000}
headers=<headers>
).json()
while "next" in response["info"].keys():
response = requests.get(
url=<url>
params={"limit": 1000, "info": response["info"]}
headers=<headers>
).json()
My test looks like:
def test_url(requests_mock):
requests_mock.get(url, json=<response_with_info_key>)
func()
data_request = requests_mock.request_history[0]
assert data_request.query = "limit=1000"
What I want is for my while loop to end when the second response comes back without the "next" key. What I have already tried:
def test_url(requests_mock):
requests_mock.get(url, json=[<response_with_info_key>, <response_without_info_key>])
func()
data_request = requests_mock.request_history[0]
assert data_request.query = "limit=1000"
The simplest way to state the whole question is: how do I make requests-mock send two different responses for the same API?
I have the following code to make multiple REST calls. Basically, I have a dictionary where each key is a string and each value is JSON data that I need to pass as the payload to a REST API POST method.
At the moment, the dictionary contains 10 entries, so I need to make 10 REST calls.
At the moment, I have implemented using requests package in python3 which is synchronous in nature. So after 1 REST call, it waits for its response and similarly for 10 REST calls, it will wait 10 times for the response from API.
def createCategories(BACKEND_URL, token, category):
    """Create one backend category per payload returned by read_payloads(category).

    Args:
        BACKEND_URL: Base URL of the backend; a trailing slash is tolerated.
        token: Value sent in the "token" request header.
        category: Passed through unchanged to read_payloads().

    Returns:
        The string "categories created successfully." after every payload
        has been posted.

    Raises:
        KeyError: If a reply's JSON body has no "id" field.
    """
    # os.path.join is meant for filesystem paths (it inserts "\\" on Windows),
    # so the endpoint URL is assembled with plain string operations.
    url = BACKEND_URL.rstrip('/') + '/api/v1/category-creation'
    headers = {
        "token": f'{token}',
        "Content-Type": "application/json",
        "accept": "application/json",
    }
    # Only the payloads are needed; the dictionary keys are not sent anywhere.
    for category_payload in read_payloads(category).values():
        json_payload = json.dumps(category_payload)
        response = requests.request("POST", url, headers=headers, data=json_payload)
        response_data = response.json()
        print(response_data)
        category_id = response_data['id']
        message = 'The entity with id: ' + str(category_id) + ' is created successfully. '
        logging.info(message)
    return "categories created successfully."
I read that we need to use asyncio to make these asynchronous. What code changes do I need to make?
You can continue using the requests library. You need to use the threading or concurrent.futures modules to make several requests simultaneously.
Another option is to use some async library like aiohttp or some others.
import requests
from threading import current_thread
from concurrent.futures import ThreadPoolExecutor, Future
from time import sleep, monotonic
URL = "https://api.github.com/events"
def make_request(url: str) -> int:
    """GET *url*, pause for two seconds, and return the HTTP status code."""
    status = requests.get(url).status_code
    sleep(2.0)  # wait n seconds
    return status
def done_callback(fut: Future):
    """Print the outcome of a finished future.

    Args:
        fut: A future that has completed, failed, or been cancelled.

    Prints "Task was canceled" for a cancelled future; otherwise prints the
    current thread's name followed by either the raised error or the result.
    """
    # cancelled() must be tested first: exception() and result() both raise
    # CancelledError when called on a cancelled future.
    if fut.cancelled():
        print("Task was canceled")
        return
    error = fut.exception()
    if error is not None:
        print(f"{current_thread().name}. Error: {error}")
    else:
        print(f"{current_thread().name}. Result: {fut.result()}")
if __name__ == '__main__':
    # Queue the same URL twenty times on a five-worker pool and time the batch.
    urls = [URL] * 20  # 20 tasks
    start = monotonic()
    with ThreadPoolExecutor(5) as pool:
        for target in urls:
            pool.submit(make_request, target).add_done_callback(done_callback)
    print(f"Time passed: {monotonic() - start}")
I have a function that submits a search job to a REST API, waits for the API to respond, then downloads two sets of JSON data, converts both into pandas DataFrames, and returns both DataFrames. Below is a very simplified version of the function (minus error handling, logging, data scrubbing, etc.).
def getdata(searchstring, url, uname, passwd):
    """Submit a search job, poll its status URL, and return both result sets.

    Args:
        searchstring: Passed to CreateJSONPayload() to build the POST body.
        url: Endpoint the job is posted to; "/dataset1" and "/dataset2" are
            appended to it to download the results.
        uname: User name for HTTP basic auth.
        passwd: Password for HTTP basic auth.

    Returns:
        A tuple (dfData1, dfData2) of DataFrames built with pd.read_json from
        the two dataset responses.
    """
    headers = {'content-type': 'application/json'}
    auth = (uname, passwd)
    json_data = CreateJSONPayload(searchstring)
    rPOST = requests.post(url, auth=auth, data=json_data, headers=headers)
    # The POST reply carries the status URL under link -> href.
    statusURL = str(json.loads(rPOST.text)[u'link'][u'href'])
    while True:
        rGET = requests.get(statusURL, auth=auth)
        if rGET.status_code == 200:
            rGET1 = requests.get(url + "/dataset1", auth=auth)
            rGET2 = requests.get(url + "/dataset2", auth=auth)
            return pd.read_json(rGET1.text), pd.read_json(rGET2.text)
        elif rGET.status_code == "Other return code handling":
            # Placeholder branch kept from the original listing. It used the
            # undefined name StatusCode, which raised NameError on the first
            # non-200 poll; it now tests the polled status code.
            print("handle errors")  # Not relevant to question.
        else:
            # Job not ready yet: wait before polling again.
            sleep(15)
The function itself works as expected. However the API being called can take anywhere from a couple of minutes to an hour to return the data depending on the parameters I pass to it and I need to submit multiple searches to it, so I'd rather not submit each search one after the other.
What's the best way to parallelize calls to a function like this so that I can submit multiple requests at the same time, wait until all calls of the function have returned data, and then continue with data processing in the script?
I also need to be able to throttle the requests too, as the API rate limits me to no more than 15 concurrent connections at a time.
I use Scrapy 1.5.1
My goal is to go through the entire chain of requests for each variable before moving on to the next variable. For some reason Scrapy takes 2 variables, then sends 2 requests, then takes another 2 variables, and so on.
CONCURRENT_REQUESTS = 1
Here is my code sample:
def parsed(self, response):
    """Yield one POST request per row of the results grid.

    Reads the 'search' value from response.meta and forwards it to
    res_before_get through each request's meta.
    """
    # inspect_response(response, self)
    search = response.meta['search']
    # XPath selects attributes with "@"; "[#id=...]" is not valid XPath.
    # The first <tr> is skipped (presumably the header row - confirm).
    rows = response.xpath("//table[@id='ctl00_ContentPlaceHolder1_GridView1']/tr")[1:]
    for idx, _row in enumerate(rows):
        data = {
            '__EVENTARGUMENT': 'Select${}'.format(idx),
        }
        yield scrapy.Request(
            response.url,
            method='POST',
            headers=self.headers,
            body=urlencode(data),
            callback=self.res_before_get,
            meta={'search': search},
            dont_filter=True,
        )
def res_before_get(self, response):
    """Follow a postback response with a GET for the details page."""
    # inspect_response(response, self)
    details_url = 'http://www.moj-yemen.net/Search_detels.aspx'
    yield scrapy.Request(details_url, callback=self.results, dont_filter=True)
My desired behavior is:
1 value from Parse is sent to res_before_get and then i do smth with it.
then another values from Parse is sent to res_before_get and so on.
Post
Get
Post
Get
But currently Scrapy takes 2 values from Parse and adds them to the queue, then sends 2 requests from res_before_get. Thus I'm getting duplicate results.
Post
Post
Get
Get
What do I miss?
P.S.
This is asp.net site. Its logic is as follows:
makes POST request with search payload.
Make GET request to get actual data.
Both request share the same sessionID
Thats why it is important to preserve the order.
At the moment I'm getting POST1 and then POST2. Since the sessionID is then associated with POST2, both GET1 and GET2 return the same page.
Scrapy works asynchronously, so you cannot expect it to respect the order of your loops or anything.
If you need it to work sequentially, you'll have to accommodate the callbacks to work like that, for example:
# Sketch of the first callback: it yields a request handled by parse2 and
# passes the state parse2 needs through the request's meta.
def parse1(self, response):
...
yield Request(..., callback=self.parse2, meta={...(necessary information)...})
# Sketch of the second callback: while there is still information left to
# process it yields a request whose callback is parse2 again, carrying the
# remaining information in meta.
def parse2(self, response):
...
if (necessary information):
yield Request(...,
callback=self.parse2,
meta={...(remaining necessary information)...},
)
I am trying to run a function that wraps requests.post: it takes its input from one column of a pandas DataFrame and saves the response to a different column of the same DataFrame.
import requests, json
import pandas as pd
import argparse
def postRequest(input, url):
    """POST the JSON document in *input* to *url* and return the decoded JSON reply."""
    body = json.loads(input)
    reply = requests.post(
        url=url,
        json=body,
        headers={'content-type': 'application/json'},
    )
    return reply.json()
def payload(text):
    """Return the JSON-encoded request body carrying *text*.

    Args:
        text: Phrase stored under the "mess" entry's "text" field.

    Returns:
        The payload serialized with json.dumps. org_id and ver_id are read
        from module scope.
    """
    # get proper payload from text
    std_payload = {
        "auth_key": "key",
        "org": {"id": org_id, "name": "org"},
        "ver": {"id": ver_id, "name": "ver"},
        # The text goes into the existing "mess" entry; writing to a
        # non-existent "message" key raised KeyError.
        "mess": {"id": 80, "text": text},
    }
    return json.dumps(std_payload)
# Build a new frame from df: for each input row, one copy tagged
# 'orginalphrase', followed by variants of its text in which a single word
# is replaced by 'z' (the replaced word is recorded in the 'word' column).
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the statements below cannot be confirmed from here.
def find(df):
ff=pd.DataFrame(columns=['text','expected','word','payload','response'])
# count is the row label in ff that is written next.
count=0
for leng in range(0,len(df)):
search=df.text[leng].split()
ff.loc[count]=df.iloc[leng]
ff.loc[count,'word']='orginalphrase'
count=count+1
for w in range(0,len(search)):
# "3174" is the placeholder the caller substitutes for empty cells.
if df.text[leng]=="3174":
# NOTE(review): DataFrame.append returns a new frame and the result is
# discarded here, so this call does not modify ff.
ff.append(df.iloc[leng],ignore_index=True)
ff.loc[count,'text']="3174"
ff.loc[count,'word']=None
ff.loc[count,'expected']='[]'
continue
# Copy the word list so the replacement below only affects this variant.
word=search[:]
ff.loc[count,'word']=word[w]
word[w]='z'
phrase=' '.join(word)
ff.loc[count,'text']=phrase
ff.loc[count,'expected']=df.loc[leng,'expected']
count=count+1
if df.text[leng]=="3174":
continue
return ff
# read in csv of phrases to be tested
# read in csv of phrases to be tested
df = pd.read_csv(filename, engine='python')
# allows empty cells by setting them to the placeholder phrase "3174"
df = df.fillna("3174")
sf = find(df)
# Build each row's payload from that row's own text. Assigning to
# sf['payload'] inside a loop replaced the whole column on every pass, so
# every row ended up with the payload of the last row.
sf['payload'] = sf['text'].map(payload)
# Post the prepared JSON payloads (not the raw phrases from df) and keep one
# response per row of sf.
sf['response'] = [postRequest(body, url) for body in sf['payload']]
In all my tests this operation runs over the DataFrame one row at a time, so when my DataFrame is large it can take a few hours.
Searching online for running things in parallel give me a few methods but I do not understand what the methods are doing, I have seen pooling and threading examples while i can get the examples to work. Such as:
Simultaneously run POST in Python
Asynchronous Requests with Python requests
When I try to apply them to my code, I cannot get any of the methods to work with postRequest; it still goes one by one.
Can any one provide assistance in getting the paralleling to work correctly. If more informations is required please let me know.
Thanks
Edit:
here is the last thing I was working with
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    # Remember which row each future was built from (first ten rows only).
    future_to_index = {
        executor.submit(postRequest, sf.payload[index], trends_url): index
        for index in range(10)
    }
    for future in concurrent.futures.as_completed(future_to_index):
        # Futures complete in arbitrary order, so each result is written to
        # the row it was submitted for rather than to a running counter
        # (the original also mixed up the names "counts" and "count").
        sf.at[future_to_index[future], 'response'] = future.result()
Also, the DataFrame has anywhere between 2000 and 4000 rows, so doing it sequentially can take up to 4 hours.