global twitter_user_info
twitter_user_info=[]
def get_user_info(twitter_user):
"""
An example of using the query_user_info method
:param twitter_user: the twitter user to capture user data
:return: twitter_user_data: returns a dictionary of twitter user data
"""
user_info = query_user_info(user=twitter_user)
twitter_user_data = {}
twitter_user_data["user"] = user_info.user
twitter_user_data["fullname"] = user_info.full_name
twitter_user_data["location"] = user_info.location
twitter_user_data["blog"] = user_info.blog
twitter_user_data["date_joined"] = user_info.date_joined
twitter_user_data["id"] = user_info.id
twitter_user_data["num_tweets"] = user_info.tweets
twitter_user_data["following"] = user_info.following
twitter_user_data["followers"] = user_info.followers
twitter_user_data["likes"] = user_info.likes
twitter_user_data["lists"] = user_info.lists
return twitter_user_data
absd=[]
def main():
start = time.time()
csv = pd.read_csv('operationbandar_users.csv')
users = csv['username']
pool = Pool(4)
for user in pool.map(get_user_info,users):
twitter_user_info.append(user)
cols=['id','fullname','date_joined','location','blog', 'num_tweets','following','followers','likes','lists']
data_frame = pd.DataFrame(twitter_user_info, index=absd, columns=cols)
data_frame.index.name = "Users"
data_frame.sort_values(by="followers", ascending=False, inplace=True, kind='quicksort', na_position='last')
elapsed = time.time() - start
print(f"Elapsed time: {elapsed}")
display(data_frame)
This code is returning the following error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/nrjkumar/anaconda3/envs/Scraping/lib/python3.7/multiprocessing/pool.py", line 121, in worker
result = (True, func(*args, *kwds))
File "/home/nrjkumar/anaconda3/envs/Scraping/lib/python3.7/multiprocessing/pool.py", line 44, in mapstar
return list(map(*args))
File "examples/get_twitter_user_data_1.py", line 43, in get_user_info
twitter_user_data["user"] = user_info.user
AttributeError: 'NoneType' object has no attribute 'user'
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "examples/get_twitter_user_data_1.py", line 89, in
main()
File "examples/get_twitter_user_data_1.py", line 66, in main
for user in pool.map(get_user_info,users):
File "/home/nrjkumar/anaconda3/envs/Scraping/lib/python3.7/multiprocessing/pool.py", line 268, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "/home/nrjkumar/anaconda3/envs/Scraping/lib/python3.7/multiprocessing/pool.py", line 657, in get
raise self._value
AttributeError: 'NoneType' object has no attribute 'user'
I have searched here for possible cases with pool.map and passing dictionary as parameter but couldn't find the issue. I am a newbie to python. Can anybody help?
query_user_info()
def query_user_info(user):
"""
Returns the scraped user data from a twitter user page.
:param user: the twitter user to web scrape its twitter page info
"""
try:
user_info = query_user_page(INIT_URL_USER.format(u=user))
if user_info:
logger.info("Got user information from username {}".format(user))
return user_info
except KeyboardInterrupt:
logger.info("Program interrupted by user. Returning user information gathered so far...")
except BaseException:
logger.exception("An unknown error occurred! Returning user information gathered so far...")
logger.info("Got user information from username {}".format(user))
return user_info
query_user_page()
def query_user_page(url, retry=10, timeout=60):
"""
Returns the scraped user data from a twitter user page.
:param url: The URL to get the twitter user info from (url contains the user page)
:param retry: Number of retries if something goes wrong.
:return: Returns the scraped user data from a twitter user page.
"""
print("reached url:",url)
try:
proxy = next(proxy_pool)
logger.info('Using proxy {}'.format(proxy))
response = requests.get(url, headers=HEADER, proxies={"http": proxy})
html = response.text or ''
user_info = User.from_html(html)
if not user_info:
return None
return user_info
except requests.exceptions.HTTPError as e:
logger.exception('HTTPError {} while requesting "{}"'.format(
e, url))
except requests.exceptions.ConnectionError as e:
logger.exception('ConnectionError {} while requesting "{}"'.format(
e, url))
except requests.exceptions.Timeout as e:
logger.exception('TimeOut {} while requesting "{}"'.format(
e, url))
if retry > 0:
logger.info('Retrying... (Attempts left: {})'.format(retry))
return query_user_page(url, retry-1)
logger.error('Giving up.')
return None
Related
I am a beginner in Python, and trying out making a bot which automatically Tweets anything which is posted on a Subreddit that I have made.
I took help from some of the tutorials online which has the following code
import praw
import json
import requests
import tweepy
import time
access_token = '************************************'
access_token_secret = '************************************'
consumer_key = '************************************'
consumer_secret = '************************************'
def strip_title(title):
if len(title) == 94:
return title
else:
return title[:93] + "..."
def tweet_creator(subreddit_info):
post_dict = {}
post_ids = []
print("[bot] Getting posts from Reddit")
for submission in subreddit_info.get_hot(limit=20):
post_dict[strip_title(submission.title)] = submission.url
post_ids.append(submission.id)
print("[bot] Generating short link using goo.gl")
mini_post_dict = {}
for post in post_dict:
post_title = post
post_link = post_dict[post]
short_link = shorten(post_link)
mini_post_dict[post_title] = short_link
return mini_post_dict, post_ids
def setup_connection_reddit(subreddit):
print("[bot] setting up connection with Reddit")
r = praw.Reddit(' %s' %(subreddit))
subreddit = r.get_subreddit(subreddit)
return subreddit
def shorten(url):
headers = {'content-type': 'application/json'}
payload = {"longUrl": url}
url = "https://www.googleapis.com/urlshortener/v1/url"
r = requests.post(url, data=json.dumps(payload), headers=headers)
link = json.loads(r.text)['id']
return link
def duplicate_check(id):
found = 0
with open('posted_posts.txt', 'r') as file:
for line in file:
if id in line:
found = 1
return found
def add_id_to_file(id):
with open('posted_posts.txt', 'a') as file:
file.write(str(id) + "\n")
def main():
subreddit = setup_connection_reddit('*Name of the subreddit*')
post_dict, post_ids = tweet_creator(subreddit)
tweeter(post_dict, post_ids)
def tweeter(post_dict, post_ids):
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
for post, post_id in zip(post_dict, post_ids):
found = duplicate_check(post_id)
if found == 0:
print("[bot] Posting this link on twitter")
print(post + " " + post_dict[post] + " #Python #reddit #bot")
api.update_status(post+" "+post_dict[post]+" #Python #reddit #bot")
add_id_to_file(post_id)
time.sleep(30)
else:
print("[bot] Already posted")
if __name__ == '__main__':
main()
The code seems fine in PyCharm, however I am getting the following error when I try to run it directly from the folder via Terminal using the rolling code, reddit_bot2.py is my file name:
python3 reddit_bot2.py
When I try to run the code I am getting the following error:
mahesh#Maheshs-MacBook-Air Atoms % python3 reddit_bot2.py
[bot] setting up connection with Reddit
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/configparser.py", line 846, in items
d.update(self._sections[section])
KeyError: '**Name of the subreddit to fetch posts from**'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/mahesh/Python_Bot/Atoms/reddit_bot2.py", line 82, in <module>
main()
File "/Users/mahesh/Python_Bot/Atoms/reddit_bot2.py", line 62, in main
subreddit = setup_connection_reddit('Bot167')
File "/Users/mahesh/Python_Bot/Atoms/reddit_bot2.py", line 36, in setup_connection_reddit
r = praw.Reddit(' %s' %(subreddit))
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/praw/reddit.py", line 227, in __init__
self.config = Config(
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/praw/config.py", line 85, in __init__
self.custom = dict(Config.CONFIG.items(site_name), **settings)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/configparser.py", line 849, in items
raise NoSectionError(section)
configparser.NoSectionError: No section: ' Bot167'
You provided the name of a praw.ini configuration which does not exist.
For help with creating a Reddit instance, visit
https://praw.readthedocs.io/en/latest/code_overview/reddit_instance.html
For help on configuring PRAW, visit
https://praw.readthedocs.io/en/latest/getting_started/configuration.html
Any help in this regards would be highly appreciated.
Thanks :)
I'm trying to crete linebot through python Django, I want to send some messages and let it scrape the website. Since there is form on the website, I use post request to send the form.
Although I scrape the data successfully, there is error message showed in the python. It seems linebot occupy post method in Django and I send another post request again. I'm not sure my understanding is correct. I can't find any solution about this. Could someone teach me how to fix it?
Exception happened during processing of request from ('127.0.0.1', 50246)
Traceback (most recent call last):
File "/usr/local/anaconda3/lib/python3.8/socketserver.py", line 650, in process_request_thread
self.finish_request(request, client_address)
File "/usr/local/anaconda3/lib/python3.8/socketserver.py", line 360, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "/usr/local/anaconda3/lib/python3.8/socketserver.py", line 720, in __init__
self.handle()
File "/usr/local/anaconda3/lib/python3.8/site-packages/django/core/servers/basehttp.py", line 174, in handle
self.handle_one_request()
File "/usr/local/anaconda3/lib/python3.8/site-packages/django/core/servers/basehttp.py", line 182, in handle_one_request
self.raw_requestline = self.rfile.readline(65537)
File "/usr/local/anaconda3/lib/python3.8/socket.py", line 669, in readinto
return self._sock.recv_into(b)
ConnectionResetError: [Errno 54] Connection reset by peer
Below is my code, I receive some keyword and post request to website. Finally, reply to user
#csrf_exempt
def callback(request):
if request.method == 'POST':
signature = request.META['HTTP_X_LINE_SIGNATURE']
body = request.body.decode('utf-8')
content = "None"
try:
events = parser.parse(body, signature)
except InvalidSignatureError:
return HttpResponseForbidden()
except LineBotApiError:
return HttpResponseBadRequest()
for event in events:
if isinstance(event, MessageEvent):
msg = event.message.text.strip()
if msg.startswith('!'):
msg = msg.replace('!', '')
if msg == 'temp':
content = "HELP"
elif msg.startswith(' '):
content = 'Command not found'
elif ' ' in msg:
info = msg.split(' ')
if len(info) > 2:
content = 'Too many arguments'
else:
ID = info[1]
temp = temperature.TempReport(ID)
content = temp.scrape()
#content = 'Test'
else:
content = 'Unknown command'
line_bot_api.reply_message(
event.reply_token,
TextSendMessage(text=content)
)
print('submit success')
else:
line_bot_api.reply_message(
event.reply_token,
TextSendMessage(text=msg)
)
return HttpResponse()
else:
return HttpResponseBadRequest()
Here is scrape code
def scrape(self):
if not self.postToServer():
return f'ID:{self.ID} submit temperature fail!'
return f'ID:{self.ID} submit temperature successfully
def postToServer(self):
self.session.post(self.url_in, data=self.payload)
sleep(0.1)
resp = self.session.get(self.url_out)
sleep(0.1)
soup = BeautifulSoup(resp.text, features='lxml')
result = soup.find(class_='title-text').text.strip()
return 0 if not 'completed' in result else
The post request work fine and the error happened when I return HttpResponse in callback function. I don't know what is the issue here...
(first post sorry if i do this wrong) So I am making a bot (on discord) for me and my friends using discord.py (since python is the easiest code ever) and I've came across this. I need to get values from keys INSIDE OTHER keys. How do I do this?
So I've tried to change res to res.text and res.json and res.content and I could only find the "data" but not "id","name" or "description" which I need.
import discord
from discord.ext.commands import Bot
from discord.ext import commands
import requests, json
import asyncio
Client = discord.Client()
client = commands.Bot(command_prefix='?')
#client.event
async def on_ready():
print('started')
#client.command()
async def findfriends(ctx,userid):
res = requests.get("https://friends.roblox.com/v1/users/"+userid+"/friends")
var = json.loads(res.text)
def a(a):
ID = a['id']
return ID
def b(b):
Name = b['name']
return Name
def c(c):
description = c['description']
return description
data = var['data'] #I can get this working
print(data)
#cv = data['name'] #but this wont work
#id = a(var) #nor this
#name = b(var) #nor this
#desc = c(var) #nor this
#await ctx.send("\nID: " + id + "\nName: " + name + "\nDesc: " + desc) # this is just sending the message
client.run(BOT TOKEN HERE) #yes i did indeed add it but just for the question i removed it
As I said in the code, I can only get "data" working and not id,name or desc. For id name and desc it just throws an error
Ignoring exception in command findfriends:
Traceback (most recent call last):
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 79, in wrapped
ret = await coro(*args, **kwargs)
File "C:/Users/Calculator/PycharmProjects/ryhrthrthrhrebnfbngfbfg/a.py", line 277, in findfriends
id = a(var) #nor this
File "C:/Users/Calculator/PycharmProjects/ryhrthrthrhrebnfbngfbfg/a.py", line 266, in a
ID = a['id']
KeyError: 'id'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\bot.py", line 863, in invoke
await ctx.command.invoke(ctx)
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 728, in invoke
await injected(*ctx.args, **ctx.kwargs)
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 88, in wrapped
raise CommandInvokeError(exc) from exc
discord.ext.commands.errors.CommandInvokeError: Command raised an exception: KeyError: 'id'
and
Ignoring exception in command findfriends:
Traceback (most recent call last):
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 79, in wrapped
ret = await coro(*args, **kwargs)
File "C:/Users/Calculator/PycharmProjects/ryhrthrthrhrebnfbngfbfg/a.py", line 274, in findfriends
data = var['data']['id'] #I can get this working
TypeError: list indices must be integers or slices, not str
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\bot.py", line 863, in invoke
await ctx.command.invoke(ctx)
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 728, in invoke
await injected(*ctx.args, **ctx.kwargs)
File "C:\Users\Calculator\PycharmProjects\ryhrthrthrhrebnfbngfbfg\venv\lib\site-packages\discord\ext\commands\core.py", line 88, in wrapped
raise CommandInvokeError(exc) from exc
discord.ext.commands.errors.CommandInvokeError: Command raised an exception: TypeError: list indices must be integers or slices, not str
The https://friends.roblox.com/v1/users/<userid>/friends endpoint returns a list of all the friends that the user has, which can be of varying size.
With var = json.loads(res.text) you are loading the response text into a json object, which contains the key data, which you access using data = var['data']. The new data variable now contains a list object, which is why cv = data['name'] fails to work as list objects do not take strings as keys, they are accessed using integers.
You need to iterate over the list to get all information on the friends. The below code goes through the list, pulls information for each item in the list and sends a message of the information once it has gone through all items.
import discord
from discord.ext.commands import Bot
from discord.ext import commands
import requests, json
import asyncio
client = commands.Bot(command_prefix='?')
#client.event
async def on_ready():
print('started')
#client.command()
async def findfriends(ctx,userid):
res = requests.get("https://friends.roblox.com/v1/users/"+userid+"/friends")
var = json.loads(res.text)
data = var['data']
print(data)
friends_msg = 'Friends information:'
for friend in data:
id = friend['id']
name = friend['name']
desc = friend['description']
friends_msg = friends_msg + "\nID: " + id + "\nName: " + name + "\nDesc: " + desc
await ctx.send(friends_msg)
client.run(BOT TOKEN HERE)
I have following code which creates a testrail client and executes testrail's GET_SUITES API call.
I have a function to call the GET_SUITES API and I am passing testrail client & test_rail_project_id as params
I am trying to use multiprocessing to execute over my list of projects to speed up things and I am can't pickle error
My code:
from itertools import product
def get_suites(client, project_id):
try:
path = 'get_suites/{projectid}'.format(projectid=project_id)
test_rail_response = client.send_get(path)
return test_rail_response
except Exception as e:
raise Exception(str(e))
if __name__ == "__main__":
testRailClient = APIClient(TESTRAIL_URL)
pool = Pool(2)
all_project_ids = [100, 200, 300]
data = pool.starmap(get_suites, product([testRailClient], all_project_ids))
Error stack:
Traceback (most recent call last):
File "main.py", line 57, in <module>
data = pool.starmap(testrailapi.get_suites, product([testRailClient], all_project_ids))
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 274, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 644, in get
raise self._value
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/pool.py", line 424, in _handle_tasks
put(task)
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: can't pickle SSLContext objects
Any suggestions please?
Thank you
PS: I am using Python3.6
UPDATE:
As suggested I tried removing the API client as a parameter and it worked but I am getting the same error when I have "get_suites" as a method. Please see my updated code below
class TestRailExecution:
def __init__(self, url, username, password):
self.url = url
self.username = username
self.password = password
self.client = APIClient(self.url)
self.client.user = username
self.client.password = password
def get_suites(self, project_id):
try:
path = 'get_suites/{projectid}'.format(projectid=project_id)
test_rail_response = self.client.send_get(path)
return test_rail_response
except Exception as e:
raise Exception(str(e))
if __name__ == "__main__":
testRailClient = TestRailExecution(TESTRAIL_URL, user, password)
pool = Pool(2)
data = pool.map(get_suites, [100, 200, 300])
Hi i am writing a n/w bound server application using python asyncio which can accept a post request.
In post request i am accepting a symbol parameter
please tell me the best way to deal with n/w bound application.where i am collecting the data from another web api's by sending the post request to them.
Following is the code :
import asyncio
import aiohttp
import json
import logging
# async def fetch_content(url, symbols):
# yield from aiohttp.post(url, symbols=symbols)
#asyncio.coroutine
def fetch_page(writer, url, data):
response = yield from aiohttp.post(url, data=data)
resp = yield from response.read_and_close()
print(resp)
writer.write(resp)
return
#asyncio.coroutine
def process_payload(writer, data, scale):
tasks = []
data = data.split('\r\n\r\n')[1]
data = data.split('\n')
data = [x.split(':') for x in data]
print(data)
data = {x[0]: x[1] for x in data}
print(data)
# data = data[0].split(':')[1]
data = data['symbols']
print(data)
data = data.split(',')
data_len = len(data)
data_first = 0
data_last = scale
url = 'http://xxxxxx.xxxxxx.xxx/xxxx/xxxx'
while data_last < data_len:
tasks.append(asyncio.ensure_future(fetch_page(writer, url,{'symbols': ",".join(data[data_first:data_last])})))
data_first += scale
data_last += scale
tasks.append(asyncio.ensure_future(fetch_page(writer, url,{'symbols': ",".join(data[data_first:data_last])})))
loop.run_until_complete(tasks)
return
#asyncio.coroutine
def process_url(url):
pass
#asyncio.coroutine
def echo_server():
yield from asyncio.start_server(handle_connection, 'xxxxxx.xxxx.xxx', 3000)
#asyncio.coroutine
def handle_connection(reader, writer):
data = yield from reader.read(8192)
if data:
message = data.decode('utf-8')
print(message)
yield from process_payload(writer, message, 400)
writer.write_eof()
writer.close()
#url = 'http://XXXXXXX.xxxxx.xxx/xxxx/xxxxxx/xxx'
data = {'symbols': 'GD-US,14174T10,04523Y10,88739910,03209R10,46071F10,77543110,92847N10'}
loop = asyncio.get_event_loop()
loop.run_until_complete(echo_server())
try:
loop.run_forever()
finally:
loop.close()
But i am receiving the following error:
future: <Task finished coro=<handle_connection() done, defined at fql_server_async_v2.py:53> exception=AttributeError("'module' object has no attribute 'ensure_future'",)>
Traceback (most recent call last):
File "/home/user/anugupta/lib/python3.4/asyncio/tasks.py", line 234, in _step
result = coro.send(value)
File "fql_server_async_v2.py", line 60, in handle_connection
yield from process_payload(writer, message, 400)
File "/home/user/anugupta/lib/python3.4/asyncio/coroutines.py", line 141, in coro
res = func(*args, **kw)
File "fql_server_async_v2.py", line 41, in process_payload
tasks.append(asyncio.ensure_future(fetch_page(writer, url, {'symbols':",".join(data[data_first:data_last])})))
AttributeError: 'module' object has no attribute 'ensure_future'
^CTraceback (most recent call last):
File "fql_server_async_v2.py", line 72, in <module>
loop.run_forever()
File "/home/user/anugupta/lib/python3.4/asyncio/base_events.py", line 236, in run_forever
self._run_once()
File "/home/user/anugupta/lib/python3.4/asyncio/base_events.py", line 1017, in _run_once
event_list = self._selector.select(timeout)
File "/home/user/anugupta/lib/python3.4/selectors.py", line 424, in select
fd_event_list = self._epoll.poll(timeout, max_ev)
ensure_future was added in asyncio 3.4.4, use async for earlier versions.
While async is deprecated now it will be supported in oversable future.