Python: binary mode vs encoding and its sha256 - python-3.x

I am again in the mysteries of python:
Can you tell me why created sha256 are not the same (release.md stays the same):
import re, os, os.path, hashlib, time, sys
with open("release.md", "rb") as f:
h = f.read()
#print(h)
print(hashlib.sha256(h).hexdigest())
with open("release.md", "r", encoding=sys.getdefaultencoding()) as f:
h = f.read()
#print(h)
print(hashlib.sha256(h.encode(sys.getdefaultencoding())).hexdigest())
Output:
8c1938c9b495afe666d41a23cb6d108b3c351d6c8b5aca7019e214df1c47e240
32b8f1a46cea09e6c358390c8a81b80e233bd6c991c010cad6ad5489362e20d3
Its python 3.9.1 on windows10

I found out, that its up to windows specific linefeed, the open function will use transitions to "\r\n"
see Parameter newline: https://docs.python.org/3/library/functions.html?highlight=open#open
import re, os, os.path, hashlib, time, sys
with open("release.md", "rb") as f:
h = f.read()
#print(h)
print(hashlib.sha256(h).hexdigest())
e=sys.getdefaultencoding()
with open("release.md", "r", encoding=e, newline="\n") as f:
h = f.read()
#print(h)
print(hashlib.sha256(h.encode(e)).hexdigest())
makes it equal.

Related

Extract a part of a string and append it to a tag

I want to find a string that starts with "section_" and add this as a value to a tag in the same line.
Example: Following is the input in a file of type ditamap.
<topicref href="xyz/debug_logging_in_xyz-section_i_y_mn.dita"/>
<topicref href="xyz/workflows_id-section_exf_zaz_lo.dita"/>
<topicref href="xyz/images_id-section_ekl_bbz_lo.dita"/>
Desired output:
<topicref href="xyz/debug_logging_in_xyz-section_i_y_mn.dita" keys="section_i_y_mn"/>
<topicref href="xyz/workflows_id-section_exf_zaz_lo.dita" keys="section_exf_zaz_lo"/>
<topicref href="xyz/images_id-section_ekl_bbz_lo.dita" keys="section_ekl_bbz_lo"/>
I understand BeautifulSoup can be used to achieve this. But, I am new and do not know the syntax. Can anyone help?
Here is the code I am trying to use:
import os
from bs4 import BeautifulSoup as bs
globpath = "C:/DATA" #add your directory path here
def main(path):
with open(path, encoding="utf-8") as f:
s = f.read()
s = bs(s, "xml")
imgs = s.find_all("topicref")
for i in imgs:
if "section" in i["href"]:
i["keys"] = i["href"].replace("*-","").replace(".dita*","")
s = str(s)
with open(path, "w", encoding="utf-8") as f:
f.write(s)
for dirpath, directories, files in os.walk(globpath):
for fname in files:
if fname.endswith(".ditamap"):
path = os.path.join(dirpath, fname)
main(path)
But, it's adding the entire path in the keys attribute. I need only the portion that starts with section and ends before .dita.
Regex worked:Here is the final code
from bs4 import BeautifulSoup as bs
import re
globpath = "C:/DATA" #add your directory path here
def main(path):
with open(path, encoding="utf-8") as f:
s = f.read()
s = bs(s, "xml")
imgs = s.find_all("topicref")
for i in imgs:
if "section" in i["href"]:
try:
i["keys"] = re.findall("section[^\.]*",i["href"])[0]
except:
print("Could not replace")
s = str(s)
with open(path, "w", encoding="utf-8") as f:
f.write(s)```
I think it should be done with Regex (cuz thats the most i can do)
from bs4 import BeautifulSoup
import re
soup = BeautifulSoup('your-string-input-of-tags-goes-here', 'html.parser')
soup.find_all('topicref', {'keys': re.compile(r'(section_([^ "])+)')})
Returns a list of matched tags
Check this code whether it works or not

Chose which function to run in script

I'm trying to make it so that the user chooses which function to run using if.
import os
import csv
import collections
import datetime
import pandas as pd
import time
import string
import re
import glob, os
folder_path = 'C:/ProgramData/WebPort/system/tags'
folder2_path = 'C:/ProgramData/WebPort/system'
search2_str = '"Prefix"'
print("Choices:\n 1 - Read from CSV\n 2 - Read from WPP")
x = input("Please enter your choice:\n")
x = int(x)
if x == 1:
csv_file_list = glob.glob(folder_path + '/*.csv')
with open("csv.txt", 'w') as wf:
for file in csv_file_list:
print(glob.glob(folder_path + '/*.csv'))
with open(file) as rf:
for line in rf:
if line.strip(): # if line is not empty
if not line.endswith("\n"):
line+="\n"
wf.write(line)
print('Reading from .csv')
elif x == 2:
for root, dirs, files in os.walk(folder2_path):
for file in files:
if file.endswith(".wpp"):
print(os.path.join(root, file))
with open(os.path.join(root, file), 'r') as fr, open ("wpp.txt",'a', encoding='utf-8') as fw:
for i,line in enumerate(fr):
if line.find(search2_str) != -1:
fw.write(line)
print('Reading from .wpp')
else:
print('wrong choice')
Getting Invalid syntax in line 34 using this.

How to write to text file in python?

I am a beginner in using python. I have created a plain text file and have to encrypt it to output file. But I am getting an error as below and unable to write it to output file. The code is running but the output file which should be encrypted is created.
#!/usr/bin/env python3
import os
import binascii
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.backends import default_backend
import argparse
def readfile_binary(file):
with open(file, 'rb') as f:
content = f.read()
return content
def writefile_binary(file, content):
with open(file, 'wb') as f:
f.write(content)
def main():
parser = argparse.ArgumentParser(description = 'Encryption and Decryption of the file')
parser.add_argument('-in', dest = 'input', required = True)
parser.add_argument('-out', dest = 'output', required = True)
parser.add_argument('-K', dest = 'key', help = 'The key to be used for encryption must be in hex')
parser.add_argument('-iv', dest = 'iv', help = 'The inintialisation vector, must be in hex')
args = parser.parse_args()
input_content = readfile_binary(args. input)
output_content = writefile_binary(args. output)
if __name__ == "__main__":
main()
The output file should be encrypted and it should be available in the directory.
These two lines:
input_content = readfile_binary(args. input)
output_content = writefile_binary(args. output)
There should not be a space in args.input. Here is an example,
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('filename')
args = parser.parse_args()
# using type hints can help reasoning about code
def write(filename: str, content: str) -> None:
with open(filename, 'wb') as f:
f.write(str.encode(content))
# if the filename was successfully parsed from stdin
if args.filename == 'filename.txt':
print(f"args: {args.filename}")
# write to the appropriate output file
write(filename=args.filename, content="content")
You might need to correct your code's indentation. Python requires indenting code within each function definition, loop, etc.
And as eric points out, there should be no spaces after the periods in args. input and args. output. Change those to args.input and args.output instead.
So:
#!/usr/bin/env python3
import os
import binascii
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.backends import default_backend
import argparse
def readfile_binary(file):
with open(file, 'rb') as f:
content = f.read()
return content
def writefile_binary(file, content):
with open(file, 'wb') as f:
f.write(content)
def main():
parser = argparse.ArgumentParser(description = 'Encryption and Decryption of the file')
parser.add_argument('-in', dest = 'input', required = True)
parser.add_argument('-out', dest = 'output', required = True)
parser.add_argument('-K', dest = 'key', help = 'The key to be used for encryption must be in hex')
parser.add_argument('-iv', dest = 'iv', help = 'The inintialisation vector, must be in hex')
args = parser.parse_args()
input_content = readfile_binary(args.input)
output_content = writefile_binary(args.output)
if __name__ == "__main__":
main()

Unable to read data about FCN,i don't know how to do.the result is:Found pickle file! 0 0

import numpy as np
import os
import random
from six.moves import cPickle as pickle
from tensorflow.python.platform import gfile
import glob
import TensorflowUtils as utils
DATA_URL = 'http:\\data.csail.mit.edu\\places\\ADEchallenge\\ADEChallengeData2016.zip'
#download and read dataset
def read_dataset(data_dir):
pickle_filename = "MITSceneParsing.pickle"
pickle_filepath = os.path.join(data_dir, pickle_filename)
if not os.path.exists(pickle_filepath):
utils.maybe_download_and_extract(data_dir, DATA_URL, is_zipfile=True)
SceneParsing_folder = os.path.splitext(DATA_URL.split("/")[-1])[0]
result = create_image_lists(os.path.join(data_dir, SceneParsing_folder))
print ("Pickling ...")
with open(pickle_filepath, 'wb') as f:
pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
else:
print ("Found pickle file!")
with open(pickle_filepath, 'rb') as f:
result = pickle.load(f)
training_records = result['training']
validation_records = result['validation']
del result
return training_records, validation_records
train_records, valid_records = read_dataset('Data_zoo/MIT_SceneParsing')
print(len(train_records))
print(len(valid_records))
the result is:Found pickle file! 0 0
why the lens about train_records and valid_records are 0?
i don't know whree is wrong and how to correct it.
This code is right. The bug is in 'create_image_lists'.
Note this code in create_image_lists:
filename = os.path.splitext(f.split('/')[-1])[0]
This is no problem in Linux, but in windows, the separator is '\\', so you should modify this code to:
filename = os.path.splitext(f.split('\\')[-1])[0]
Then delete this file 'MITSceneParsing.pickle', and run read_dataset again.

segmentation fault in python3

I am running python3 on a Ubuntu machine and have noticed that the following block of code is fickle. Sometimes it runs just fine, other times it produces a segmentation fault. I don't understand why. Can someone explain what might be going on?
Basically what the code does is try to read S&P companies from Wikipedia and write the list of tickers to a file in the same directory as the script. If no connection to Wikipedia can be established, the script tries instead to read an existing list from file.
from urllib import request
from urllib.error import URLError
from bs4 import BeautifulSoup
import os
import pickle
import dateutil.relativedelta as dr
import sys
sys.setrecursionlimit(100000)
def get_standard_and_poors_500_constituents():
fname = (
os.path.abspath(os.path.dirname(__file__)) + "/sp500_constituents.pkl"
)
try:
# URL request, URL opener, read content.
req = request.Request(
"http://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
)
opener = request.urlopen(req)
# Convert bytes to UTF-8.
content = opener.read().decode()
soup = BeautifulSoup(content, "lxml")
# HTML table we actually need is the first.
tables = soup.find_all("table")
external_class = tables[0].findAll("a", {"class":"external text"})
c = [ext.string for ext in external_class if not "reports" in ext]
with open(fname, "wb") as f:
pickle.dump(c, f)
except URLError:
with open(fname, "rb") as f:
c = pickle.load(f)
finally:
return c
sp500_constituents = get_standard_and_poors_500_constituents()
spdr_etf = "SPY"
sp500_index = "^GSPC"
def main():
X = get_standard_and_poors_500_constituents()
print(X)
if __name__ == "__main__":
main()

Resources