Is there anyway to Handle Empty response in Python? - python-3.x

I have been trying to fetch recommendation details from API Like top 5 matching
Colab code for in detail understanding
cached based API calling
Input : Smile value : O=C(N1C=CN=C1)NC2=NC(C)=C(C3=CC=C(S(=O)(C)=O)C(F)=C3)S2
output
Reac = NC1=CSC=C1C(OC)=O.O=C(Cl)CCl Reag = CCN-CC.C(Cl)Cl
Reac = NC1=CSC=C1C(OC)=O.O=C(Cl)CCl Reag = []
Reac = NC1=CSC=C1C(OC)=O.O=C(Cl)CCl Reag = CCN-CC.C(Cl)Cl
Reac = NC1=CSC=C1C(OC)=O.O=C(Cl)CCl Reag = []
This is value
reac - ['O=C(OC(C)(C)C)NCCC(NC(C)(C)C)=O', 'O=[C]C(F)(F)F']
reag - ['']
While got Empty value in reag variable it throws error message
File "C:\ProgramData\Anaconda3\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
My actual logic is here
from cachetools import cached
from diskcache import Cache
BASE_URL = "https://ai.chemistryinthecloud.com/smilies/"
CACHEDIR = "api_cache"
#cached(cache=Cache(CACHEDIR))
def get_similies(value):
return requests.get(BASE_URL + value).json()
def parse_eq(eq):
reac, _, reag = (list(v.split(".")) for v in eq.partition(">"))
print("reac -",reac)
print("reag -",reag)
print(type(reag[0]))
return {
"reactants": {i: get_similies(i) for i in reac},
"reagents": {i: get_similies(i) for i in reag if i is not None}
}
result = [parse_eq(eqs) for eqs in l]
How Can I skip the empty value and go to the next loop value?

Related

Plotting multiple lines with a Nested Dictionary, and unknown variables to Line Graph

I was able to find somewhat of an answer to my question, but it was not as nested as my dictionary and so I am really unsure how to proceed as I am still very new to python. I currently have a nested dictionary like
{'140.10': {'46': {'1': '-49.50918', '2': '-50.223637', '3': '49.824406'}, '28': {'1': '-49.50918', '2': '-50.223637', '3': '49.824406'}}}:
I am wanting to plot it so that '140.10' becomes the title of the graph and '46' and '28' become the individual lines and key '1' for example is on the y axis and the x axis is the final number (in this case '-49.50918). Essentially a graph like this:
I generated this graph with a csv file that is written at another part of the code just with excel:
[![enter image description here][2]][2]
The problem I am running into is that these keys are autogenerated from a larger csv file and I will not know their exact value until the code has been run. As each of the keys are autogenerated in an earlier part of the script. As I will be running it over various files called the Graph name, and each file will have a different values for:
{key1:{key2_1: {key3_1: value1, key3_2: value2, key3_3: value3}, key_2_2 ...}}}
I have tried to do something like this:
for filename in os.listdir(Directory):
if filename.endswith('.csv'):
q = filename.split('.csv')[0]
s = q.split('_')[0]
if s in time_an_dict:
atom = list(time_an_dict[s])
ion = time_an_dict[s]
for f in time_an_dict[s]:
x_val = []
y_val = []
fz = ion[f]
for i in time_an_dict[s][f]:
pos = (fz[i])
frame = i
y_val.append(frame)
x_val.append(pos)
'''ions = atom
frame = frames
position = pos
plt.plot(frame, position, label = frames)
plt.xlabel("Frame")
plt.ylabel("Position")
plt.show()
#plt.savefig('{}_Pos.png'.format(s))'''
But it has not run as intended.
I have also tried:
for filename in os.listdir(Directory):
if filename.endswith('_Atom.csv'):
q = filename.split('.csv')[0]
s = q.split('_')[0]
if s in window_dict:
name = s + '_Atom.csv'
time_an_dict[s] = analyze_time(name,window_dict[s])
new = '{}_A_pos.csv'.format(s)
ions = list(time_an_dict.values())[0].keys()
for i in ions:
x_axis_values = []
y_axis_values = []
frame = list(time_an_dict[s][i])
x_axis_values.append(frame)
empty = []
print(x_axis_values)
for x in frame:
values = time_an_dict[s][i][x]
empty.append(values)
y_axis_values.append(empty)
plt.plot(x_axis_values, y_axis_values, label = x )
plt.show()
But keep getting the error:
Traceback (most recent call last): File "Atoms_pos.py", line 175, in
plt.plot(x_axis_values, y_axis_values, label = x ) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/pyplot.py",
line 2840, in plot
return gca().plot( File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_axes.py",
line 1743, in plot
lines = [*self._get_lines(*args, data=data, **kwargs)] File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_base.py",
line 273, in call
yield from self._plot_args(this, kwargs) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_base.py",
line 394, in _plot_args
self.axes.xaxis.update_units(x) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axis.py",
line 1466, in update_units
default = self.converter.default_units(data, self) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py",
line 107, in default_units
axis.set_units(UnitData(data)) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py",
line 176, in init
self.update(data) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py",
line 209, in update
for val in OrderedDict.fromkeys(data): TypeError: unhashable type: 'numpy.ndarray'
Here is the remainder of the other parts of the code that generate the files and dictionaries I am using. I was told in another question I asked that this could be helpful.
# importing dependencies
import math
import sys
import pandas as pd
import MDAnalysis as mda
import os
import numpy as np
import csv
import matplotlib.pyplot as plt
################################################################################
###############################################################################
Directory = '/Users/hxb51/Desktop/Q_prof/Displacement_Charge/Blah'
os.chdir(Directory)
################################################################################
''' We are only looking at the positions of the CLAs and SODs and not the DRUDE counterparts. We are assuming the DRUDE
are very close and it is not something that needs to be concerned with'''
def Positions(dcd, topo):
fields = ['Window', 'ION', 'ResID', 'Location', 'Position', 'Frame', 'Final']
with open('{}_Atoms.csv'.format(s), 'a') as d:
writer = csv.writer(d)
writer.writerow(fields)
d.close()
CLAs = u.select_atoms('segid IONS and name CLA')
SODs = u.select_atoms('segid IONS and name SOD')
CLA_res = len(CLAs)
SOD_res = len(SODs)
frame = 0
for ts in u.trajectory[-10:]:
frame +=1
CLA_pos = CLAs.positions[:,2]
SOD_pos = SODs.positions[:,2]
for i in range(CLA_res):
ids = i + 46
if CLA_pos[i] < 0:
with open('{}_Atoms.csv'.format(s), 'a') as q:
new_line = [s,'CLA', ids, 'Bottom', CLA_pos[i], frame,10]
writes = csv.writer(q)
writes.writerow(new_line)
q.close()
else:
with open('{}_Atoms.csv'.format(s), 'a') as q:
new_line = [s,'CLA', ids, 'Top', CLA_pos[i], frame, 10]
writes = csv.writer(q)
writes.writerow(new_line)
q.close()
for i in range(SOD_res):
ids = i
if SOD_pos[i] < 0:
with open('{}_Atoms.csv'.format(s), 'a') as q:
new_line = [s,'SOD', ids, 'Bottom', SOD_pos[i], frame,10]
writes = csv.writer(q)
writes.writerow(new_line)
q.close()
else:
with open('{}_Atoms.csv'.format(s), 'a') as q:
new_line = [s,'SOD', ids, 'Top', SOD_pos[i], frame, 10]
writes = csv.writer(q)
writes.writerow(new_line)
q.close()
csv_Data = pd.read_csv('{}_Atoms.csv'.format(s))
filename = s + '_Atom.csv'
sorted_df = csv_Data.sort_values(["ION", "ResID", "Frame"],
ascending=[True, True, True])
sorted_df.to_csv(filename, index = False)
os.remove('{}_Atoms.csv'.format(s))
''' this function underneath looks at the ResIds, compares them to make sure they are the same and then counts how many
times the ion flip flops around the boundaries'''
def turn_dict(f):
read = open(f)
reader = csv.reader(read, delimiter=",", quotechar = '"')
my_dict = {}
new_list = []
for row in reader:
new_list.append(row)
for i in range(len(new_list[:])):
prev = i - 1
if new_list[i][2] == new_list[prev][2]:
if new_list[i][3] != new_list[prev][3]:
if new_list[i][2] in my_dict:
my_dict[new_list[i][2]] += 1
else:
my_dict[new_list[i][2]] = 1
return my_dict
def plot_flips(f):
dict = turn_dict(f)
ions = list(dict.keys())
occ = list(dict.values())
plt.bar(range(len(dict)), occ, tick_label = ions)
plt.title("{}".format(s))
plt.xlabel("Residue ID")
plt.ylabel("Boundary Crosses")
plt.savefig('{}_Flip.png'.format(s))
def analyze_time(f, dicts):
read = open(f)
reader = csv.reader(read, delimiter=",", quotechar='"')
new_list = []
keys = list(dicts.keys())
time_dict = {}
pos_matrix = {}
for row in reader:
new_list.append(row)
fields = ['ResID', 'Position', 'Frame']
with open('{}_A_pos.csv'.format(s), 'a') as k:
writer = csv.writer(k)
writer.writerow(fields)
k.close()
for i in range(len(new_list[:])):
if new_list[i][2] in keys:
with open('{}_A_pos.csv'.format(s), 'a') as k:
new_line = [new_list[i][2], new_list[i][4], new_list[i][5]]
writes = csv.writer(k)
writes.writerow(new_line)
k.close()
read = open('{}_A_pos.csv'.format(s))
reader = csv.reader(read, delimiter=",", quotechar='"')
time_list = []
for row in reader:
time_list.append(row)
for j in range(len(keys)):
for i in range(len(time_list[1:])):
if time_list[i][0] == keys[j]:
pos_matrix[time_list[i][2]] = time_list[i][1]
time_dict[keys[j]] = pos_matrix
return time_dict
window_dict = {}
for filename in os.listdir(Directory):
s = filename.split('.dcd')[0]
fors = s + '.txt'
topos = '/Users/hxb51/Desktop/Q_prof/Displacement_Charge/topo.psf'
if filename.endswith('.dcd'):
print('We are starting with {} \n '.format(s))
u = mda.Universe(topos, filename)
Positions(filename, topos)
name = s + '_Atom.csv'
plot_flips(name)
window_dict[s] = turn_dict(name)
continue
time_an_dict = {}
for filename in os.listdir(Directory):
if filename.endswith('.csv'):
q = filename.split('.csv')[0]
s = q.split('_')[0]
if s in window_dict:
name = s + '_Atom.csv'
time_an_dict[s] = analyze_time(name,window_dict[s])
for filename in os.listdir(Directory):
if filename.endswith('.csv'):
q = filename.split('.csv')[0]
s = q.split('_')[0]
if s in time_an_dict:
atom = list(time_an_dict[s])
ion = time_an_dict[s]
for f in time_an_dict[s]:
x_val = []
y_val = []
fz = ion[f]
for i in time_an_dict[s][f]:
pos = (fz[i])
frame = i
y_val.append(frame)
x_val.append(pos)
'''ions = atom
frame = frames
position = pos
plt.plot(frame, position, label = frames)
plt.xlabel("Frame")
plt.ylabel("Position")
plt.show()
#plt.savefig('{}_Pos.png'.format(s))'''
Everything here runs well except this last bottom block of code. That deals with trying to make a graph from a nested dictionary. Any help would be appreciated!
Thanks!
I figured out the answer:
for filename in os.listdir(Directory):
if filename.endswith('_Atom.csv'):
q = filename.split('.csv')[0]
s = q.split('_')[0]
if s in window_dict:
name = s + '_Atom.csv'
time_an_dict[s] = analyze_time(name,window_dict[s])
new = '{}_A_pos.csv'.format(s)
ions = list(time_an_dict[s])
plt.yticks(np.arange(-50, 50, 5))
plt.xlabel('Frame')
plt.ylabel('Z axis position(Ang)')
plt.title([s])
for i in ions:
x_value = []
y_value = []
time_frame =len(time_an_dict[s][i]) +1
for frame in range(1,time_frame):
frame = str(frame)
x_value.append(int(frame))
y_value.append(float(time_an_dict[s][i][frame]))
plt.plot(x_value, y_value, label=[i])
plt.xticks(np.arange(1, 11, 1))
plt.legend()
plt.savefig('{}_Positions.png'.format(s))
plt.clf()
os.remove("{}_A_pos.csv".format(s))
From there, with the combo of the other parts of the code, it produces these graphs:
For more than 1 file as long as there is more '.dcd' files.

Pytest unittest function doesn't return any value

Could you please help me understand issue with below unittest?
Here's my function for which i am writing unittest.
def running_config_from_database(device):
try:
data = databaseproxy(cluster='https://xxx.xxxx.xxx.net')
datadb = 'test'
query = f'''fGetrunningconfigData('{device}')
'''
raw_data = data.execute_query(datadb, query)
# pdb.set_trace()
for items in raw_data.fetchall():
config = items['Config'].split('\r\n')
for index, line in enumerate(config):
if '$PASS$' in line:
if line.startswith('set groups ospf_test'):
config_line = line.replace('$PASS$', get_auth('ospf'))
config[index] = config_line
elif line.startswith('set groups rip_test'):
config_line = line.replace('$PASS$', get_auth('rsvp'))
config[index] = config_line
config = config + overload_config
return True, '\r\n'.join(config)
except Exception as e:
return False, f'Failed to get the running config from database, error: {e}'
Here's my unittest for this function:
#patch("scripts.test.overload_config")
#patch("scripts.test.get_auth")
#patch("scripts.test.databaseproxy.execute_query")
def test_running_config_from_database(self, mock_data, mock_cred, mock_overload):
ret = MagicMock()
ret.fetchall.return_value = [{'Hostname': 'devA', 'Config': 'set groups ospf_test secret $PASS$\r\n'}]
mock_data.return_value = ret
mock_cred.return_value = 'xyz'
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
expected = ['set groups ospf_test secret xyz', '']
out = expected + mock_overload
data = '\r\n'.join(out)
status, out1 = tests.test_scripts.running_config_from_database('devA')
assert status and out1 == data
When I run this unittest to test the function, I get the below assertion error - looks like the function doesn't return any value.
#patch("scripts.test.overload_config")
#patch("scripts.test.get_auth")
#patch("scripts.test.databaseproxy.execute_query")
def test_running_config_from_database(self, mock_data, mock_cred, mock_overload):
ret = MagicMock()
ret.fetchall.return_value = [{'Hostname': 'devA', 'Config': 'set groups ospf_test secret $PASS$\r\n'}]
mock_data.return_value = ret
mock_cred.return_value = 'xyz'
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
expected = ['set groups ospf_test secret xyz', '']
out = expected + mock_overload
data = '\r\n'.join(out)
status, out1 = tests.test_scripts.running_config_from_database('devA')
> assert status and out1 == data
E AssertionError: assert (True and '' == 'set groups d...rload_config2'
E + set groups ospf_test secret xyz
E +
E + sample_overload_config1
E + sample_overload_config2)
tests/test_scripts.py:80: AssertionError
I edited my function to reduce the complexity but it still doesn't work. not sure why.
Main Function:
==============
def running_config_from_database(device):
try:
pdb.set_trace()
config = running_config_database(device)
for index, line in enumerate(config):
if '$PASS$' in line:
if line.startswith('set groups ospf_test'):
config_line = line.replace('$PASS$', get_cred('ospf'))
config[index] = config_line
config = config + overload_config
return True, '\r\n'.join(config)
except Exception as e:
return False, f'Failed to get the running config from Database, error: {e}'
UnitTest Result for above Function:
=========================================================================================================== FAILURES ============================================================================================================
________________________________________________________________________________________________ test_running_config_from_database _________________________________________________________________________________________________
mock_cred = <MagicMock name='get_cred' id='140210277622336'>, mock_overload = ['sample_overload_config1', 'sample_overload_config2'], mock_running_config = <MagicMock name='running_config_database' id='140210277652128'>
#patch("test.test1.scripts.running_config_database")
#patch("test.test1.scripts.overload_config")
#patch("test.test1.scripts.get_cred")
def test_running_config_from_database(mock_cred, mock_overload, mock_running_config):
mock_running_config.return_value = ['set groups ospf_test secret $PASS$', '']
mock_cred.return_value = 'xyz'
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
expected = ['set groups ospf_test secret xyz', '']
out = expected + mock_overload
data = '\r\n'.join(out)
status, out1 = test.test1.scripts.test_running_config_from_database('devA')
> assert status and out1 == data
E AssertionError: assert (True and '' == 'set groups d...rload_config2'
E + set groups ospf_test secret xyz
E +
E + sample_overload_config1
E + sample_overload_config2)
validation_tests/test_scripts.py:152: AssertionError
================================================================================================== 1 failed, 6 passed in 4.79s ==================================================================================================
The problem here is the assignment to mock_overload. If you want to adapt your mocked object you have to make sure that the object itself is changed. If you just assign another object (in this case, a list), your variable now points to the list object, while the original mock_overload is no longer referenced (and is not changed). So instead of writing:
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
you can for example write
mock_overload[:] = ['sample_overload_config1', 'sample_overload_config2']
For clarification, here is a simplified version of the original code:
>>> mock_overload = []
>>> id(mock_overload)
1477793866440
>>> mock_overload = [5, 6]
>>> id(mock_overload)
1477791015560 <- changed id, no longer pointing to the mock
Now the same with the fixed code:
>>> mock_overload = []
>>> id(mock_overload)
140732764763024
>>> mock_overload[:] = [5, 6]
>>> id(mock_overload)
140732764763024 <- unchanged id, still points to the mock
Note that mock_overload[:] = [5, 6] is basically a shortcut for:
mock_object.clear()
mock_object.extend([5, 6])
Answer is already provided in comment section by #MrBean Bremen. here's the UT after making changes suggested.
#patch("scripts.test.overload_config")
#patch("scripts.test.get_auth")
#patch("scripts.test.databaseproxy.execute_query")
def test_running_config_from_database(self, mock_data, mock_cred, mock_overload):
ret = MagicMock()
ret.fetchall.return_value = [{'Hostname': 'devA', 'Config': 'set groups ospf_test secret $PASS$\r\n'}]
mock_data.return_value = ret
mock_cred.return_value = 'xyz'
***mock_overload[:]*** = ['sample_overload_config1', 'sample_overload_config2']
expected = ['set groups ospf_test secret xyz', '']
out = expected + mock_overload
data = '\r\n'.join(out)
status, out1 = tests.test_scripts.running_config_from_database('devA')
assert status and out1 == data

IMAP4LIB When using the store command I get the error "BAD [b'Could not parse command']"

I am new to all of this so I'm sorry if I mess this up or have already made a mess. I have two classes a GUI and my MailSorter class in the GUI class I have method which logins, then one that fetches all the EmailIds then finally fetches all the From emails and stores it in a dict. which stores the From email and amount of times it appears and an array with the From email and the ID.
def fetchFrom(self,emailIDs):
EmailAmount = dict()
Email = []
count = 0
for emailId in emailIDs:
#Converts email into string
result2,email_data = self.mail.fetch(emailId,'(RFC822)')
try:
raw_email = email_data[0][1].decode("utf-8")
email_message = email.message_from_string(raw_email)
#Fetches email address sent from
From = email_message["From"]
Email.append((From,emailId))
#print(From)
if From in EmailAmount:
EmailAmount[From] = EmailAmount[From] + 1
else:
EmailAmount[From] = 1
count += 1
if count > 10:
break
except Exception as e:
self.log.append((emailId,e))
def mainScreenInterface(self):
#Process
print("Loading program")
EmailIds = self.Mail.fetchEmailId()
EmailDict, self.EmailArray = self.Mail.fetchFrom(EmailIds)
self.master.geometry("750x600")
self.master.title("Main Screen")
self.destoryWidget()
#New Frame
self.mainScreen = tk.Frame(self.master)
self.mainScreen.pack()
#Labels
mainText = tk.Label(self.mainScreen,text = "All Emails")
mainText.config(font=("Courier","25"))
#Buttons
delete = tk.Button(self.mainScreen,text="Delete", command = self.Delete)
deleteAll = tk.Button(self.mainScreen,text="Delete All", command = self.DeleteAll)
Help = tk.Button(self.mainScreen,text="Help", command = self.Help_)
#Scrollbar
scrollbar = tk.Scrollbar(root)
scrollbar.pack(side="right",fill="y")
#Listbox
self.listbox = tk.Listbox(root,width = root.winfo_screenwidth(), height = 25)
#Attach a scrool wheel to the listbox
self.listbox.config(yscrollcommand=scrollbar.set)
scrollbar.config(command=self.listbox.yview)
#Add items to the list box
count = 1
for x,y in EmailDict.items():
self.listbox.insert(count,(x,y))
count += 1
#Placement
paddingValue = 40
mainText.pack(side="top")
self.listbox.pack(side="top")
delete.pack(side="left",padx=paddingValue)
deleteAll.pack(side="left",padx=paddingValue)
Help.pack(side="left",padx=paddingValue)
def Delete(self):
emailName = self.listbox.get(tk.ANCHOR)[0]
self.Mail.deleteEmail(emailName,self.EmailArray)
So the fetchFrom is from the mailSorter class and the other two are the GUI class, when I call the deleteEmail I get the error:
Exception in Tkinter callback
Traceback (most recent call last):
File "C:\Python\lib\tkinter\__init__.py", line 1705, in __call__
return self.func(*args)
File "C:\Users\******\Desktop\Email Sorter v3.py", line 197, in Delete
self.Mail.deleteEmail(emailName,self.EmailArray)
File "C:\Users\******\Desktop\Email Sorter v3.py", line 66, in deleteEmail
self.mail.store(Id[1].strip(), '+X-GM-tk.LabelS', '\\Trash')
File "C:\Python\lib\imaplib.py", line 840, in store
typ, dat = self._simple_command('STORE', message_set, command, flags)
File "C:\Python\lib\imaplib.py", line 1196, in _simple_command
return self._command_complete(name, self._command(name, *args))
File "C:\Python\lib\imaplib.py", line 1027, in _command_complete
raise self.error('%s command error: %s %s' % (name, typ, data))
imaplib.IMAP4.error: STORE command error: BAD [b'Could not parse command']
but when I run it as a text base with no GUI and use an example email it all works fine:
test = MailSorter("hamadnassor5#gmail.com","snerfulbubble1.")
test.login()
EmailIds = test.fetchEmailId()
EmailDict, EmailArray = test.fetchFrom(EmailIds)
test.displayEmails(EmailDict)
test.deleteEmail("Xbox <Xbox#outlook.com>",EmailArray)
test.closeCon()
DeleteMail code
def deleteEmail(self, emailName, EmailArray):
for Id in EmailArray:
if Id[0] == emailName:
print(Id[0])
print(emailName)
print(Id[1])
self.mail.store(Id[1].strip(), '+X-GM-tk.LabelS', '\\Trash')

Converting a textfile to a dictionary

My file:
Goal: to read a txt file and return dictionaries
What I have:
def load_snacks(snack_file: TextIO) -> Tuple[Dict[str, List[str]],
Dict[str, List[str]]]:
"""Return a two-item tuple containing a "healthysnack_to_junkfood" dictionary
and a "healthysnack_to_healthysnack" dictionary with the data from snack_file.
"""
snack_H2J = {}
snack_H2H = {}
line = snack_file.readline().strip()
while line != '':
# due to structure of the file, line contains a healthy snack name
healthysnack_name = line
# properly format the 1st healthy snack name, use helper fcn (see below for helper fcn)
flip_name_and_del_comma(healthysnack_name)
healthy_list = []
junk_list = []
line = snack_file.readline().strip()
while line != '\n':
if ',' in line:
snack_H2J[healthysnack_name] = line
a = flip_name_and_del_comma(line)
healthy_list.append(a)
else:
snack_H2H[healthysnack_name] = line
junk_list.append(line)
line = snack_file.readline().strip()
return (snack_H2J, snack_H2H)
Below is my helperfcn; I have verified that this works
def flip_name_and_del_comma(s: str) -> str:
""" Retrun a new str that reverses the format name order from 'colour, healthy snack name' to
'healthy snack name to colour'
>>> flip_name_and_del_comma('orange, carrot')
'carrot orange'
>>> flip_name_and_del_comma('yellow, mango')
'mango yellow'
"""
s_reversed = ', '.join(reversed(s.split(', ')))
s_comma_delete = s_reversed.replace(', ', ' ')
return s_comma_delete

PdfMiner: Erro processing the page literal required: /b'begin'

I am trying to read .pdf file using python3 with package called pdfminer which I have done successfully but for some of the page in .pdf file while reading the page using interpreter.process_page in getAllPages() of the following code I am getting an errors as follows:
error processing the page literal required: /b'begin'.
error processing the page Unknown operator: 'Qq'.
This is happening only for few docs but not able to find out what is the problem , in which case this could happen?
Code:-
class PDFDoc():
def __init__(self):
self.rsrcmgr = PDFResourceManager()
self.laparams = LAParams()
self.device = PDFPageDetailedAggregator(self.rsrcmgr, laparams=self.laparams)
self.interpreter = PDFPageInterpreter(self.rsrcmgr, self.device)
self.doc_values = []
self.total_no_of_pages = 0
self.doc_page_dict = collections.OrderedDict()
# self.doc = None
"""
Read PDF Document
"""
def readDoc(self, doc_name):
fp = open(doc_name, 'rb')
self.parser = PDFParser(fp)
self.doc = PDFDocument(self.parser)
"""
Read all pages in the document and saved in List of tuples format.
It contains the text and their coordinate info along with page number
"""
def getAllPages(self):
for page in PDFPage.create_pages(self.doc):
self.interpreter.process_page(page)
# receive the LTPage object for this page
self.device.get_result()
self.doc_values = self.device.rows
"""
Get the total number of pages
"""
def getTotalPages(self):
self.total_no_of_pages = max(self.doc_page_dict)+1
"""
Convert the document info into Page-wise dict. {Key:Value}-->{Page no:[Page text, coordinates]}
"""
def getPageDict(self):
for i in range(len(self.doc_values)):
left = self.doc_values[i][1]
bottom = self.doc_values[i][2]
content = self.doc_values[i][-1]
if self.doc_page_dict.get(self.doc_values[i][0]):
self.doc_page_dict[self.doc_values[i][0]].append({'left':left, 'bottom':bottom, 'content':content})
else:
self.doc_page_dict[self.doc_values[i][0]]=[{'left':left, 'bottom':bottom, 'content':content}]
"""
Align the page text in case they are misaligned
"""
def create_page_table_modified(self, pagedict_list):
# ##print(pagedict_list)
page_dict = collections.OrderedDict()
page_table_1 = []
page_table = []
exc_arr = []
count = 0
for line in pagedict_list:
row = []
temp_key = float(line['bottom'])
if not line in exc_arr and line["content"]:
row.append(line)
exc_arr.append(line)
for line_1 in pagedict_list:
if not line_1 in exc_arr and line_1["content"]:
# #print('last_top:', last_top, each_dict_adjusted['bottom'])
if abs(int(line["bottom"]) - int(line_1["bottom"])) <= 6:
row.append(line_1)
exc_arr.append(line_1)
if row:
page_dict[temp_key] = row
page_table.append(row)
count += 1
# ##print("\n\nPage:",page_table)
page_dict_keys = sorted(page_dict, reverse=True)
for i in page_dict_keys:
# i = sorted(i, key=lambda k: k['left'])
page_table_1.append(page_dict[i])
return page_table_1
"""
Sort the line elements based on its position coordinates
"""
def sortRowElements(self,row_list):
return sorted(row_list, key=lambda k:k['left'])
"""
Combine line elements to form the line text
"""
def combineText(self, row):
temp_ = []
# for i in range(len(row)):
text = [k['content'] for k in row]
temp_.append(' '.join(text))
return ' '.join(temp_)
"""
To call aligning and sorting functions
"""
def sortText(self):
for page in self.doc_page_dict:
self.doc_page_dict[page] = self.create_page_table_modified(self.doc_page_dict[page])
self.doc_page_dict[page] = [self.sortRowElements(line) for line in self.doc_page_dict[page]]
"""
To get text from particular page of the document --> List of line text
"""
def pageText(self, page_no):
page_text = [self.combineText(line) for line in self.doc_page_dict[page_no]]
return page_text
read_document = PDFDoc()

Resources