MemoryError using OpenPyxl and insert_rows() - python-3.x

I have a script that opens up an excel worksheet (1.353 kB), updates row based on values and inserts new rows based on an index. Unfortunately, I get a MemoryError. Besides, the script is consuming a lot of RAM. Is this based on the code or this based on the library? Thank you for your support.
The error message is as follows:
Traceback (most recent call last):
File "update_worksheet.py", line 42, in <module>
update.add_row(idx)
File "update_worksheet.py", line 30, in add_row
self.__ws.insert_rows(idx)
File ".\Python\Python38-32\lib\site-packages\openpyxl\worksheet\worksheet.py", line 713, in insert_rows
self._move_cells(min_row=idx, offset=amount, row_or_col="row")
File ".\Python\Python38-32\lib\site-packages\openpyxl\worksheet\worksheet.py", line 706, in _move_cells
self._move_cell(row, column, row_offset, col_offset)
File ".\Programs\Python\Python38-32\lib\site-packages\openpyxl\worksheet\worksheet.py", line 802, in _move_cell
self._cells[new_row, new_col] = cell
MemoryError
from openpyxl import load_workbook
import pandas as pd
import time
class UpdateWorksheet():
def __init__(self,path):
self.__wb = load_workbook(path, read_only = False)
self.__ws = self.__wb["WP_Planning"]
self.__df = pd.DataFrame(self.__ws.values)
def search_value_in_col_idx(self, search_string, col_idx=1):
try:
idx = self.__df[self.__df[col_idx]==search_string].index.item()
return idx
except ValueError:
return None
def update_status(self,row_idx, col_idx, status):
self.__ws.cell(row = row_idx, column = col_idx).value = status
def find_last_row(self, key, status):
idx = None
try:
idx = self.__df[self.__df[1]=="HMI"].index.tolist()
except ValueError:
idx = None
return idx[-1]
def add_row(self, idx):
self.__ws.insert_rows(idx)
def save_workbook(self,path):
self.__wb.save(path)
print("finish")
if __name__ == "__main__":
path = './data/TrackingSheet.xlsx'
update = UpdateWorksheet(path)
index = update.search_value_in_col_idx("PFIVEDAIEX-4244",4)
update.update_status(index,7,"Mikey")
idx = update.find_last_row("PFIVEDAIEX-4244","Fixed")
update.add_row(idx)
Edit1:
from openpyxl import load_workbook
from openpyxl import Workbook
import pandas as pd
class UpdateWorksheet():
def __init__(self,path):
self.__path = path
def load_file(self, read_only):
wb = load_workbook(self.__path, read_only)
return wb
# Search line to be updated
def search_value_in_col_idx(self, search_string, col_idx, ws):
df = pd.DataFrame(ws.values)
try:
idx = df[df[col_idx]==search_string].index.item()
return idx
except ValueError:
return None
# Update line attributes
def update_status(self,row_idx, col_idx, status, ws):
if not row_idx is None:
ws.cell(row = row_idx, column = col_idx).value = status
# Find last row for new rows
def find_last_row(self,ws):
df = pd.DataFrame(ws.values)
idx = None
try:
idx = df[df[1]=="HMI"].index.tolist()
except ValueError:
idx = None
return idx[-1]
# Add new rows
def add_row(self,idx,ws):
if idx is not None:
ws.insert_rows(idx, 5)
# Save changes made to workbook
def save_workbook(self,wb):
wb.save(self.__path)
if __name__ == "__main__":
path = './data/TrackingSheet.xlsx'
update = UpdateWorksheet(path)
wb = update.load_file(read_only=True)
idx = update.search_value_in_col_idx("PFIVEDAIEX-4244",4,wb["WP_Planning"])
last_row = update.find_last_row(wb["WP_Planning"])
wb.close()
wb = update.load_file(read_only=False)
update.update_status(idx,7,"Mikey",wb["WP_Planning"])
update.add_row(200,wb["WP_Planning"])
pdate.save_workbook(wb)
wb.close()

Related

Is there a way in an external .py to catch data populated by a kivy button (row by row) and then get len() of that dataset as well?

How do I catch tuples generated by clicks on the kivy button in file 1 and the corresponding number of rows, i.e. len(), of that number of rows in file 2? Any support out there is much, much appreciated....
View explanation below...
I created a kivy app delivering a row of tupled values every time I click my button. That works fine. Now I want to pick up, e.g. a dataset of five clicks on button, i.e. 5 rows of tuples. Below is what I did in file 1:
file 1.py:
kv = '''
<Launch>:
BoxLayout:
Button:
size:(80,80)
size_hint:(None,None)
text:"..."
on_press: root.build()
'''
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
import pandas as pd
import numpy as np
from kivy.app import App
from kivy.uix.button import Button
def test(t):
size = t
a = pd.DataFrame(columns=['col1', 'col2', 'col3'])
a['col1'] = pd.DataFrame(np.random.randint(1, 50, size))
a['col2'] = pd.DataFrame(np.random.randint(1, 50, size))
a['col3'] = pd.DataFrame(np.random.rand(size))
t = a
return t
def vars_n(self):
a = test(t=1)
# Define objects for dataframe and col inputs
self.a_num = pd.DataFrame(test(1))
self.a_limit = a[(a.col3) < 1 & (a.col3 > 0.8)]
self.a_col1 = a['col1']
self.a_col2 = a['col2']
self.a_col3 = a['col3']
cols = self.a_col1, self.a_col2, self.a_col3
lst = []
self.a_col1, self.a_col2, self.a_col3 = 'src', 'dest', 'col3'
for a in range(1):
lst.append([self.a_col1, self.a_col2, self.a_col3])
self.a_col1, self.a_col2, self.a_col3 = \
np.random.randint(1, 40, size=1), np.random.randint(1, 40, size=1), np.random.rand(1)
df = pd.DataFrame(lst, columns=cols)
tuple1 = self.a_col1
tuple2 = self.a_col2
tuple3 = self.a_col3
q = tuple(zip(tuple1, tuple2, tuple3))
return q
class MyDf(App, object):
def __init__(self):
super().__init__()
def test_def(self):
msg = test(1)
print(msg)
def test_vars_n(self):
msg = vars_n(test(t=1))
print(msg)
def length(self):
result = len(vars_n(test(t=1)))
print(result)
# Define output for activation of kivy button
def press(self, instance):
print(vars_n(test(t=1)))
# Define kivy button configuration
def build(self):
butt=Button(text="...")
butt.bind(on_press=self.press)
return butt
MyDf().run()
Result after e.g. five clicks, could generate below dataset:
((6, 22, 0.8525529856428397),)
((12, 7, 0.3912468711230911),)
((30, 14, 0.979806646854341),)
((21, 27, 0.618131650972481),)
((8, 20, 0.9164440407619223),)
So, in file 2, I'd like to pull above five lines in the dataset above and, at the same time, get the len of that dataset, i.e. 5. Tried this, but it does not seem to catch the output of file 1:
file 2.py:
import pandas as pd
import numpy as np
my_instance = MyDf()
interactions = my_instance.test_vars_n()
interactions = np.array(interactions)
print(len(interactions)) # testing result
Got this error:
Traceback (most recent call last):
File "G:\...\...\...\....\file2.py", line 38, in <module>
print(len(interactions))
TypeError: len() of unsized object
EDITING with example I was inspired by in above attempts:
File 1
import pandas as pd
import numpy as np
def test(t):
size = t
a = pd.DataFrame(columns=['col1', 'col2', 'col3'])
a['col1'] = pd.DataFrame(np.random.randint(1, 50, size))
a['col2'] = pd.DataFrame(np.random.randint(1, 50, size))
a['col3'] = pd.DataFrame(np.random.rand(size))
t = a
return t
class ClassTest(object):
def test_def(self):
msg = test(1)
print(msg)
File 2:
from Call_an_outside_function_from_class_file_1 import ClassTest
my_new_instance = ClassTest()
ClassTest().test_def()
Got this result, and without using the App.get_running_app() replacement:
col1 col2 col3
0 48 3 0.514489
Process finished with exit code 0

.AttributeError: 'DataFrame' object has no attribute 'get_data_yahoo'. I am getting this error only when I want to pass more than one symbol with loop

This is my code with no error. But when I make the change as
symbol_list = ["INFY.NS", “TATAMOTORS.NS”]
in third line from bellow then I got the error
File "e:/PYTHON STOCK/test.py", line 19, in symbol_back_test
pdr = pdr.get_data_yahoo(symbol, period="2y", interval="1d")
File "C:\Users\harek\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\core\generic.py", line 5583, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'DataFrame' object has no attribute 'get_data_yahoo'
I cant understand what is going on. Why I cant pass more than one symbol through the loop. Can anybody please help.
Thanks.
import copy
import pandas as pd
import talib
import yfinance as yf
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
from pandas_datareader import data as pdr
yf.pdr_override()
def symbol_back_test(symbol):
global pdr
pdr = pdr.get_data_yahoo(symbol, period="2y", interval="1d")
pdr["MA_10"] = talib.MA(pdr["Close"], timeperiod=10)
pdr["MA_50"] = talib.MA(pdr["Close"], timeperiod=50)
pdr["RSI_14"] = talib.RSI(pdr["Close"], timeperiod=14)
position = None
symbol_trades = []
trade = {"Symbol": None, "Buy/Sell": None, "Entry": None, "Entry Date": None, "Exit": None, "Exit Date": None}
for i in pdr.index[49:]:
if pdr["MA_10"][i] > pdr["MA_50"][i] and pdr["RSI_14"][i] > 50 and position != "Buy":
if trade["Symbol"] is not None:
trade["Exit"] = pdr["Close"][i]
trade["Exit Date"] = i
symbol_trades.append(copy.deepcopy(trade))
if position is not None:
trade["Symbol"] = symbol
trade["Buy/Sell"] = "Buy"
trade["Entry"] = pdr["Close"][i]
trade["Entry Date"] = i
position = "Buy"
if pdr["MA_10"][i] < pdr["MA_50"][i] and pdr["RSI_14"][i] < 50 and position != "Sell":
if trade["Symbol"] is not None:
trade["Exit"] = pdr["Close"][i]
trade["Exit Date"] = i
symbol_trades.append(copy.deepcopy(trade))
if position is not None:
trade["Symbol"] = symbol
trade["Buy/Sell"] = "Sell"
trade["Entry"] = pdr["Close"][i]
trade["Entry Date"] = i
print("Sell")
position = "Sell"
return symbol_trades
symbol_list = ["INFY.NS"]
for symbol in symbol_list:
print(symbol_back_test(symbol))
The problem lies in this part of the code.
def symbol_back_test(symbol):
global pdr <-- Why this is needed ?
pdr = pdr.get_data_yahoo(symbol, period="2y", interval="1d")
Firstly, you don't need the global declaration for pdr as its a module you are using. Now, the issue becomes this. You declared pdr as global. Now when the loop executes, first time all goes ok. But since you assign pdr = pdr.get_data_yahoo(.., the pdr variable now references a data frame and not the pandas_datareader. So in the 2nd loop execution, pdr.get_data_yahoo will raise an error, as DataFrame does not have a method like that.
You have to remove the global declaration and do not re-assign the module variable. For eg do like this.
from pandas_datareader import data as p_data_reader
def symbol_back_test(symbol):
pdr = p_data_reader.get_data_yahoo(symbol, period="2y", interval="1d")
...
All should be fine.

How to determine the number of columns per row variably in a CSV File with Python?

I am analyzing xml-structured Textfiles about insider dealings. I wrote some code to parse through the XML-structure and write my output in a CSV file. The results of the files are written per line and the analyzed information is written in individual columns. But in some files information is present in multiple times and my code override the information in the cells, in the end only one date is in the cell of my CSV-File.
import csv
import glob
import re
import string
import time
import bs4 as bs
# User defined directory for files to be parsed
TARGET_FILES = r'D:\files\'
# User defined file pointer to LM dictionary
# User defined output file
OUTPUT_FILE = r'D:\ouput\Parser.csv'
# Setup output
OUTPUT_FIELDS = [r'Datei', 'transactionDate', r'transactionsCode', r'Director', r'Officer', r'Titel', r'10-% Eigner', r'sonstiges', r'SignatureDate']
def main():
f_out = open(OUTPUT_FILE, 'w')
wr = csv.writer(f_out, lineterminator='\n', delimiter=';')
wr.writerow(OUTPUT_FIELDS)
file_list = glob.glob(TARGET_FILES)
for file in file_list:
print(file)
with open(file, 'r', encoding='UTF-8', errors='ignore') as f_in:
soup = bs.BeautifulSoup(f_in, 'xml')
output_data = get_data(soup)
output_data[0] = file
wr.writerow(output_data)
def get_data(soup):
# overrides the transactionDate if more than one transactions disclosed on the current form
# the number determine the column for the output
_odata = [0] * 9
try:
for item in soup.find_all('transactionDate'):
_odata[1] = item.find('value').text
except AttributeError:
_odata[1] = ('keine Angabe')
try:
for item in soup.find_all('transactionAcquiredDisposedCode'):
_odata[2] = item.find('value').text
except AttributeError:
_odata[2] = 'ka'
for item in soup.find_all('reportingOwnerRelationship'):
try:
_odata[3] = item.find('isDirector').text
except AttributeError:
_odata[3] = ('ka')
try:
_odata[4] = item.find('isOfficer').text
except AttributeError:
_odata[4] = ('ka')
try:
_odata[5] = item.find('officerTitle').text
except AttributeError:
_odata[5] = 'ka'
try:
_odata[6] = item.find('isTenPercentOwner').text
except AttributeError:
_odata[6] = ('ka')
try:
_odata[7] = item.find('isOther').text
except AttributeError:
_odata[7] = ('ka')
try:
for item in soup.find_all('ownerSignature'):
_odata[8] = item.find('signatureDate').text
except AttributeError:
_odata[8] = ('ka')
return _odata
if __name__ == '__main__':
print('\n' + time.strftime('%c') + '\nGeneric_Parser.py\n')
main()
print('\n' + time.strftime('%c') + '\nNormal termination.')
Actually the code works, but overwrites columns if, for e.g. more than one transacion date is given in the file. So I need a code that automatically uses the next column for each transaction date. How could this work?
I would be glad if someone have a solution for my problem. Thanks a lot!
Your issue is that you are iterating over the result of
soup.find_all()
and every time you are writing to the same value. You need to do something with
_odata in each iteration, otherwise you will only end up with whatever is written to it the last time.
If you can show us what the data you're trying to parse actually looks like, perhaps we could give a more specific answer.

Inserting values in a table using psycopg2

I am trying to insert data in a "Dummy" table in postgres SQL using psycopg2 and faker library. This is a table that I have created only for learning purpose. It has only one column Student_name which is of type char[]. Below is my Python script
import psycopg2
from faker import Faker
fake = Faker()
conn = psycopg2.connect(database="kreiotdb", user="****", password="*****", host="127.0.0.1", port="5432")
print("Connected Successfuly")
cur = conn.cursor()
for i in range (10):
name = fake.name()
cur.execute(""" INSERT INTO "Dummy" ("Student_name") VALUES (%s);""",[name])
It is giving me the following error when I run the script. The connection is successful
Fri Nov 02 12:16:07 gaurav ~ $ python3 /Users/gaurav/Desktop/populate.py
Connected Successfuly
Traceback (most recent call last):
File "/Users/gaurav/Desktop/populate.py", line 11, in <module>
cur.execute(""" INSERT INTO "Dummy" ("Student_name") VALUES (%s);""",[name])
psycopg2.DataError: malformed array literal: "Brent Allison"
LINE 1: INSERT INTO "Dummy" ("Student_name") VALUES ('Brent Allison...
^
DETAIL: Array value must start with "{" or dimension information.
Why is it giving me this error and should I do ?
Please help.
import os
import csv
import sys
import psycopg2
import json
import csv
#import xlsxwriter
#import configparser
import psycopg2.extras
import psycopg2.extensions
#import logging
#import logging.config
import datetime
import zipfile
from subprocess import call
def db_connect():
dbconn = None
#if conf_section in config == False:
# print("Given section -> {0} is not exists in conf file.".format(conf_section))
# return None
dbhost = ""
dbport = ""
dbname = ""
dbuser = ""
dbpass = ""
try:
dbconn = psycopg2.connect(host=dbhost, port=dbport, dbname=dbname, user=dbuser, password=dbpass)
dbconn.autocommit = True
except Exception as e:
print(e)
return None
finally:
return dbconn
def execute_query(dbconn, query):
nrows = cursor = None
colnames = result = []
try :
cursor = dbconn.cursor(cursor_factory=psycopg2.extras.DictCursor)
cursor.execute(query)
except Exception as e:
print(e)
return (0, colnames, result)
nrows = cursor.rowcount
colnames = [desc[0] for desc in cursor.description]
result = cursor.fetchall()
#cursor.close()
return (nrows)
def parse_csv(default_data):
with open('key.csv') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
tid=row['TID']
mid=row ['MID']
key=row ['Exported Key ']
kcv=row ['KCV']
serial_no=row['HarwardSerialNo']
print ("TID="+tid+" MID="+mid+" EXPORTED KEY="+key+" KCV="+kcv)
request_data=default_data+key
request_data.replace(" ", "")
print (request_data)
cmd="/home/siva/HSM_REQ/hsm_comms.out 192.168.5.51 4000"+request_data
response_data=os.system(cmd)
print (response_data)
dbconn = db_connect()
query="select * from hsm_keys where serial_no ='"+serial_no+"'";
rows=execute_query(dbconn, query)
print (rows)
if (rows==0):
query="INSERT ";
print (query)
return()
def main():
header="0101303200"
head_len="1D"
fun_code="EE0200"
fun_mod="00"
key_len="05"
key_spc="081002"
key_index="0004"
key_type="0500"
len_of_key="10"
default_data=header+head_len+fun_code+fun_mod+key_len+key_spc+key_index+key_type+len_of_key
print (default_data)
parse_csv(default_data)
if __name__ == '__main__':
main()

error while importing images to odoo 11 using python script

I am trying to import images to odoo 11 using a python script. Earlier I used to work on a laptop with windows 7 installed and this script was working fine. But now, I have upgraded my laptop to windows 10 and I tried to run this same script but I am facing few errors.
Here is my script,
import csv
from pprint import pprint
import xmlrpc.client as xmlrpclib
class OpenERPXMLRPC():
#def __init__(self, host="0.0.0.0", port="8088", db="demo",
# user="admin",
# password="admin"):
def __init__(self, host="205.147.98.219", port="", db="BUILDSTATION_ROMFORD",
user="tina.santhosh#gmail.com",
password="buildstation1234*"):
common_url = "http://%s:%s/xmlrpc/common" % (host, port)
object_url = "http://%s:%s/xmlrpc/object" % (host, port)
com_sock = xmlrpclib.ServerProxy(common_url)
uid = com_sock.login(db, user, password)
if uid:
self.uid = uid
self.password = password
self.db = db
else:
print("Error in Authentication")
self.sock = xmlrpclib.ServerProxy(object_url)
def execute(self, model, method, *args):
res = self.sock.execute(self.db, self.uid, self.password, model,
method, *args)
return res
oe = OpenERPXMLRPC(db="BUILDSTATION_ROMFORD")
application = csv.reader(open('C:\\Users\\Asus\\Desktop\\test1.csv'))
for rec in application:
fields = rec
break
all_datas = []
count = 1
for rec in application:
all_datas.append(rec)
count = 0
all_error = []
for rec in all_datas:
count += 1
print(rec)
product_id = oe.execute(
'product.template',
'search',
[('name','=', rec[0])])
print("product_name--", product_id)
with open(rec[1], 'rb') as image:
image_base64 = image.read().encode("base64")
vals = {
'name': rec[0],
'image_medium': image_base64
}
oe.execute(
'product.template',
'write',
product_id,
vals)
I have created a separate file called test1.csv where I have uploaded only the product name and image location.
Here is the error that I am getting,
C:\Users\Asus>python c:\users\asus\desktop\final_import.py
['Airbrick Black', 'E:\\compressed images for odoo\\building materials\\airblocks\\ti0014.jpg']
product_name-- [4071]
Traceback (most recent call last):
File "c:\users\asus\desktop\final_import.py", line 55, in <module>
image_base64 = image.read().encode("base64")
AttributeError: 'bytes' object has no attribute 'encode'
any help here would be much appreciated.
Thanks,
Tina

Resources