str does not support the buffer interface using .find - python-3.x

trying to search within a .thumbdata3 file for thumbnail images. This was someone else's sample code, but I am getting an error
"str does not support the buffer interface using .find"
"""extract files from Android thumbdata3 file"""
f=open('thumbdata3.dat','rb')
tdata = f.read()
f.close()
ss = '\xff\xd8'
se = '\xff\xd9'
count = 0
start = 0
while True:
x1 = tdata.find(ss,start)
if x1 < 0:
break
x2 = tdata.find(se,x1)
jpg = tdata[x1:x2+1]
count += 1
fname = 'extracted%d03.jpg' % (count)
fw = open(fname,'wb')
fw.write(jpg)
fw.close()
start = x2+2

ok, turned out to be very simple.
just add b in front of the data I am trying to match
so
ss = '\xff\xd8'
se = '\xff\xd9'
becomes
ss = b'\xff\xd8'
se = b'\xff\xd9'

It's all right.
With Python 3.x like python-3.6.2
Rename .thumbdata3-1763508120 file to thumbdata3.dat
Rename .thumbdata3--1967290299 file to thumbdata4.dat
enter code here
"""extract files from Android thumbdata3 file"""
f=open('thumbdata3.dat','rb')
tdata = f.read()
f.close()
ss = b'\xff\xd8'
se = b'\xff\xd9'
count = 0
start = 0
while True:
x1 = tdata.find(ss,start)
if x1 < 0:
break
x2 = tdata.find(se,x1)
jpg = tdata[x1:x2+1]
count += 1
fname = 'extracted%d03.jpg' % (count)
fw = open(fname,'wb')
fw.write(jpg)
fw.close()
start = x2+2
enter code here
"""extract files from Android thumbdata4 file"""
f=open('thumbdata4.dat','rb')
tdata = f.read()
f.close()
ss = b'\xff\xd8'
se = b'\xff\xd9'
count = 0
start = 0
while True:
x1 = tdata.find(ss,start)
if x1 < 0:
break
x2 = tdata.find(se,x1)
jpg = tdata[x1:x2+1]
count += 1
fname = 'extracted%d04.jpg' % (count)
fw = open(fname,'wb')
fw.write(jpg)
fw.close()
start = x2+2

Related

Print REGEX using USER DEFINED FUNCTION

I'm trying to print the variables ccb_3, nome, data, taxa and parcela using the function I defined as "ext_ccb", but when I run the code it returns 3 times (because I defined q as 3) the variable ccb_3.
I tried splitting it into 2 functions (one with the variable ccb_3 e one with the rest that uses REGEX) but it didn't worked to.
'''
from PyPDF2 import PdfFileReader, PdfFileWriter
import re
x = 1
q = 3
def ext_ccb():
nome_ccb = str("Vazio (" + y + ").pdf")
ccb = PdfFileReader(nome_ccb)
ccb_obj_1 = ccb.getPage(0)
ccb_text_1 = ccb_obj_1.extractText()
ccb_obj_2 = ccb.getPage(1)
ccb_text_2 = ccb_obj_2.extractText()
ccb_3 = ccb_text_1[1:8]
print(ccb_3)
pattern_nome = re.compile(r'''[^\n][^CPF][A-Z](|\.)\w*\s*.*$
Nome Completo
''', re.M)
matches_nome = pattern_nome.finditer(ccb_text_1)
for match in matches_nome:
nome = str(match)
nome = nome[40:].replace(r"\n\nNome Completo\n'>", "")
print(nome)
pattern_data = re.compile(r'''5\.2\. Modalidade
\d{2}/\d{2}/\d{4}
''')
matches_data = pattern_data.findall(ccb_text_1)
for match in matches_data:
data = match[17:27]
print(data)
pattern_taxa = re.compile(r'''Taxa de Juros a\.m\. \(%\)
\d*,\d*''')
matches_taxa = pattern_taxa.findall(ccb_text_2)
for match in matches_taxa:
taxa = match[24:]
print(taxa)
pattern_vparcela = re.compile(r'''Valor das Parcelas
R\$ \d*,\d*''')
matches_vparcela = pattern_vparcela.findall(ccb_text_2)
for match in matches_vparcela:
parcela = match[23:]
print(parcela)
while x <= q:
y = str(x)
x += 1
ext_ccb()
'''
What I really need is to insert it into an csv, multiple times from different PDF's, which I already have the code for:
'''
from csv import writer
x = 5
q = 0
while q < x:
q += 1
ccb_3 += 1
nome += 2
data += 4
taxa += 4
parcela += 5
list_data = [ccb_3, nome, data, taxa, parcela]
with open('csv_teste.csv', 'a', newline = '') as f_object:
writer_object = writer(f_object)
writer_object.writerow(list_data)
f_object.close()
'''
How can I save each data from each PDF and put it into the CSV?

Multiple threads tkinter = GUI empty

I am trying to copy a file and generate progress in a progress bar while it happens. I had to split the copying down to separate threads because tkinter wouldn't spawn the gui otherwise. All works fine with one thread, but as soon as a start another, the GUI doesn't update, it's just blank. Here's the code that matters:
def move_bar():
global stop
text.set(f'Moving {to_move[last_sep + 1:]}...')
while stop:
if mque.empty():
continue
else:
a = mque.get()
prog.set(a)
pgs.update()
else:
stop = 1
#copy_bar()
def copy_bar():
text.set(f'Copying {to_move[last_sep + 1:]}...')
while stop:
if cque.empty():
continue
else:
a = cque.get()
prog.set(a)
pgs.update()
else:
root.destroy()
def copyfile(src, dst):
global stop
pyway = 1024 * 1024 # Found this number in the shutil module
length = size if size < pyway else pyway
p = 0
with threading.Lock(): # Will be in it's own thread, lock till done
with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
with memoryview(bytearray(length)) as mv:
while True:
# r (int) contains the size of the chunck written
r = fsrc.readinto(mv)
if not r:
break
elif r < length:
with mv[:r] as smv:
fdst.write(smv)
else:
fdst.write(mv)
cque.put(p)
p += r
stop = 0
def movefile(src, dst):
global stop
pyway = 1024 * 1024 # Found this number in the shutil module
p = 0
with threading.Lock(): # Will be in it's own thread, lock till done
length = size if size < pyway else pyway
with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
with memoryview(bytearray(length)) as mv:
while True:
# r (int) contains the size of the chunck written
r = fsrc.readinto(mv)
if not r:
break
elif r < length:
with mv[:r] as smv:
fdst.write(smv)
else:
fdst.write(mv)
mque.put(p)
p += r
stop = 0
os.remove(src)
#to_move = sys.argv[1] # file dragged onto script
to_move = 'D:\\00.mkv'
size = os.stat(to_move).st_size
last_sep = to_move.rfind('\\')
# Create the GUI
root = T.Tk()
text = T.StringVar()
prog = T.IntVar()
lbl = T.Label(root, textvariable=text, font=(None, 13))
lbl.grid()
pgs = t.Progressbar(root, orient='horizontal', length=150, mode='determinate',
maximum=size, variable=prog)
pgs.grid(row=1)
s = time.time()
root.after(250, move_bar)
drives_dict = get_drive_name(get_drive_letters())
height = get_pixel_height(to_move)
# If I ever change my encodes so that DVDs height is more or less than
# 480, I'll have to change the next line:
if height == 480:
path1 = drives_dict['Back-Ups'] + 'My Movies\\DVD Rips\\'
path2 = drives_dict['Movies'] + 'DVD Rips\\'
else:
path1 = drives_dict['Back-Ups'] + 'My Movies\\Blu-ray Rips\\'
path2 = drives_dict['Movies'] + 'Blu-ray Rips\\'
move_to = path1 + to_move[last_sep + 1:]
copy_to = path2 + to_move[last_sep + 1:]
# Multiple threads because tkinter doesn't want to generate when the main
# thread is tied up:
t1 = threading.Thread(target=movefile, args=(to_move, move_to))
t1.start()
#t2 = threading.Thread(target=copyfile, args=(move_to, copy_to))
#t2.start()
root.mainloop()
Anyone have an idea why two threads mess up tkinter? Have I done something wrong?

Scipy optimize.minimize with multi- parameters

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import linalg, optimize
%matplotlib inline
Data load
data = pd.read_csv("D:/Stat/TimeSeries/KRW_month_0617_1.csv",index_col="Date") / 100
para = open("D:/Stat/TimeSeries/KRW_month_0617_1.txt").readlines()[0:2]
data.index = pd.to_datetime(data.index)
Parameters
cond = []
params = []
time = []
for i in para:
j = i.split()
for k in j:
cond.append(k)
cond = cond[1:]
for i in range(len(cond)):
cond[i] = round(float(cond[i]),4)
params = cond[0:23]
time = cond[23:]
maturity = np.array(time[1:])
timegap = 1/cond[23]
Functions We need
def Paramcheck(Params, checkStationary = 1):
result = 0
Kappa = np.array([[params[20],0,0], [0,params[21],0], [0,0,params[22]]])
Sigma = np.array([[params[1],0,0], [params[2],params[3],0], [params[4],params[5],params[6]]])
State = np.array([params[7], params[8], params[9]])
Lambda = params[0]
SigmaEps = np.identity(10)
for i in range(10):
SigmaEps[i][i] = params[i+10]
for i in range(len(Sigma)):
if Sigma[i][i] < 0:
result = 1
for j in SigmaEps:
if np.any(SigmaEps) < 0:
result = 1
if Lambda < 0.05 or Lambda > 2:
result = 1
elif State[0] < 0:
result = 1
elif Kappa[0][0] < 0:
result = 1
if result == 0 and checkStationary > 0:
if max(np.linalg.eigvals(-Kappa).real) > 0:
result = 2
return result
def CheckDet(x):
if x == np.inf or x == np.nan:
result = 1
elif x < 0:
result = 2
elif abs(x) < 10**-250:
result = 3
else:
result = 0
return result
def NS_factor(lambda_val, maturity):
col1 = np.ones(len(maturity))
col2 = (1 - np.exp(-lambda_val*maturity))/(lambda_val*maturity)
col3 = col2 - np.exp(-lambda_val*maturity)
factor = np.array([col1,col2,col3]).transpose()
return factor
def DNS_Kalman_filter(Params, *args):
N = Paramcheck(Params)
if N == 0:
Kappa = np.array([[params[20],0,0], [0,params[21],0], [0,0,params[22]]])
Sigma = np.array([[params[1],0,0], [params[2],params[3],0],
[params[4],params[5],params[6]]])
State = np.array([params[7], params[8], params[9]])
Lambda = params[0]
SigmaEps = np.identity(10)
for i in range(10):
SigmaEps[i][i] = params[i+10]
Obs_Yield = args[0]
Obs_Date = args[1]
Timegap = args[2]
Obs_Mty = args[3]
Finalstate = args[4]
Mty_length = len(Obs_Mty)
B = NS_factor(lambda_val = Lambda,maturity = Obs_Mty)
H_large = SigmaEps **2
N_obs = len(Obs_Date)
LLH_vec = np.zeros(N_obs)
phi1 = linalg.expm(-Kappa*Timegap)
phi0 = (np.identity(3)-phi1) # State
Eigenvalues = np.linalg.eig(Kappa)[0]
Eigen_vec = np.linalg.eig(Kappa)[1]
Eigen_vec_inv = np.linalg.inv(Eigen_vec)
S = Eigen_vec_inv # Sigma # Sigma.transpose() # Eigen_vec_inv.transpose()
Atilde = np.dot(Sigma[0], Sigma[0])
Btilde = np.dot(Sigma[1], Sigma[1])
Ctilde = np.dot(Sigma[2], Sigma[2])
Dtilde = np.dot(Sigma[0], Sigma[1])
Etilde = np.dot(Sigma[0], Sigma[2])
Ftilde = np.dot(Sigma[1], Sigma[2])
res1= Atilde* Obs_Mty* Obs_Mty/6
res2= Btilde*(1/(2*Lambda**2) - (1-np.exp(-Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + (1-
np.exp(-2*Lambda*Obs_Mty))/(4*Lambda**3*Obs_Mty))
res3= Ctilde*(1/(2*Lambda**2) + np.exp(-Lambda*Obs_Mty)/(Lambda**2)-
Obs_Mty*np.exp(-2*Lambda*Obs_Mty)/(4*Lambda) -
3*np.exp(-2*Lambda*Obs_Mty)/(4*Lambda**2) - 2*(1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + 5*(1-
np.exp(-2*Lambda*Obs_Mty))/(8*Lambda**3*Obs_Mty))
res4= Dtilde*(Obs_Mty/(2*Lambda) + np.exp(-Lambda*Obs_Mty)/(Lambda**2) - (1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty))
res5= Etilde*(3*np.exp(-Lambda*Obs_Mty)/(Lambda**2) + Obs_Mty/(2*Lambda)+Obs_Mty*np.exp(-
Lambda*Obs_Mty)/(Lambda) - 3*(1-np.exp(-Lambda*Obs_Mty))/(Lambda**3*Obs_Mty))
res6= Ftilde*(1/(Lambda**2) + np.exp(-Lambda*Obs_Mty)/(Lambda**2) -
np.exp(-2*Lambda*Obs_Mty)/(2*Lambda**2) - 3*(1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + 3*(1-
np.exp(-2*Lambda*Obs_Mty))/(4*Lambda**3*Obs_Mty))
val = res1 + res2 + res3 + res4 + res5 + res6
V_mat = np.zeros([3,3])
V_lim = np.zeros([3,3])
for i in range(3):
for j in range(3):
V_mat[i][j] = S[i][j]*(1-np.exp(-(Eigenvalues[i] +
Eigenvalues[j])*Timegap))/(Eigenvalues[i] + Eigenvalues[j])
V_lim[i][j] = S[i][j]/(Eigenvalues[i] + Eigenvalues[j])
Q = (Eigen_vec # V_mat # Eigen_vec.transpose()).real
Sigma_lim = (Eigen_vec # V_lim # Eigen_vec.transpose()).real
for i in range(N_obs):
y = Obs_Yield[i]
xhat = phi0 + phi1 # State
y_implied = B # xhat
v = y - y_implied + val
Sigmahat = phi1 # Sigma_lim # phi1.transpose() + Q
F = B # Sigmahat # B.transpose() + H_large
detF = np.linalg.det(F)
if CheckDet(detF) > 0:
N = 3
break
Finv = np.linalg.inv(F)
State = xhat + Sigmahat # B.transpose() # Finv # v
Sigma_lim = Sigmahat - Sigmahat # B.transpose() # Finv # B # Sigmahat
LLH_vec[i] = np.log(detF) + v.transpose() # Finv # v
if N == 0:
if Finalstate:
yDate = Obs_Date[-1]
result = np.array([yDate,State])
else:
result = 0.5 * (sum(LLH_vec) + Mty_length*N_obs*np.log(2*np.pi))
else:
result = 7000000
return result
I made a code that does Arbitrage Free Nelson-Siegel model. Data is return rates of bond (1Y,1.5Y, ... ,20Y). I wanna optimize that function with scipy optimize.minimize function with fixed *args.
Suppose that Initial parmas are verified that it's close to optimized params from empirical experiments using Dynamic Nelson-Siegel Model.
LLC_new = 0
while True:
LLC_old = LLC_new
OPT = optimize.minimize(x0=params,fun=DNS_Kalman_filter, args=
(data.values,data.index,timegap,maturity,0))
params = OPT.x
LLC_new = round(OPT.fun,5)
print("Current LLC: %0.5f" %LLC_new)
if LLC_old == LLC_new:
OPT_para = params
FinalState = DNS_Kalman_filter(params,data.values,data.index,timegap,maturity,True)
break
Result is
Current LLC: -7613.70146
Current LLC: -7613.70146
LLC(log-likelihood value) isn't maximized. It's not a result I desire using Optimizer.
Is there any solution for that?
In R, there is optim() function works as similar as scipy.optimize.minimize() which works really well. I also have a R code for that very similar to this Python code.

How to fix with KeyError: q[minKey]? Run time error on python

I'm currently doing my school assignment which needs to be submitted to Automarker to test the code.
The question I'm stuck has to pass 5 different tasks and each task contains different input which is not given.
The only input is given is the sample input which shows below.
I keep getting the result of "KeyError: del q[min_Key]" and the status show "RunTimeError".
I don't understand how does it occurs?
And how to fix this error correctly?
from math import sqrt
import math
def getP(numV, ver, e):
d = -1
q = {0:0.0}
while bool(q):
minNum = 9999.0
min_Key = len(q) - 1
for k in q.keys():
if minNum > q[k]:
minNum = q[k]
min_Key = k
if numV-1 == min_Key:
d = minNum
break
del q[min_Key]
for v in e[min_Key].keys():
if v not in q:
q[v] = minNum+e[min_Key][v]
else:
if q[v] > minNum+e[min_Key][v]:
q[v] = minNum+e[min_Key][v]
return (d)
while True:
try:
user_input = input().split(',')
size = len(user_input)
n = user_input[0]
ver = {}
numV = 0
for i in range(1, size):
if i % 2 == 1:
ver[numV] = (float(user_input[i]), float(user_input[i+1]))
numV = numV + 1
e = {}
for u in ver.keys():
e[u] = {}
for v in ver.keys():
if u != v:
dist1 = (ver[u][1] - ver[v][1])*(ver[u][1] - ver[v][1])
dist2 = (ver[u][0] - ver[v][0])*(ver[u][0] - ver[v][0])
dist = dist1 + dist2
if dist <= 10000.0:
dist = sqrt(dist)
e[u][v] = dist
d = getP(numV, ver, e)
if d == -1:
print(d)
else:
print('{0:.2f}'.format(d))
except EOFError:
break
Sample Input: 100,0,0,0,100,100,100
Sample Output: 200.00
TIA!

Tkinter: Checkbuttons and list

Hey Guys I have a problem with checkboxen in tkinter. Can someone say where my fault is ?
def edit_contact_gui(self):
"""GUI to edit the created contacts."""
self.edit_contact_wd = tk.Tk()
self.edit_contact_wd.title('Edit Contacts of the Phonebook:"%s"'\
% self.book)
self.button_edit = tk.Button(self.edit_contact_wd, text = 'Edit',\
command = self.edit_contact)
try:
with open('%s.txt' % self.book, 'rb') as file:
book = pickle.load(file)
x = 1
self.var_lst = []
for i in book:
var = tk.IntVar()
tk.Label(self.edit_contact_wd, text = i).grid(row = x, \
column = 0)
tk.Checkbutton(self.edit_contact_wd, text = 'edit', \
variable = var).grid(row = x, column = 1)
self.var_lst.append(var.get())
x += 1
self.button_edit.grid(row = x+1, column = 1)
except FileNotFoundError:
tk.Label(self.edit_contact_wd, text = 'The phonebook has no entrys!', fg = 'red').grid(row = 1, column = 0)
self.edit_contact_wd.mainloop()
def edit_contact(self):
print(self.var_lst)
My GUI output works, but the programm return me a List [0,0,0,0,0] full of zeros. In my opinion the Checkbox who is marked has return a 1 but it doesnt to it. Why? Can you help me ?
You have to keep IntVar (var) on list, not value from IntVar (var.get())
self.var_lst.append(var) # without .get()
and in edit_contact() you have to use get()
for var in self.var_lst:
print(var.get())

Resources