trying to search within a .thumbdata3 file for thumbnail images. This was someone else's sample code, but I am getting an error
"str does not support the buffer interface using .find"
"""extract files from Android thumbdata3 file"""
f=open('thumbdata3.dat','rb')
tdata = f.read()
f.close()
ss = '\xff\xd8'
se = '\xff\xd9'
count = 0
start = 0
while True:
x1 = tdata.find(ss,start)
if x1 < 0:
break
x2 = tdata.find(se,x1)
jpg = tdata[x1:x2+1]
count += 1
fname = 'extracted%d03.jpg' % (count)
fw = open(fname,'wb')
fw.write(jpg)
fw.close()
start = x2+2
ok, turned out to be very simple.
just add b in front of the data I am trying to match
so
ss = '\xff\xd8'
se = '\xff\xd9'
becomes
ss = b'\xff\xd8'
se = b'\xff\xd9'
It's all right.
With Python 3.x like python-3.6.2
Rename .thumbdata3-1763508120 file to thumbdata3.dat
Rename .thumbdata3--1967290299 file to thumbdata4.dat
enter code here
"""extract files from Android thumbdata3 file"""
f=open('thumbdata3.dat','rb')
tdata = f.read()
f.close()
ss = b'\xff\xd8'
se = b'\xff\xd9'
count = 0
start = 0
while True:
x1 = tdata.find(ss,start)
if x1 < 0:
break
x2 = tdata.find(se,x1)
jpg = tdata[x1:x2+1]
count += 1
fname = 'extracted%d03.jpg' % (count)
fw = open(fname,'wb')
fw.write(jpg)
fw.close()
start = x2+2
enter code here
"""extract files from Android thumbdata4 file"""
f=open('thumbdata4.dat','rb')
tdata = f.read()
f.close()
ss = b'\xff\xd8'
se = b'\xff\xd9'
count = 0
start = 0
while True:
x1 = tdata.find(ss,start)
if x1 < 0:
break
x2 = tdata.find(se,x1)
jpg = tdata[x1:x2+1]
count += 1
fname = 'extracted%d04.jpg' % (count)
fw = open(fname,'wb')
fw.write(jpg)
fw.close()
start = x2+2
Related
I'm trying to print the variables ccb_3, nome, data, taxa and parcela using the function I defined as "ext_ccb", but when I run the code it returns 3 times (because I defined q as 3) the variable ccb_3.
I tried splitting it into 2 functions (one with the variable ccb_3 e one with the rest that uses REGEX) but it didn't worked to.
'''
from PyPDF2 import PdfFileReader, PdfFileWriter
import re
x = 1
q = 3
def ext_ccb():
nome_ccb = str("Vazio (" + y + ").pdf")
ccb = PdfFileReader(nome_ccb)
ccb_obj_1 = ccb.getPage(0)
ccb_text_1 = ccb_obj_1.extractText()
ccb_obj_2 = ccb.getPage(1)
ccb_text_2 = ccb_obj_2.extractText()
ccb_3 = ccb_text_1[1:8]
print(ccb_3)
pattern_nome = re.compile(r'''[^\n][^CPF][A-Z](|\.)\w*\s*.*$
Nome Completo
''', re.M)
matches_nome = pattern_nome.finditer(ccb_text_1)
for match in matches_nome:
nome = str(match)
nome = nome[40:].replace(r"\n\nNome Completo\n'>", "")
print(nome)
pattern_data = re.compile(r'''5\.2\. Modalidade
\d{2}/\d{2}/\d{4}
''')
matches_data = pattern_data.findall(ccb_text_1)
for match in matches_data:
data = match[17:27]
print(data)
pattern_taxa = re.compile(r'''Taxa de Juros a\.m\. \(%\)
\d*,\d*''')
matches_taxa = pattern_taxa.findall(ccb_text_2)
for match in matches_taxa:
taxa = match[24:]
print(taxa)
pattern_vparcela = re.compile(r'''Valor das Parcelas
R\$ \d*,\d*''')
matches_vparcela = pattern_vparcela.findall(ccb_text_2)
for match in matches_vparcela:
parcela = match[23:]
print(parcela)
while x <= q:
y = str(x)
x += 1
ext_ccb()
'''
What I really need is to insert it into an csv, multiple times from different PDF's, which I already have the code for:
'''
from csv import writer
x = 5
q = 0
while q < x:
q += 1
ccb_3 += 1
nome += 2
data += 4
taxa += 4
parcela += 5
list_data = [ccb_3, nome, data, taxa, parcela]
with open('csv_teste.csv', 'a', newline = '') as f_object:
writer_object = writer(f_object)
writer_object.writerow(list_data)
f_object.close()
'''
How can I save each data from each PDF and put it into the CSV?
I am trying to copy a file and generate progress in a progress bar while it happens. I had to split the copying down to separate threads because tkinter wouldn't spawn the gui otherwise. All works fine with one thread, but as soon as a start another, the GUI doesn't update, it's just blank. Here's the code that matters:
def move_bar():
global stop
text.set(f'Moving {to_move[last_sep + 1:]}...')
while stop:
if mque.empty():
continue
else:
a = mque.get()
prog.set(a)
pgs.update()
else:
stop = 1
#copy_bar()
def copy_bar():
text.set(f'Copying {to_move[last_sep + 1:]}...')
while stop:
if cque.empty():
continue
else:
a = cque.get()
prog.set(a)
pgs.update()
else:
root.destroy()
def copyfile(src, dst):
global stop
pyway = 1024 * 1024 # Found this number in the shutil module
length = size if size < pyway else pyway
p = 0
with threading.Lock(): # Will be in it's own thread, lock till done
with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
with memoryview(bytearray(length)) as mv:
while True:
# r (int) contains the size of the chunck written
r = fsrc.readinto(mv)
if not r:
break
elif r < length:
with mv[:r] as smv:
fdst.write(smv)
else:
fdst.write(mv)
cque.put(p)
p += r
stop = 0
def movefile(src, dst):
global stop
pyway = 1024 * 1024 # Found this number in the shutil module
p = 0
with threading.Lock(): # Will be in it's own thread, lock till done
length = size if size < pyway else pyway
with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
with memoryview(bytearray(length)) as mv:
while True:
# r (int) contains the size of the chunck written
r = fsrc.readinto(mv)
if not r:
break
elif r < length:
with mv[:r] as smv:
fdst.write(smv)
else:
fdst.write(mv)
mque.put(p)
p += r
stop = 0
os.remove(src)
#to_move = sys.argv[1] # file dragged onto script
to_move = 'D:\\00.mkv'
size = os.stat(to_move).st_size
last_sep = to_move.rfind('\\')
# Create the GUI
root = T.Tk()
text = T.StringVar()
prog = T.IntVar()
lbl = T.Label(root, textvariable=text, font=(None, 13))
lbl.grid()
pgs = t.Progressbar(root, orient='horizontal', length=150, mode='determinate',
maximum=size, variable=prog)
pgs.grid(row=1)
s = time.time()
root.after(250, move_bar)
drives_dict = get_drive_name(get_drive_letters())
height = get_pixel_height(to_move)
# If I ever change my encodes so that DVDs height is more or less than
# 480, I'll have to change the next line:
if height == 480:
path1 = drives_dict['Back-Ups'] + 'My Movies\\DVD Rips\\'
path2 = drives_dict['Movies'] + 'DVD Rips\\'
else:
path1 = drives_dict['Back-Ups'] + 'My Movies\\Blu-ray Rips\\'
path2 = drives_dict['Movies'] + 'Blu-ray Rips\\'
move_to = path1 + to_move[last_sep + 1:]
copy_to = path2 + to_move[last_sep + 1:]
# Multiple threads because tkinter doesn't want to generate when the main
# thread is tied up:
t1 = threading.Thread(target=movefile, args=(to_move, move_to))
t1.start()
#t2 = threading.Thread(target=copyfile, args=(move_to, copy_to))
#t2.start()
root.mainloop()
Anyone have an idea why two threads mess up tkinter? Have I done something wrong?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import linalg, optimize
%matplotlib inline
Data load
data = pd.read_csv("D:/Stat/TimeSeries/KRW_month_0617_1.csv",index_col="Date") / 100
para = open("D:/Stat/TimeSeries/KRW_month_0617_1.txt").readlines()[0:2]
data.index = pd.to_datetime(data.index)
Parameters
cond = []
params = []
time = []
for i in para:
j = i.split()
for k in j:
cond.append(k)
cond = cond[1:]
for i in range(len(cond)):
cond[i] = round(float(cond[i]),4)
params = cond[0:23]
time = cond[23:]
maturity = np.array(time[1:])
timegap = 1/cond[23]
Functions We need
def Paramcheck(Params, checkStationary = 1):
result = 0
Kappa = np.array([[params[20],0,0], [0,params[21],0], [0,0,params[22]]])
Sigma = np.array([[params[1],0,0], [params[2],params[3],0], [params[4],params[5],params[6]]])
State = np.array([params[7], params[8], params[9]])
Lambda = params[0]
SigmaEps = np.identity(10)
for i in range(10):
SigmaEps[i][i] = params[i+10]
for i in range(len(Sigma)):
if Sigma[i][i] < 0:
result = 1
for j in SigmaEps:
if np.any(SigmaEps) < 0:
result = 1
if Lambda < 0.05 or Lambda > 2:
result = 1
elif State[0] < 0:
result = 1
elif Kappa[0][0] < 0:
result = 1
if result == 0 and checkStationary > 0:
if max(np.linalg.eigvals(-Kappa).real) > 0:
result = 2
return result
def CheckDet(x):
if x == np.inf or x == np.nan:
result = 1
elif x < 0:
result = 2
elif abs(x) < 10**-250:
result = 3
else:
result = 0
return result
def NS_factor(lambda_val, maturity):
col1 = np.ones(len(maturity))
col2 = (1 - np.exp(-lambda_val*maturity))/(lambda_val*maturity)
col3 = col2 - np.exp(-lambda_val*maturity)
factor = np.array([col1,col2,col3]).transpose()
return factor
def DNS_Kalman_filter(Params, *args):
N = Paramcheck(Params)
if N == 0:
Kappa = np.array([[params[20],0,0], [0,params[21],0], [0,0,params[22]]])
Sigma = np.array([[params[1],0,0], [params[2],params[3],0],
[params[4],params[5],params[6]]])
State = np.array([params[7], params[8], params[9]])
Lambda = params[0]
SigmaEps = np.identity(10)
for i in range(10):
SigmaEps[i][i] = params[i+10]
Obs_Yield = args[0]
Obs_Date = args[1]
Timegap = args[2]
Obs_Mty = args[3]
Finalstate = args[4]
Mty_length = len(Obs_Mty)
B = NS_factor(lambda_val = Lambda,maturity = Obs_Mty)
H_large = SigmaEps **2
N_obs = len(Obs_Date)
LLH_vec = np.zeros(N_obs)
phi1 = linalg.expm(-Kappa*Timegap)
phi0 = (np.identity(3)-phi1) # State
Eigenvalues = np.linalg.eig(Kappa)[0]
Eigen_vec = np.linalg.eig(Kappa)[1]
Eigen_vec_inv = np.linalg.inv(Eigen_vec)
S = Eigen_vec_inv # Sigma # Sigma.transpose() # Eigen_vec_inv.transpose()
Atilde = np.dot(Sigma[0], Sigma[0])
Btilde = np.dot(Sigma[1], Sigma[1])
Ctilde = np.dot(Sigma[2], Sigma[2])
Dtilde = np.dot(Sigma[0], Sigma[1])
Etilde = np.dot(Sigma[0], Sigma[2])
Ftilde = np.dot(Sigma[1], Sigma[2])
res1= Atilde* Obs_Mty* Obs_Mty/6
res2= Btilde*(1/(2*Lambda**2) - (1-np.exp(-Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + (1-
np.exp(-2*Lambda*Obs_Mty))/(4*Lambda**3*Obs_Mty))
res3= Ctilde*(1/(2*Lambda**2) + np.exp(-Lambda*Obs_Mty)/(Lambda**2)-
Obs_Mty*np.exp(-2*Lambda*Obs_Mty)/(4*Lambda) -
3*np.exp(-2*Lambda*Obs_Mty)/(4*Lambda**2) - 2*(1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + 5*(1-
np.exp(-2*Lambda*Obs_Mty))/(8*Lambda**3*Obs_Mty))
res4= Dtilde*(Obs_Mty/(2*Lambda) + np.exp(-Lambda*Obs_Mty)/(Lambda**2) - (1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty))
res5= Etilde*(3*np.exp(-Lambda*Obs_Mty)/(Lambda**2) + Obs_Mty/(2*Lambda)+Obs_Mty*np.exp(-
Lambda*Obs_Mty)/(Lambda) - 3*(1-np.exp(-Lambda*Obs_Mty))/(Lambda**3*Obs_Mty))
res6= Ftilde*(1/(Lambda**2) + np.exp(-Lambda*Obs_Mty)/(Lambda**2) -
np.exp(-2*Lambda*Obs_Mty)/(2*Lambda**2) - 3*(1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + 3*(1-
np.exp(-2*Lambda*Obs_Mty))/(4*Lambda**3*Obs_Mty))
val = res1 + res2 + res3 + res4 + res5 + res6
V_mat = np.zeros([3,3])
V_lim = np.zeros([3,3])
for i in range(3):
for j in range(3):
V_mat[i][j] = S[i][j]*(1-np.exp(-(Eigenvalues[i] +
Eigenvalues[j])*Timegap))/(Eigenvalues[i] + Eigenvalues[j])
V_lim[i][j] = S[i][j]/(Eigenvalues[i] + Eigenvalues[j])
Q = (Eigen_vec # V_mat # Eigen_vec.transpose()).real
Sigma_lim = (Eigen_vec # V_lim # Eigen_vec.transpose()).real
for i in range(N_obs):
y = Obs_Yield[i]
xhat = phi0 + phi1 # State
y_implied = B # xhat
v = y - y_implied + val
Sigmahat = phi1 # Sigma_lim # phi1.transpose() + Q
F = B # Sigmahat # B.transpose() + H_large
detF = np.linalg.det(F)
if CheckDet(detF) > 0:
N = 3
break
Finv = np.linalg.inv(F)
State = xhat + Sigmahat # B.transpose() # Finv # v
Sigma_lim = Sigmahat - Sigmahat # B.transpose() # Finv # B # Sigmahat
LLH_vec[i] = np.log(detF) + v.transpose() # Finv # v
if N == 0:
if Finalstate:
yDate = Obs_Date[-1]
result = np.array([yDate,State])
else:
result = 0.5 * (sum(LLH_vec) + Mty_length*N_obs*np.log(2*np.pi))
else:
result = 7000000
return result
I made a code that does Arbitrage Free Nelson-Siegel model. Data is return rates of bond (1Y,1.5Y, ... ,20Y). I wanna optimize that function with scipy optimize.minimize function with fixed *args.
Suppose that Initial parmas are verified that it's close to optimized params from empirical experiments using Dynamic Nelson-Siegel Model.
LLC_new = 0
while True:
LLC_old = LLC_new
OPT = optimize.minimize(x0=params,fun=DNS_Kalman_filter, args=
(data.values,data.index,timegap,maturity,0))
params = OPT.x
LLC_new = round(OPT.fun,5)
print("Current LLC: %0.5f" %LLC_new)
if LLC_old == LLC_new:
OPT_para = params
FinalState = DNS_Kalman_filter(params,data.values,data.index,timegap,maturity,True)
break
Result is
Current LLC: -7613.70146
Current LLC: -7613.70146
LLC(log-likelihood value) isn't maximized. It's not a result I desire using Optimizer.
Is there any solution for that?
In R, there is optim() function works as similar as scipy.optimize.minimize() which works really well. I also have a R code for that very similar to this Python code.
I'm currently doing my school assignment which needs to be submitted to Automarker to test the code.
The question I'm stuck has to pass 5 different tasks and each task contains different input which is not given.
The only input is given is the sample input which shows below.
I keep getting the result of "KeyError: del q[min_Key]" and the status show "RunTimeError".
I don't understand how does it occurs?
And how to fix this error correctly?
from math import sqrt
import math
def getP(numV, ver, e):
d = -1
q = {0:0.0}
while bool(q):
minNum = 9999.0
min_Key = len(q) - 1
for k in q.keys():
if minNum > q[k]:
minNum = q[k]
min_Key = k
if numV-1 == min_Key:
d = minNum
break
del q[min_Key]
for v in e[min_Key].keys():
if v not in q:
q[v] = minNum+e[min_Key][v]
else:
if q[v] > minNum+e[min_Key][v]:
q[v] = minNum+e[min_Key][v]
return (d)
while True:
try:
user_input = input().split(',')
size = len(user_input)
n = user_input[0]
ver = {}
numV = 0
for i in range(1, size):
if i % 2 == 1:
ver[numV] = (float(user_input[i]), float(user_input[i+1]))
numV = numV + 1
e = {}
for u in ver.keys():
e[u] = {}
for v in ver.keys():
if u != v:
dist1 = (ver[u][1] - ver[v][1])*(ver[u][1] - ver[v][1])
dist2 = (ver[u][0] - ver[v][0])*(ver[u][0] - ver[v][0])
dist = dist1 + dist2
if dist <= 10000.0:
dist = sqrt(dist)
e[u][v] = dist
d = getP(numV, ver, e)
if d == -1:
print(d)
else:
print('{0:.2f}'.format(d))
except EOFError:
break
Sample Input: 100,0,0,0,100,100,100
Sample Output: 200.00
TIA!
Hey Guys I have a problem with checkboxen in tkinter. Can someone say where my fault is ?
def edit_contact_gui(self):
"""GUI to edit the created contacts."""
self.edit_contact_wd = tk.Tk()
self.edit_contact_wd.title('Edit Contacts of the Phonebook:"%s"'\
% self.book)
self.button_edit = tk.Button(self.edit_contact_wd, text = 'Edit',\
command = self.edit_contact)
try:
with open('%s.txt' % self.book, 'rb') as file:
book = pickle.load(file)
x = 1
self.var_lst = []
for i in book:
var = tk.IntVar()
tk.Label(self.edit_contact_wd, text = i).grid(row = x, \
column = 0)
tk.Checkbutton(self.edit_contact_wd, text = 'edit', \
variable = var).grid(row = x, column = 1)
self.var_lst.append(var.get())
x += 1
self.button_edit.grid(row = x+1, column = 1)
except FileNotFoundError:
tk.Label(self.edit_contact_wd, text = 'The phonebook has no entrys!', fg = 'red').grid(row = 1, column = 0)
self.edit_contact_wd.mainloop()
def edit_contact(self):
print(self.var_lst)
My GUI output works, but the programm return me a List [0,0,0,0,0] full of zeros. In my opinion the Checkbox who is marked has return a 1 but it doesnt to it. Why? Can you help me ?
You have to keep IntVar (var) on list, not value from IntVar (var.get())
self.var_lst.append(var) # without .get()
and in edit_contact() you have to use get()
for var in self.var_lst:
print(var.get())