I have the below script.
I am a bit stuck with this specific piece:
datex = datetime.datetime.strptime(df1.start_time,'%Y-%m-%d %H:%M:%S')
I can't figure out how to extract the actual value from the start_time field & store it in the datex variable.
Can anyone help me please?
# Question snippet: while iters is below 10, builds a cutoff time from
# time_to_check plus iters * 900 seconds and accumulates filterx.count()
# into session.
# NOTE(review): the indentation of this paste was lost, so the nesting of the
# statements below cannot be confirmed from the text shown here.
while iters <10:
# Offset for this pass, in seconds.
time_to_add = iters * 900
time_to_checkx = time_to_check + datetime.timedelta(seconds=time_to_add)
iters = iters + 1
session = 0
# Iterates over the rows returned by df1.rdd.collect().
for row in df1.rdd.collect():
# NOTE(review): strptime is handed df1.start_time (an attribute of the
# DataFrame object) rather than a value read from `row`; this is the line the
# question asks about.
datex = datetime.datetime.strptime(df1.start_time,'%Y-%m-%d %H:%M:%S')
print(datex)
# NOTE(review): `row` is not referenced anywhere in the lines shown; the
# filter below is applied to df1 as a whole.
filterx = df1.filter(datex < time_to_checkx)
session = session + filterx.count()
print('current session value' + str(session))
print(session)
Check this out. I have converted your for loop into a general, column-based form. If you can give me more information about the iters variable, or explain how you want it to work, I can refine this further:
import pyspark.sql.functions as F

# Spark datetime pattern: lower-case "yyyy" is the calendar year and "HH" the
# 24-hour clock. ("YYYY" is the week-based year and "hh" the 12-hour clock.)
spark_date_format = "yyyy-MM-dd HH:mm:ss"
# Name of the string column holding the timestamps to parse.
date_column = "start_time"

session = 0
# Cut-off: time_to_check shifted by time_to_add seconds (both set by the caller).
time_to_checkx = time_to_check + datetime.timedelta(seconds=time_to_add)
# Parse the string column into a real timestamp column once, so the comparison
# below is done on the column instead of per collected row.
df1 = df1.withColumn('start_time', F.to_timestamp(F.col(date_column), spark_date_format))
filterx = df1.filter(df1.start_time < time_to_checkx)
session = session + filterx.count()
Related
I'm trying to print the variables ccb_3, nome, data, taxa and parcela using the function I defined as "ext_ccb", but when I run the code it only prints the variable ccb_3, three times (because I defined q as 3).
I tried splitting it into 2 functions (one with the variable ccb_3 and one with the rest, which uses regex), but that didn't work either.
'''
from PyPDF2 import PdfFileReader, PdfFileWriter
import re
# x: number of the first PDF to process; q: number of the last one.
x, q = 1, 3
# ext_ccb: opens the PDF named "Vazio (<y>).pdf", extracts the text of its
# first two pages and prints the fields it finds (ccb_3, nome, data, taxa,
# parcela). Takes no arguments; reads the name `y` from outside the function.
# Returns nothing -- every result is only printed.
# NOTE(review): indentation was lost in this paste, so which statements sit
# inside each `for` loop cannot be confirmed from the text shown here.
def ext_ccb():
# File name is built from the outer string `y`.
nome_ccb = str("Vazio (" + y + ").pdf")
ccb = PdfFileReader(nome_ccb)
# Text of page index 0 and page index 1.
ccb_obj_1 = ccb.getPage(0)
ccb_text_1 = ccb_obj_1.extractText()
ccb_obj_2 = ccb.getPage(1)
ccb_text_2 = ccb_obj_2.extractText()
# Characters 1..7 of the first page's text.
ccb_3 = ccb_text_1[1:8]
print(ccb_3)
# Multi-line pattern: a line of text followed by the literal line
# "Nome Completo". NOTE(review): [^CPF] is a character class (one character
# that is not C, P or F), not "does not contain the word CPF" -- confirm intent.
pattern_nome = re.compile(r'''[^\n][^CPF][A-Z](|\.)\w*\s*.*$
Nome Completo
''', re.M)
matches_nome = pattern_nome.finditer(ccb_text_1)
for match in matches_nome:
# str(match) is the textual form of the match object, not the matched text;
# the fixed offset 40 and the replace() below trim that textual form.
# NOTE(review): presumably match.group() was intended -- verify.
nome = str(match)
nome = nome[40:].replace(r"\n\nNome Completo\n'>", "")
print(nome)
# Literal "5.2. Modalidade" line followed by a dd/dd/dddd line.
pattern_data = re.compile(r'''5\.2\. Modalidade
\d{2}/\d{2}/\d{4}
''')
matches_data = pattern_data.findall(ccb_text_1)
for match in matches_data:
# Characters 17..26 of the matched text.
data = match[17:27]
print(data)
# Literal "Taxa de Juros a.m. (%)" line followed by digits, a comma, digits.
pattern_taxa = re.compile(r'''Taxa de Juros a\.m\. \(%\)
\d*,\d*''')
matches_taxa = pattern_taxa.findall(ccb_text_2)
for match in matches_taxa:
# Everything from character 24 of the matched text onward.
taxa = match[24:]
print(taxa)
# Literal "Valor das Parcelas" line followed by "R$ ", digits, a comma, digits.
pattern_vparcela = re.compile(r'''Valor das Parcelas
R\$ \d*,\d*''')
matches_vparcela = pattern_vparcela.findall(ccb_text_2)
for match in matches_vparcela:
# Everything from character 23 of the matched text onward.
parcela = match[23:]
print(parcela)
# Call ext_ccb() once for every number from the current x up to q (inclusive).
# ext_ccb() picks the number up through the module-level string y.
while x <= q:
    y, x = str(x), x + 1
    ext_ccb()
'''
What I really need is to insert these values into a CSV file, once for each of several different PDFs. I already have the code for the CSV part:
'''
from csv import writer
# Appends x rows to csv_teste.csv, bumping each field before every write.
# NOTE(review): ccb_3, nome, data, taxa and parcela are not assigned anywhere
# in this snippet; they must already hold numeric values before it runs.
# NOTE(review): original indentation was lost; the row write is assumed to sit
# inside the loop (one row per iteration) -- confirm.
x = 5
q = 0
while q < x:
    q += 1
    ccb_3 += 1
    nome += 2
    data += 4
    taxa += 4
    parcela += 5
    list_data = [ccb_3, nome, data, taxa, parcela]
    # newline='' lets the csv module control line endings itself.
    with open('csv_teste.csv', 'a', newline='') as f_object:
        writer_object = writer(f_object)
        writer_object.writerow(list_data)
    # The `with` block closes the file on exit, so no explicit close() is needed.
'''
How can I save the data extracted from each PDF and write it to the CSV?
I'm trying to label BUY, SELL, and HOLD values to the closing stock prices based on the algorithm I found in a paper. I'm not quite able to figure out the error I'm getting. I'd very much appreciate your help. Thank you.
Algorithm:
[EDITED]
My implementation:
# Question snippet: slides an 11-element window over closing_price and appends
# "SELL", "BUY" or "HOLD" to result depending on whether max_idx or min_idx
# equals the middle index of the window.
# NOTE(review): indentation was lost in this paste; the nesting below cannot be
# confirmed from the text shown here.
window_size = 11
counter = 0
result = []
window_begin_idx=0; window_end_idx=0; window_middle_idx=0; min_idx=0; max_idx=0;
while counter < len(closing_price):
# Labelling only starts once counter exceeds window_size.
if counter > window_size:
# Window bounds and middle are positions in closing_price.
window_begin_idx = counter - window_size
window_end_idx = window_begin_idx + window_size - 1
window_middle_idx = (window_begin_idx + window_end_idx)//2
for i in range(window_begin_idx, window_end_idx+1):
rng = closing_price[window_begin_idx:window_end_idx+1]
number = closing_price[i]
# NOTE(review): mins/maxs are recomputed from the same window that `number`
# is taken from, so `number < mins` and `number > maxs` look like they can
# never be true and min_idx/max_idx keep their previous values -- verify.
mins = rng.min()
maxs = rng.max()
if number < mins:
mins=number
# NOTE(review): np.argmin(rng)/np.argmax(rng) are positions inside the slice,
# while window_middle_idx is a position in closing_price -- verify.
min_idx = np.argmin(rng)
if number > maxs:
maxs=number
max_idx = np.argmax(rng)
if max_idx == window_middle_idx:
result.append("SELL")
elif min_idx == window_middle_idx:
result.append("BUY")
else:
result.append("HOLD")
# Values assigned here are overwritten by rng.min()/rng.max() on the next pass.
mins = 0.0
maxs = 10000.0
counter+=1
After the edit based on the author's JAVA code, I'm only getting the HOLD label. The author's implementation is here.
You need to initialize mins, maxs, min_idx and max_idx with appropriate values before the main loop.
In your case, the `if max_idx == ...` comparison is reached before any assignment to max_idx has happened.
Edit after the question was changed:
It seems that in Python you can get similar behavior by replacing the whole for-loop with:
# Replacement for the inner for-loop: take the window's extrema directly.
# Assumes closing_price is a NumPy array (the slice is asked for .min()/.max()).
rng = closing_price[window_begin_idx:window_end_idx+1]
mins = rng.min()
maxs = rng.max()
# argmin()/argmax() return a position inside the slice; add the window start so
# the result is comparable with window_middle_idx, which indexes closing_price.
min_idx = window_begin_idx + int(rng.argmin())
max_idx = window_begin_idx + int(rng.argmax())
After reading through the author's implementation and following the suggestions provided by MBo, I have managed to solve this issue. So, now anyone who wants this algorithm in python, below is the code:
# Label closing prices with a sliding window: for every position past the first
# window, look at the window_size prices before it and append
#   "SELL" if the window's maximum sits at the window's middle index,
#   "BUY"  if the window's minimum sits there,
#   "HOLD" otherwise.
# Reads closing_price (indexable by integer position, supports len());
# produces the list `result`.
window_size = 11
counter = 0
result = []
window_begin_idx = 0
window_end_idx = 0
window_middle_idx = 0
min_idx = 0
max_idx = 0
number = 0.0
# Infinite sentinels work for any price range, unlike fixed bounds.
mins = float("inf")
maxs = float("-inf")
while counter < len(closing_price):
    if counter > window_size:
        window_begin_idx = counter - window_size
        window_end_idx = window_begin_idx + window_size - 1
        window_middle_idx = (window_begin_idx + window_end_idx) // 2
        # Start every window from scratch.
        mins = float("inf")
        maxs = float("-inf")
        for i in range(window_begin_idx, window_end_idx + 1):
            number = closing_price[i]
            if number < mins:
                mins = number
                # Record the position inside this window. Searching the whole
                # series for the value could hit an equal price outside it.
                min_idx = i
            if number > maxs:
                maxs = number
                max_idx = i
        if max_idx == window_middle_idx:
            result.append("SELL")
        elif min_idx == window_middle_idx:
            result.append("BUY")
        else:
            result.append("HOLD")
    counter += 1
def startlog():
    """Store a start timestamp and date for the current user in the log table.

    Reads enteruser.id, takes the local time, and runs an UPDATE through the
    cursor `c`, then commits on `conn`. Returns nothing.
    """
    user_id = enteruser.id
    now = time.localtime()
    # NOTE(review): the hour is shifted by one before use; the reason is not
    # shown in this snippet.
    shifted_hour = now.tm_hour + 1
    date = f"{now.tm_mon}-{now.tm_mday}-{now.tm_year}"
    # Seconds counted from day-of-month, shifted hour, minute and second.
    starttime = (
        (now.tm_mday * 86400)
        + (shifted_hour * 3600)
        + (now.tm_min * 60)
        + now.tm_sec
    )
    c.execute(
        "UPDATE log SET start = ?, date = ? WHERE ID = ?",
        (starttime, date, user_id),
    )
    conn.commit()
I have this function startlog, and a clone of it endlog.
My database table log consists of (name, starttime, endtime, date).
Is there any way to keep track of the changes?
Desired output:
Name / Time / Date
x / time1 / date1
x / time2 / date2
I tried creating a list that is appended to every time the function is called, but the list disappears once the session ends.
I used csv for my case since it's just a personal project. I used columns like ID/Time in / Time out / Total Time and used ID to determine which value to display. This is the snippet of my code (using tkinter for gui)
def csvwrite():
    """Append one row (enteruser.id, log.start, log.end) to 'test.cvs'."""
    with open('test.cvs', 'a', newline="") as csvfile:
        csv.writer(csvfile).writerow((enteruser.id, log.start, log.end))
    # The `with` block closes the file on exit; no explicit close() is needed.
def csvread():
    """Show every row of 'test.cvs' accepted by `filterer`.

    Each accepted row is printed and also packed as a Label into
    historyWindow.historywndw. Returns nothing.
    """
    # newline="" is what the csv module expects of files it reads, so that
    # line breaks inside quoted fields are handled by the reader itself.
    with open('test.cvs', 'r', newline="") as csvfile:
        for row in filter(filterer, csv.reader(csvfile)):
            print(row)
            Label(historyWindow.historywndw, text=row).pack()
I am trying to run this model of seed predation and population dynamics, but I am new to coding and I am only getting one predation value, which is repeated across generations. How can I get a different predation value for each year?
Also, is there an issue with the normalizing method used?
import numpy as np
import matplotlib.pyplot as plt
def is_odd(year):
    """Return True when *year* leaves a remainder of 1 on division by 2."""
    remainder = year % 2
    return remainder == 1
def reproduction(p_iter, year, dead):
    """Return the seeds left after predation plus the carried-over population.

    p_iter -- population array; entries 1 and 2 are read by index.
    year   -- decides, via is_odd, which seed table and carry-over slot apply.
    dead   -- predation factor multiplied into the seed loss.

    Reads the module-level names K, s_oddd and s_even.
    """
    if is_odd(year):
        seeds_per_plant = s_oddd
        carried_over = np.array([0, 0, p_iter[2]])
    else:
        seeds_per_plant = s_even
        carried_over = np.array([0, p_iter[1], 0])
    produced = p_iter * seeds_per_plant
    # Loss is split across entries in proportion to each one's share of seeds.
    eaten = K * dead * 200 * (produced / np.sum(produced))
    return (produced - eaten) + carried_over
def normalize(p_iter):
    """Rescale the population so that its entries add up to K.

    One entry is held fixed (index 2 when the module-level `year` is odd,
    index 1 otherwise); the remaining entries are scaled so that they sum to
    K minus the held entry. Returns a new array; *p_iter* is not modified.

    Reads the module-level names `year` and `K`.
    """
    held = 2 if is_odd(year) else 1
    x = np.copy(p_iter)
    x[held] = 0
    total = np.sum(x)
    # If every scaled entry is 0 there is nothing to rescale; dividing would
    # give 0/0 = NaN and poison all later years.
    if total > 0:
        x = (K - p_iter[held]) * x / total
    x[held] = p_iter[held]
    return x
Predation is defined here:
def predation():
    """Return a uniform random draw from [0.4, 0.6), rounded to 2 decimals, as a NumPy array."""
    draw = np.random.uniform(0.4, 0.6)
    rounded = np.round(draw, 2)
    return np.array(rounded)
# Number of simulated years.
Y = 100
# Carrying capacity.
K = 1000
# Initial population of the three types.
p_1, p_2, p_3 = 998., 1., 1.
# Seeds released per plant, per type.
s_1, s_2, s_3 = 200, 260, 260

p_init = np.array([p_1, p_2, p_3], dtype=float)
# Seed tables: the third entry is 0 in s_oddd, the second is 0 in s_even.
s_oddd = np.array([s_1, s_2, 0.0])
s_even = np.array([s_1, 0.0, s_3])
n = len(p_init)

# History matrix: first row is p_init followed by s_oddd; one row of
# (population, seeds) is stacked underneath for every simulated year.
m = np.append(p_init, s_oddd)
p_iter = p_init
dead = 0
norm = 0
for year in range(1, Y + 1):
    # A fresh predation value is drawn every year.
    dead = predation()
    seeds = reproduction(p_iter, year, dead)
    # Clamp negative seed counts to 0 before normalising.
    p_iter = normalize(np.maximum(seeds, np.zeros(p_iter.shape)))
    m = np.vstack((m, np.concatenate((p_iter, seeds))))
import numpy as np
def _entropy(p_neg, p_pos):
    """Return the entropy, in bits, of a two-class distribution.

    Zero probabilities are skipped: the term 0*log2(0) is taken as 0 by
    convention, whereas evaluating it directly gives 0 * -inf = nan.
    """
    probs = np.array([p_neg, p_pos], dtype=float)
    probs = probs[probs > 0]
    # "+ 0.0" turns a possible -0.0 (pure subset) into a plain 0.0.
    return -np.sum(probs * np.log2(probs)) + 0.0


def _label_counts(rows):
    """Return (count of 0 labels, count of 1 labels) in the last column of *rows*."""
    labels = rows[:, -1]
    return int(np.sum(labels == 0)), int(np.sum(labels == 1))


# Each row is one sample; the last column is the label.
udacity_set = np.array(
    [[1, 1, 1, 0],
     [1, 0, 1, 0],
     [0, 1, 0, 1],
     [1, 0, 0, 1]])

# Entropy of the labels over the whole set.
label = udacity_set[:, -1]
fx = label.size
negative, positive = _label_counts(udacity_set)
positive_probability = positive / fx
negative_probability = negative / fx
entropy = _entropy(negative_probability, positive_probability)

atribute = 0
V = 1

# Rows where the chosen attribute column equals 1.
attribute_set = udacity_set[udacity_set[:, atribute] == 1]
instances = attribute_set.shape[0]
negative_labels, positive_labels = _label_counts(attribute_set)
p0 = negative_labels / instances
p1 = positive_labels / instances
entropy2 = _entropy(p0, p1)

# Rows where the chosen attribute column equals 0.
attribute_set2 = udacity_set[udacity_set[:, atribute] == 0]
instances2 = attribute_set2.shape[0]
# Counts are taken from attribute_set2 itself, not from attribute_set.
negative_labels2, positive_labels2 = _label_counts(attribute_set2)
p02 = negative_labels2 / instances2
p12 = positive_labels2 / instances2
entropy22 = _entropy(p02, p12)
The problem arises when an attribute is pure and the entropy is supposed to be 0: when I put the values into the formula, I get NaN. I know how to code a workaround, but why does the formula break down?