I have a tensor data of size (1000,110) and I want to iterate over the first index of the tensor and calculate the following.
# Row-by-row baseline: classify every row of `data` by two argmax tests and
# count how many rows fall into each of the four combinations.
data = torch.randn(size=(1000, 110)).to(device)

# Each counter is a 0-dim float tensor living on `device`.
male_poor = torch.tensor(0).float().to(device)
male_rich = torch.tensor(0).float().to(device)
female_poor = torch.tensor(0).float().to(device)
female_rich = torch.tensor(0).float().to(device)

for row in data:
    # argmax over a two-element slice is either 0 or 1.
    first_pick = torch.argmax(row[64:66])     # 0 -> counted as female, 1 -> male
    second_pick = torch.argmax(row[108:110])  # 0 -> counted as poor, 1 -> rich
    if first_pick == 0:
        if second_pick == 0:
            female_poor += 1
        else:
            female_rich += 1
    else:
        if second_pick == 0:
            male_poor += 1
        else:
            male_rich += 1

# Ratio of the "rich" share among female rows to the "rich" share among male rows.
female_rich_share = female_rich / (female_rich + female_poor)
male_rich_share = male_rich / (male_rich + male_poor)
disparity = female_rich_share / male_rich_share
Is there a faster way than for loop to do this?
The key in PyTorch (as well as NumPy) is vectorization: if you can remove loops by operating on whole matrices, the code will be a lot faster. Loops in Python are quite slow compared to the loops in the underlying compiled C code. On my machine the execution time for your code was about 0.091 s, while the following vectorized code took about 0.002 s, so it is roughly 50x faster:
import time

import torch

# Fixed seed so the benchmark input (and therefore the printed ratio) is reproducible.
torch.manual_seed(0)
device = torch.device('cpu')
data = torch.randn(size=(1000, 110)).to(device)

# perf_counter is the clock intended for measuring short durations.
t = time.perf_counter()

# Vectorize over the first dimension: compute each argmax once for all rows,
# then derive the boolean masks from it instead of recomputing the argmax.
argmax64 = torch.argmax(data[:, 64:66], dim=1)
argmax108 = torch.argmax(data[:, 108:110], dim=1)
argmax64_0 = argmax64 == 0
argmax64_1 = argmax64 == 1
argmax108_0 = argmax108 == 0
argmax108_1 = argmax108 == 1

# Combine the masks and count the rows in each of the four groups.
female_poor = (argmax64_0 & argmax108_0).sum()
female_rich = (argmax64_0 & argmax108_1).sum()
male_poor = (argmax64_1 & argmax108_0).sum()
male_rich = (argmax64_1 & argmax108_1).sum()

# Ratio of the "rich" share among female rows to the "rich" share among male rows.
disparity = ((female_rich / (female_rich + female_poor))) / ((male_rich / (male_rich + male_poor)))

print(time.perf_counter() - t)
print(disparity)
Related
I want to generate random K values based on the image. My code works as a standalone Python script, but not when it is integrated with Chaquopy. When I debug it, the code does not work and does not throw any errors either.
def Adaptive_K_Value(image):
    """Pick a cluster count for the pixels of ``image`` using four validity indices.

    The image is scaled by 1/255 and flattened to one row per pixel with three
    columns. Starting at ``k = 2``, K-Means is fitted for increasing ``k`` and
    the Dunn, Davies-Bouldin, Silhouette and Calinski-Harabasz indices are
    compared with the values from the previous ``k``. The search stops as soon
    as fewer than two of the four indices improved.

    Args:
        image: array-like with ``shape[0] * shape[1] * 3`` elements; it must
            support division by a float and ``reshape``.

    Returns:
        The last ``k`` for which at least two indices improved.

    NOTE(review): the progress messages print ``k + 1`` and ``ideal_k + 1``
    while the model is fitted with ``k`` and ``ideal_k`` is returned; confirm
    which value callers are meant to see.
    """
    print(image)
    data = image / 255.0  # use 0...1 scale
    print(data)
    data = data.reshape(image.shape[0] * image.shape[1], 3)
    print(data.shape)
    data = np.array(data)
    print(data)

    # The distance matrix depends only on `data`, never on k, so build it once
    # instead of once per loop iteration.
    # NOTE(review): presumably an (n_pixels x n_pixels) array, which can be
    # very large for real images -- confirm memory use on the target device.
    distances = pairwise_distances(data)

    # Scores of the previous k; the initial values make every index count as
    # "improved" on the first iteration.
    max_Dunn = float("-inf")
    min_DB = float("inf")
    max_Silhoutte = float("-inf")
    max_Calinski = float("-inf")
    ideal_k = k = 2

    while True:
        print("k = ", k + 1)
        kmeans = KMeans(n_clusters=k)
        print("kmeans ", kmeans)
        print(data.shape)
        kmeans.fit(data)
        print("fitted model ", kmeans)
        kmeans_labels = kmeans.labels_
        print(kmeans.labels_)
        print(kmeans.cluster_centers_)

        Dunn = dunn(distances, kmeans_labels)
        print("Dunn Index kmeans =", Dunn)
        DB = davies_bouldin_score(data, kmeans_labels)
        print("DB Index kmeans =", DB)
        Silhoutte = silhouette_score(data, kmeans_labels)
        print('Silhouette Score kmeans=', Silhoutte)
        Calinski = metrics.calinski_harabasz_score(data, kmeans_labels)
        print('Calinski-Harabasz Index kmeans =', Calinski)

        # Count how many indices improved: lower is better for Davies-Bouldin,
        # higher is better for the other three.
        count = 0
        if max_Dunn < Dunn:
            count += 1
        if min_DB > DB:
            count += 1
        if max_Silhoutte < Silhoutte:
            count += 1
        if max_Calinski < Calinski:
            count += 1

        if count < 2:
            break

        # Accept this k and remember its scores as the baseline for k + 1.
        ideal_k = k
        k += 1
        max_Dunn = Dunn
        min_DB = DB
        max_Silhoutte = Silhoutte
        max_Calinski = Calinski

    print("\nOptimum K value =", ideal_k + 1)
    return ideal_k
I am trying to implement Adaptive_K_Value() method to get random K values from the image.
The line kmeans.fit(data) is not working; the while loop terminates immediately. It only prints KMeans(n_clusters=3), which means that only the line kmeans = KMeans(n_clusters=k) is executed.
Please help me
Logs of Android Studio
Here the k-means model is just initialized but the fitting part is not working. When I run the python script on the command line it returns the K value.
I have a sequential set of code which generates a tuple of values for different stocks, which is passed to a multiprocessing pool to apply technical indicators. Below is the sequential piece of code, which is working as expected.
# Sequential version: build one argument tuple per stock, then hand all tuples
# to a 7-process pool.
# NOTE(review): indentation and line continuations were lost in this listing;
# the nesting described in the comments below is presumed -- confirm against the real file.
child_fn_arg_tuple_list = []
for stock in m1_ts_consistent_stock_list: # prev_day_stock_list:
f_prev_ts_stock_merged_mdf_row =
m1_df_in_mdf[(m1_df_in_mdf['stock_id']==stock) &
(m1_df_in_mdf['datetimestamp'] == prev_ts)] # previous timestamp
# No row for this stock at the previous timestamp: drop the stock from the filtered list.
if f_prev_ts_stock_merged_mdf_row.empty:
f_filtered_stock_list.remove(stock)
else:
# Take the 'merged_ohlcv_df' value of the first matching row.
f_stock_prev_ts_merged_ohlcv_df_list_of_dict =
f_prev_ts_stock_merged_mdf_row['merged_ohlcv_df'].iloc[0]
# All rows of the current period for this stock; exactly one row is expected.
f_current_ts_stock_ohlcv_row_df =
period_ts_ohlcv_df[(period_ts_ohlcv_df['stock_id'] == stock)].copy()
if f_current_ts_stock_ohlcv_row_df.shape[0] == 1:
pass
else:
# error_string is assembled here but is not logged or raised in the visible code.
error_string = f_current_fn + 'Expected
f_current_ts_stock_ohlcv_row_df shape for stock ' + stock \
+ 'at ts ' + str(m1_time) + ' is not 1 - ' +
str(f_current_ts_stock_ohlcv_row_df.shape[0])
# Narrow the selection to the row stamped one minute before m1_time.
f_current_ts_stock_ohlcv_row_df =
period_ts_ohlcv_df[(period_ts_ohlcv_df['stock_id'] == stock) &
(period_ts_ohlcv_df['datetimestamp'] == (m1_time -
timedelta(minutes=1)))].copy()
# Positional arguments for one call of single_stock_apply_indicator_df_in_df_v3.
fn_arg_tuple = (f_from_date_list,f_run_folder_name,stock,
f_period,m1_time, f_stock_prev_ts_merged_ohlcv_df_list_of_dict,
f_current_ts_stock_ohlcv_row_df,f_grouped_column_list_dict)
child_fn_arg_tuple_list.append(fn_arg_tuple)
# starmap unpacks each tuple into positional arguments and returns the results as a list.
result_list = []
pool = multiprocessing.Pool(7)
for result in pool.starmap(single_stock_apply_indicator_df_in_df_v3, child_fn_arg_tuple_list):
result_list.append(result)
# NOTE(review): the pool is closed but not joined in the visible code.
pool.close()
Since the for loop runs for around 400 stocks every minute, I am trying to speed up the for loop over stocks, before passing them for applying multiprocessing using python inner function and joblib - parallel , delayed.
# Attempted parallel version of the per-stock loop: the loop body is moved into a
# function that joblib calls once per stock.
# NOTE(review): the reported pickling error names this function as a local object of
# multiple_stock_apply_indicator_df_in_df_v6, i.e. it is defined inside another function.
# NOTE(review): indentation and line continuations were lost in this listing -- confirm
# the nesting against the real file.
def create_child_fn_arg_tuple_list(cp_stock): # cp = child parameter
f_prev_ts_stock_merged_mdf_row = m1_df_in_mdf[
(m1_df_in_mdf['stock_id'] == cp_stock) &
(m1_df_in_mdf['datetimestamp'] == prev_ts)].copy()
# NOTE(review): with backend='multiprocessing' this removal presumably affects only the
# worker process's copy of f_filtered_stock_list -- confirm.
if f_prev_ts_stock_merged_mdf_row.empty:
f_filtered_stock_list.remove(cp_stock)
else:
# Take the 'merged_ohlcv_df' value of the first matching row.
f_stock_prev_ts_merged_ohlcv_df_list_of_dict = \
f_prev_ts_stock_merged_mdf_row['merged_ohlcv_df'].iloc[0]
# All rows of the current period for this stock; exactly one row is expected.
f_current_ts_stock_ohlcv_row_df = period_ts_ohlcv_df[
(period_ts_ohlcv_df['stock_id'] == cp_stock)].copy()
if f_current_ts_stock_ohlcv_row_df.shape[0] == 1:
pass
else:
# error_string is assembled here but is not logged or raised in the visible code.
error_string = f_current_fn + 'Expected f_current_ts_stock_ohlcv_row_df
shape for stock ' + \
cp_stock + 'at ts ' + str(m1_time) + ' is not 1 - ' + \
str(f_current_ts_stock_ohlcv_row_df.shape[0])
# Narrow the selection to the row stamped one minute before m1_time.
f_current_ts_stock_ohlcv_row_df =
period_ts_ohlcv_df[(period_ts_ohlcv_df['stock_id'] == cp_stock)
& (period_ts_ohlcv_df['datetimestamp'] ==
(m1_time - timedelta(minutes=1)))].copy()
# Positional arguments for one call of single_stock_apply_indicator_df_in_df_v3.
fn_arg_tuple = (f_from_date_list, f_run_folder_name, cp_stock, f_period,
m1_time,f_stock_prev_ts_merged_ohlcv_df_list_of_dict,
f_current_ts_stock_ohlcv_row_df,f_grouped_column_list_dict)
# Appends to a list from the enclosing scope and returns that whole list, not the single tuple.
child_fn_arg_tuple_list.append(fn_arg_tuple)
return child_fn_arg_tuple_list
# Parallel(...) collects one return value per stock, so the name is rebound to a list of
# the per-call return values.
# NOTE(review): each return value is itself a list, so this is presumably a list of lists
# rather than the flat list of tuples that starmap below expects -- confirm.
child_fn_arg_tuple_list = Parallel(n_jobs=7, backend='multiprocessing')\
(delayed(create_child_fn_arg_tuple_list)(in_stock) for in_stock in
m1_ts_consistent_stock_list)
# starmap unpacks each entry into positional arguments and returns the results as a list.
result_list = []
pool = multiprocessing.Pool(7)
for result in pool.starmap(single_stock_apply_indicator_df_in_df_v3, child_fn_arg_tuple_list):
result_list.append(result)
# NOTE(review): the pool is closed but not joined in the visible code.
pool.close()
I am getting an error -
AttributeError: Can't pickle local object 'multiple_stock_apply_indicator_df_in_df_v6..create_child_fn_arg_tuple_list' — it occurs on the line where I am trying to apply the joblib Parallel and delayed.
Please note that there are some common variables between the main function and inner function - m1_df_in_mdf, f_filtered_stock_list
1] m1_df_in_mdf is not affected as it is used only in read only mode
2] f_filtered_stock_list is affected as some stocks are removed
My objective is to get the for loop of stocks run faster, any other approaches are also welcome.
I'm trying to label BUY, SELL, and HOLD values to the closing stock prices based on the algorithm I found in a paper. I'm not quite able to figure out the error I'm getting. I'd very much appreciate your help. Thank you.
Algorithm:
[EDITED]
My implementation:
# First attempt at sliding-window BUY / SELL / HOLD labelling of closing_price.
# This is the version that yields only HOLD; the comments below explain why.
window_size = 11
counter = 0
result = []
window_begin_idx=0; window_end_idx=0; window_middle_idx=0; min_idx=0; max_idx=0;
while counter < len(closing_price):
# A window is evaluated only once more than window_size prices have been seen.
if counter > window_size:
window_begin_idx = counter - window_size
window_end_idx = window_begin_idx + window_size - 1
window_middle_idx = (window_begin_idx + window_end_idx)//2
for i in range(window_begin_idx, window_end_idx+1):
# rng is the whole window and number is one of its elements, so number can never be
# below rng.min() or above rng.max(): neither branch below runs and min_idx / max_idx
# keep their initial value 0.
rng = closing_price[window_begin_idx:window_end_idx+1]
number = closing_price[i]
mins = rng.min()
maxs = rng.max()
if number < mins:
mins=number
# np.argmin(rng) is a position inside the window, not an index into closing_price.
min_idx = np.argmin(rng)
if number > maxs:
maxs=number
# np.argmax(rng) is a position inside the window, not an index into closing_price.
max_idx = np.argmax(rng)
# window_middle_idx is at least 6 here while min_idx and max_idx stay 0, so both
# comparisons fail and every window is labelled HOLD.
if max_idx == window_middle_idx:
result.append("SELL")
elif min_idx == window_middle_idx:
result.append("BUY")
else:
result.append("HOLD")
mins = 0.0
maxs = 10000.0
counter+=1
After the edit based on the author's JAVA code, I'm only getting the HOLD label. The author's implementation is here.
You need to initialize mins, maxs, min_idx and max_idx with appropriate values before the main loop.
In your case, the check `if max_idx == ...` runs before any assignment to max_idx.
Edit after the question was changed:
Seems in Python you can make similar behavior replacing the whole for-loop with:
# Current window as an array, so that min/max and argmin/argmax are all available.
rng = np.asarray(closing_price[window_begin_idx:window_end_idx + 1])
mins = rng.min()
maxs = rng.max()
# argmin/argmax return positions inside the window (first occurrence on ties);
# add the window start so they are comparable with window_middle_idx, which
# indexes closing_price.
min_idx = window_begin_idx + int(np.argmin(rng))
max_idx = window_begin_idx + int(np.argmax(rng))
After reading through the author's implementation and following the suggestions provided by MBo, I have managed to solve this issue. So, now anyone who wants this algorithm in python, below is the code:
# Sliding-window labelling of closing_price.
#
# For every position `counter` greater than window_size, the window_size prices
# ending just before `counter` form a window. If the window's maximum sits on
# the middle element the label is "SELL", if the minimum sits there it is
# "BUY", otherwise "HOLD". `result` receives one label per evaluated window;
# each label refers to the price at window_middle_idx.
#
# The extremes are located inside the window itself, so prices outside the
# 0..10000 range and prices that also occur outside the window are handled
# correctly.
window_size = 11
counter = 0
result = []
window_begin_idx = 0
window_end_idx = 0
window_middle_idx = 0
min_idx = 0
max_idx = 0

# Positional view of the prices, independent of the container type.
prices = np.asarray(closing_price)

while counter < len(prices):
    if counter > window_size:
        window_begin_idx = counter - window_size
        window_end_idx = window_begin_idx + window_size - 1
        window_middle_idx = (window_begin_idx + window_end_idx) // 2

        window = prices[window_begin_idx:window_end_idx + 1]
        mins = window.min()
        maxs = window.max()
        # argmin/argmax give the first occurrence inside the window; shift by
        # the window start to obtain indices into the full price series.
        min_idx = window_begin_idx + int(np.argmin(window))
        max_idx = window_begin_idx + int(np.argmax(window))

        if max_idx == window_middle_idx:
            result.append("SELL")
        elif min_idx == window_middle_idx:
            result.append("BUY")
        else:
            result.append("HOLD")
    counter += 1
I built a grid that generates random obstacles for pathfinding algorithm, but with fixed starting and ending points as shown in my snippet below:
import random
import numpy as np
#grid format
# 0 = navigable space
# 1 = occupied space
# 50 x 50 table of uniform noise in [0, 1]; one draw per cell.
x = [[random.uniform(0, 1) for i in range(50)] for j in range(50)]

# Threshold the noise in one step: values above 0.7 become occupied cells (1),
# everything else stays navigable (0).
grid = (np.array(x) > 0.7).astype(int)

init = [5, 5]    # Start location
goal = [45, 45]  # Our goal
# clear starting and end point of potential obstacles
def clear_grid(grid, x, y):
    """Zero the block of cells around (x, y) in ``grid``, in place.

    The block spans one cell before and one cell after (x, y) along each axis.
    When a coordinate is 0 the block starts at that coordinate instead, so the
    slice start never becomes -1. Slicing clips the block at the far edges.
    """
    row_start = x if x == 0 else x - 1
    col_start = y if y == 0 else y - 1
    grid[row_start:x + 2, col_start:y + 2] = 0
# Zero the cells around the start and the goal so neither is blocked by an obstacle.
clear_grid(grid, init[0], init[1])
clear_grid(grid, goal[0], goal[1])
I need to generate also the starting and ending points randomly every time I run the code instead of making them fixed. How could I make it? Any assistance, please?.
Replace,
init = [5,5] #Start location
goal = [45,45] #Our goal
with,
# `high` is exclusive in np.random.randint, so 50 is required for the draw to
# cover every index 0..49 of a 50-cell axis.
init = np.random.randint(0, high=50, size=2)  # Start location
goal = np.random.randint(0, high=50, size=2)  # Our goal
Assuming your grid goes from 0-49 on each axis. Personally I would add grid size variables, i_length & j_length
EDIT #1
# Grid dimensions: i_length cells per inner list, j_length inner lists.
i_length = 50
j_length = 50

# Uniform noise table, one draw per cell.
x = [[random.uniform(0, 1) for _ in range(i_length)] for _ in range(j_length)]

# All-zero integer grid with the same shape as the noise table.
grid = np.zeros((j_length, i_length), dtype=int)
I made a program to implement dilation method for gray scale images without using any predefined function. Here is the code I wrote in python:
import cv2 as cv
import numpy as np
# Load the input image.
# NOTE(review): the flag 0 presumably selects single-channel grayscale loading
# (cv.IMREAD_GRAYSCALE) -- confirm against the OpenCV documentation.
img = cv.imread('dil.png', 0)
# Show the input in a window titled 'Original'.
cv.imshow('Original', img)
def find_max(k):
    """Return the largest element of the non-empty sequence ``k``.

    Written as an explicit scan so that no built-in maximum is used. The first
    element is the initial candidate and is replaced only by a strictly
    greater element.
    """
    mx = k[0]
    position = 1
    while position < len(k):
        candidate = k[position]
        if candidate > mx:
            mx = candidate
        position += 1
    return mx
# 3x3 cross-shaped structuring element: a 1 means the neighbour takes part in
# the maximum, a 0 means it is ignored. Unlike the pixel values, the mask is
# never modified.
mask = [0, 1, 0,
        1, 1, 1,
        0, 1, 0]

# Read from an untouched copy so that pixels already dilated in this pass do
# not feed into the result of later pixels.
source = img.copy()

# The outermost rows and columns are skipped (their 3x3 neighbourhood would
# leave the image) and therefore keep their original values.
for m in range(1, img.shape[0] - 1):
    for n in range(1, img.shape[1] - 1):
        # Neighbourhood listed in the same order as the mask entries.
        window = [
            source[m - 1, n + 1], source[m, n + 1], source[m + 1, n + 1],
            source[m - 1, n], source[m, n], source[m + 1, n],
            source[m - 1, n - 1], source[m, n - 1], source[m + 1, n - 1],
        ]
        # Grayscale dilation: the output pixel is the maximum over the
        # neighbours selected by the structuring element.
        selected = [pixel for pixel, keep in zip(window, mask) if keep]
        img[m, n] = find_max(selected)

cv.imshow('Dilated', img)
cv.waitKey(0)
cv.destroyAllWindows()
I used a 3x3 kernel here. Please correct me if anything else is also wrong. I am a beginner and just curious to do this without using built-in functions.