Compare two faces using python3 module face_recognition? - python-3.x

Sorry for my bad English.
I am trying to compare two faces using the python3 module face_recognition.
Here is an example of calculating the Euclidean distance in Python:
pdist([vector1, vector2], 'euclidean')
I want to calculate the Euclidean distance in the SQL query itself, because all faces (their vectors) will be stored in my database, but I do not know how to do this with a SQL query.
Information:
MariaDB version: 10.5.11
Python: 3.9.2
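For illustration, a sketch of one possible shape for such a query, assuming the 128 encoding values were stored as a JSON array in the face_hash column (MariaDB has JSON_EXTRACT since 10.2) and using the cur cursor from the script below; the storage format here is an assumption, not working code from my setup:
import json

# Sketch: build the 128-term Euclidean distance expression in Python and let
# MariaDB evaluate it. face_hash is assumed to hold a JSON array of floats.
query_vector = [0.1] * 128  # placeholder for a real face_recognition encoding
terms = " + ".join(
    "POW(JSON_EXTRACT(face_hash, '$[{}]') - %s, 2)".format(i) for i in range(128)
)
sql = "SELECT id_face, SQRT({}) AS dist FROM faces ORDER BY dist LIMIT 1".format(terms)
cur.execute(sql, tuple(float(v) for v in query_vector))
print(cur.fetchone())  # id and distance of the closest stored face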
#!/usr/bin/env python3
import cv2
import face_recognition
import mysql.connector as mysql

def get_image_hash(image):
    # Open image
    img = face_recognition.load_image_file(image)
    # Convert to black and white (disabled)
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Get vector
    vector = face_recognition.face_encodings(img)[0]
    vector = (str(vector),)
    return vector

# Open DB
conn = mysql.connect(
    host = '127.0.0.1',
    user = 'user',
    passwd = 'password'
)
cur = conn.cursor()
cur.execute("SHOW DATABASES")
# Check if db 'test' already exists
db_found = False
for db in cur:
    if 'test' in db:
        db_found = True
if not db_found:
    cur.execute("CREATE DATABASE IF NOT EXISTS test;")
    conn.commit()
cur.execute("USE test;")
cur.execute("""CREATE TABLE IF NOT EXISTS faces(id_face BIGINT PRIMARY KEY NOT NULL AUTO_INCREMENT, face_hash TEXT)""")
new_image = get_image_hash('test.jpg')
# Add face (vector) to DB
cur.execute('''INSERT INTO faces (face_hash) VALUES(%s)''', new_image)
conn.commit()
# Upload a picture for search
find_me_image = get_image_hash('findme.jpg')
#print('d: ', find_me_image[0])
# How should I compare these two arrays in my SQL query to find a similar face?
cur.execute("SELECT * FROM faces WHERE ..... ;")
cur.close()
print('find_me_image: ', str(find_me_image))
print('new_image: ', str(new_image))
Result:
Find_me_image: ('[-0.04221933 0.04460172 0.10287622 -0.14319997 -0.13808066 0.00552465\n -0.04414323 -0.07157505 0.23200855 -0.12091423 0.16892464 -0.16992114\n -0.2487883 0.09141497 -0.14198568 0.1824664 -0.11484738 -0.1130986\n -0.14396232 -0.06075872 -0.00201617 0.07473749 -0.01706937 0.05610432\n -0.11021845 -0.30173326 -0.02712429 -0.10394925 -0.05155517 -0.21909578\n 0.03083897 0.16680503 -0.09715255 -0.0407755 -0.01714687 0.08432341\n -0.01913652 -0.13662203 0.21924476 0.04394831 -0.20848413 -0.03259828\n 0.04784738 0.30321479 0.22730266 -0.02372641 -0.01165112 -0.12765107\n 0.13877977 -0.3403039 0.0424962 0.10813272 0.0511388 0.12078771\n 0.04942191 -0.13038178 0.02736722 0.15339687 -0.24367541 0.10453884\n 0.13450858 -0.09997959 0.01744595 -0.10602434 0.2614505 0.10681546\n -0.12075276 -0.12065229 0.195976 -0.11606392 -0.0447496 0.08198876\n -0.13573587 -0.18409243 -0.19127932 0.01680213 0.35644779 0.16652581\n -0.12988403 -0.00341757 -0.15569599 -0.09128557 -0.03799717 0.09235845\n 0.06296059 -0.07972728 0.00744779 0.07452074 0.23394027 -0.0726112\n -0.00072305 0.2978259 -0.01452125 -0.06529554 -0.08694689 0.01903715\n -0.14941891 0.10714116 -0.1096215 0.00143995 0.00146057 0.00348109\n 0.06795555 0.10826397 -0.18627991 0.21965174 -0.04136307 -0.01491791\n 0.03774849 -0.07495191 -0.03808937 -0.02331351 0.29242265 -0.23740929\n 0.13265632 0.1274993 0.17672779 0.11845816 0.01477844 0.07670261\n 0.11437597 -0.03779818 -0.21296507 0.03480547 0.06180557 -0.01749492\n -0.023851 0.11586148]',)
New_image: ('[-0.04221933 0.04460172 0.10287622 -0.14319997 -0.13808066 0.00552465\n -0.04414323 -0.07157505 0.23200855 -0.12091423 0.16892464 -0.16992114\n -0.2487883 0.09141497 -0.14198568 0.1824664 -0.11484738 -0.1130986\n -0.14396232 -0.06075872 -0.00201617 0.07473749 -0.01706937 0.05610432\n -0.11021845 -0.30173326 -0.02712429 -0.10394925 -0.05155517 -0.21909578\n 0.03083897 0.16680503 -0.09715255 -0.0407755 -0.01714687 0.08432341\n -0.01913652 -0.13662203 0.21924476 0.04394831 -0.20848413 -0.03259828\n 0.04784738 0.30321479 0.22730266 -0.02372641 -0.01165112 -0.12765107\n 0.13877977 -0.3403039 0.0424962 0.10813272 0.0511388 0.12078771\n 0.04942191 -0.13038178 0.02736722 0.15339687 -0.24367541 0.10453884\n 0.13450858 -0.09997959 0.01744595 -0.10602434 0.2614505 0.10681546\n -0.12075276 -0.12065229 0.195976 -0.11606392 -0.0447496 0.08198876\n -0.13573587 -0.18409243 -0.19127932 0.01680213 0.35644779 0.16652581\n -0.12988403 -0.00341757 -0.15569599 -0.09128557 -0.03799717 0.09235845\n 0.06296059 -0.07972728 0.00744779 0.07452074 0.23394027 -0.0726112\n -0.00072305 0.2978259 -0.01452125 -0.06529554 -0.08694689 0.01903715\n -0.14941891 0.10714116 -0.1096215 0.00143995 0.00146057 0.00348109\n 0.06795555 0.10826397 -0.18627991 0.21965174 -0.04136307 -0.01491791\n 0.03774849 -0.07495191 -0.03808937 -0.02331351 0.29242265 -0.23740929\n 0.13265632 0.1274993 0.17672779 0.11845816 0.01477844 0.07670261\n 0.11437597 -0.03779818 -0.21296507 0.03480547 0.06180557 -0.01749492\n -0.023851 0.11586148]',)
Updated code:
#!/usr/bin/env python3
import cv2
import json
import face_recognition
import mysql.connector as mysql

# DB
conn = mysql.connect(
    host = 'localhost',
    user = '',
    passwd = ''
)

def load(str_data):
    str_data = str_data.replace("[", "").replace("]", "")
    result = []
    for i, line in enumerate(str_data.split("\n")):
        result.append([])
        for element in line.replace("  ", " ").split(" "):
            try:
                result[i].append(float(element))
            except ValueError:
                pass
    return result

def distance(model, test):
    distance = 0
    for i, line in enumerate(model):
        dist_line = 0
        for j, element in enumerate(line):
            dist_line += (element - test[i][j]) ** 2
        distance += dist_line ** 0.5
    return distance

def get_image_hash(image):
    # Open image
    img = face_recognition.load_image_file(image)
    # Convert to black and white (disabled)
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Get vector
    vector = face_recognition.face_encodings(img)[0]
    # We can save only an iterable object
    vector = (str(vector),)
    return vector

cur = conn.cursor(buffered=True)
cur.execute("SHOW DATABASES")
# Check if db 'test' already exists
db_found = False
for db in cur:
    if 'test' in db:
        db_found = True
if not db_found:
    cur.execute("CREATE DATABASE IF NOT EXISTS test;")
    conn.commit()
cur.execute("USE test;")
cur.execute("""CREATE TABLE IF NOT EXISTS faces(id_face BIGINT PRIMARY KEY NOT NULL AUTO_INCREMENT, face_hash TEXT)""")
# Add face to DB
new_image = get_image_hash('test.jpg')
print('new_image debug: ', new_image)
cur.execute('''INSERT INTO faces (face_hash) VALUES(%s)''', new_image)
conn.commit()
# Find added face
find_me_image = get_image_hash('findme.jpg')
print('debug find_me_image: ', find_me_image)
# Get data from DB
cur.execute("SELECT * FROM faces;")
face_data = cur.fetchall()
# Check
for x in face_data:
    print('1: ', load(find_me_image[0]))
    print('2: ', load(x[1]))
    # x[1] == row face_hash
    compare_result = distance(load(find_me_image[0]), load(x[1]))
    #print('Result: ', compare_result)
# Got error:
'''
Traceback (most recent call last):
  File "/home/user/Desktop/parser_steam/image_recognition/test/./test.py", line 102, in <module>
    compare_result = distance(load(find_me_image[0]), load(x[1]))
  File "/home/user/Desktop/parser_steam/image_recognition/test/./test.py", line 35, in distance
    dist_line += (element - test[i][j]) ** 2
IndexError: list index out of range
'''
cur.close()

Here is what you need!
import json

def load(str_data):
    str_data = str_data.replace("[", "").replace("]", "")
    result = []
    for i, line in enumerate(str_data.split("\n")):
        result.append([])
        for element in line.replace("  ", " ").split(" "):
            try:
                result[i].append(float(element))
            except ValueError:
                pass
    return result

def distance(model, test):
    distance = 0
    for i, line in enumerate(model):
        dist_line = 0
        for j, element in enumerate(line):
            dist_line += (element - test[i][j]) ** 2
        distance += dist_line ** 0.5
    return distance

if __name__ == "__main__":
    Find_me_image = '[-0.04221933 0.04460172 0.10287622 -0.14319997 -0.13808066 0.00552465\n -0.04414323 -0.07157505 0.23200855 -0.12091423 0.16892464 -0.16992114\n -0.2487883 0.09141497 -0.14198568 0.1824664 -0.11484738 -0.1130986\n -0.14396232 -0.06075872 -0.00201617 0.07473749 -0.01706937 0.05610432\n -0.11021845 -0.30173326 -0.02712429 -0.10394925 -0.05155517 -0.21909578\n 0.03083897 0.16680503 -0.09715255 -0.0407755 -0.01714687 0.08432341\n -0.01913652 -0.13662203 0.21924476 0.04394831 -0.20848413 -0.03259828\n 0.04784738 0.30321479 0.22730266 -0.02372641 -0.01165112 -0.12765107\n 0.13877977 -0.3403039 0.0424962 0.10813272 0.0511388 0.12078771\n 0.04942191 -0.13038178 0.02736722 0.15339687 -0.24367541 0.10453884\n 0.13450858 -0.09997959 0.01744595 -0.10602434 0.2614505 0.10681546\n -0.12075276 -0.12065229 0.195976 -0.11606392 -0.0447496 0.08198876\n -0.13573587 -0.18409243 -0.19127932 0.01680213 0.35644779 0.16652581\n -0.12988403 -0.00341757 -0.15569599 -0.09128557 -0.03799717 0.09235845\n 0.06296059 -0.07972728 0.00744779 0.07452074 0.23394027 -0.0726112\n -0.00072305 0.2978259 -0.01452125 -0.06529554 -0.08694689 0.01903715\n -0.14941891 0.10714116 -0.1096215 0.00143995 0.00146057 0.00348109\n 0.06795555 0.10826397 -0.18627991 0.21965174 -0.04136307 -0.01491791\n 0.03774849 -0.07495191 -0.03808937 -0.02331351 0.29242265 -0.23740929\n 0.13265632 0.1274993 0.17672779 0.11845816 0.01477844 0.07670261\n 0.11437597 -0.03779818 -0.21296507 0.03480547 0.06180557 -0.01749492\n -0.023851 0.11586148]'
    New_image = '[-0.04221933 0.04460172 0.10287622 -0.14319997 -0.13808064 0.00552465\n -0.04414323 -0.07157505 0.23200855 -0.12091423 0.16892464 -0.16992114\n -0.2487883 0.09141497 -0.14198568 0.18246 -0.11484738 -0.1130986\n -0.14396232 -0.06075872 -0.0020117 0.07473749 -0.01706937 0.05610432\n -0.11021845 -0.30173326 -0.02712429 -0.10394925 -0.05155517 -0.21909578\n 0.03083897 0.16680503 -0.09715255 -0.0407755 -0.01714687 0.08432341\n -0.01913652 -0.13662203 0.21924476 0.04394831 -0.20848413 -0.03259828\n 0.04784738 0.30321479 0.22730266 -0.02372641 -0.0116112 -0.12765107\n 0.13877977 -0.3403039 0.0424962 0.10813272 0.0511388 0.12078771\n 0.04942191 -0.13038178 0.02736722 0.15339687 -0.24367541 0.10453884\n 0.13450858 -0.09997959 0.01744595 -0.10602434 0.2614505 0.10681546\n -0.12075276 -0.12065229 0.195976 -0.11606392 -0.0447496 0.08198876\n -0.1357387 -0.18409243 -0.19127932 0.01680213 0.35644779 0.16652581\n -0.12988403 -0.00341757 -0.15569599 -0.09128557 -0.03799717 0.09235845\n 0.06296059 -0.07972728 0.00744779 0.07452074 0.23394027 -0.0726112\n -0.00072305 0.2978259 -0.01452125 -0.06529554 -0.08694689 0.0193715\n -0.14941891 0.10714116 -0.1096215 0.00143995 0.00146057 0.00348109\n 0.06795555 0.10826397 -0.18627991 0.21965174 -0.04136307 -0.01491791\n 0.03774849 -0.07495191 -0.03808937 -0.02331351 0.29242265 -0.23740929\n 0.13265632 0.1274993 0.1762779 0.11845816 0.01477844 0.07670261\n 0.11437597 -0.03779818 -0.21296507 0.03480547 0.0618057 -0.01749492\n -0.023851 0.1158648]'
    print(distance(
        load(Find_me_image),
        load(New_image)
    ))
You first need to convert your data using the load function, then calculate the distance using the distance function.
Since your two data sets are identical, I modified the New_image data to test the function.
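A side note (a sketch, not part of the fix above): if the vectors are saved as JSON instead of numpy's str() output, no string cleanup is needed at all, and the standard Euclidean distance over all 128 values comes straight from numpy. This assumes the same face_hash TEXT column:
import json
import numpy as np

def to_db(vector):
    # what to INSERT into the face_hash TEXT column
    return json.dumps(list(vector))

def from_db(text):
    # parses back to a numpy array with no replace()/split() cleanup
    return np.array(json.loads(text))

def euclidean(a_text, b_text):
    # standard Euclidean distance over all 128 values
    return float(np.linalg.norm(from_db(a_text) - from_db(b_text)))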

Related

Plotting multiple lines with a Nested Dictionary, and unknown variables to Line Graph

I was able to find somewhat of an answer to my question, but it was not as nested as my dictionary, so I am really unsure how to proceed, as I am still very new to Python. I currently have a nested dictionary like:
{'140.10': {'46': {'1': '-49.50918', '2': '-50.223637', '3': '49.824406'}, '28': {'1': '-49.50918', '2': '-50.223637', '3': '49.824406'}}}
I want to plot it so that '140.10' becomes the title of the graph, '46' and '28' become the individual lines, key '1' (for example) is on the y axis, and the x axis is the final number (in this case '-49.50918'). Essentially a graph like this:
I generated this graph from a CSV file that is written in another part of the code, just with Excel:
The problem I am running into is that these keys are autogenerated from a larger CSV file, and I will not know their exact values until the code has been run, since each key is generated in an earlier part of the script. I will be running it over various files named after the graph, and each file will have different values for:
{key1: {key2_1: {key3_1: value1, key3_2: value2, key3_3: value3}, key2_2: ...}}
I have tried to do something like this:
for filename in os.listdir(Directory):
    if filename.endswith('.csv'):
        q = filename.split('.csv')[0]
        s = q.split('_')[0]
        if s in time_an_dict:
            atom = list(time_an_dict[s])
            ion = time_an_dict[s]
            for f in time_an_dict[s]:
                x_val = []
                y_val = []
                fz = ion[f]
                for i in time_an_dict[s][f]:
                    pos = (fz[i])
                    frame = i
                    y_val.append(frame)
                    x_val.append(pos)
            '''ions = atom
            frame = frames
            position = pos
            plt.plot(frame, position, label = frames)
            plt.xlabel("Frame")
            plt.ylabel("Position")
            plt.show()
            #plt.savefig('{}_Pos.png'.format(s))'''
But it has not run as intended.
I have also tried:
for filename in os.listdir(Directory):
    if filename.endswith('_Atom.csv'):
        q = filename.split('.csv')[0]
        s = q.split('_')[0]
        if s in window_dict:
            name = s + '_Atom.csv'
            time_an_dict[s] = analyze_time(name, window_dict[s])
            new = '{}_A_pos.csv'.format(s)
            ions = list(time_an_dict.values())[0].keys()
            for i in ions:
                x_axis_values = []
                y_axis_values = []
                frame = list(time_an_dict[s][i])
                x_axis_values.append(frame)
                empty = []
                print(x_axis_values)
                for x in frame:
                    values = time_an_dict[s][i][x]
                    empty.append(values)
                y_axis_values.append(empty)
                plt.plot(x_axis_values, y_axis_values, label = x)
                plt.show()
But I keep getting this error:
Traceback (most recent call last):
  File "Atoms_pos.py", line 175, in <module>
    plt.plot(x_axis_values, y_axis_values, label = x )
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/pyplot.py", line 2840, in plot
    return gca().plot(
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_axes.py", line 1743, in plot
    lines = [*self._get_lines(*args, data=data, **kwargs)]
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_base.py", line 273, in __call__
    yield from self._plot_args(this, kwargs)
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_base.py", line 394, in _plot_args
    self.axes.xaxis.update_units(x)
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axis.py", line 1466, in update_units
    default = self.converter.default_units(data, self)
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py", line 107, in default_units
    axis.set_units(UnitData(data))
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py", line 176, in __init__
    self.update(data)
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py", line 209, in update
    for val in OrderedDict.fromkeys(data):
TypeError: unhashable type: 'numpy.ndarray'
Here is the remainder of the other parts of the code that generate the files and dictionaries I am using. I was told in another question I asked that this could be helpful.
# importing dependencies
import math
import sys
import pandas as pd
import MDAnalysis as mda
import os
import numpy as np
import csv
import matplotlib.pyplot as plt

################################################################################
################################################################################

Directory = '/Users/hxb51/Desktop/Q_prof/Displacement_Charge/Blah'
os.chdir(Directory)

################################################################################

''' We are only looking at the positions of the CLAs and SODs and not the DRUDE counterparts. We are assuming the DRUDE
are very close and it is not something that needs to be concerned with'''

def Positions(dcd, topo):
    fields = ['Window', 'ION', 'ResID', 'Location', 'Position', 'Frame', 'Final']
    with open('{}_Atoms.csv'.format(s), 'a') as d:
        writer = csv.writer(d)
        writer.writerow(fields)
        d.close()
    CLAs = u.select_atoms('segid IONS and name CLA')
    SODs = u.select_atoms('segid IONS and name SOD')
    CLA_res = len(CLAs)
    SOD_res = len(SODs)
    frame = 0
    for ts in u.trajectory[-10:]:
        frame += 1
        CLA_pos = CLAs.positions[:,2]
        SOD_pos = SODs.positions[:,2]
        for i in range(CLA_res):
            ids = i + 46
            if CLA_pos[i] < 0:
                with open('{}_Atoms.csv'.format(s), 'a') as q:
                    new_line = [s, 'CLA', ids, 'Bottom', CLA_pos[i], frame, 10]
                    writes = csv.writer(q)
                    writes.writerow(new_line)
                    q.close()
            else:
                with open('{}_Atoms.csv'.format(s), 'a') as q:
                    new_line = [s, 'CLA', ids, 'Top', CLA_pos[i], frame, 10]
                    writes = csv.writer(q)
                    writes.writerow(new_line)
                    q.close()
        for i in range(SOD_res):
            ids = i
            if SOD_pos[i] < 0:
                with open('{}_Atoms.csv'.format(s), 'a') as q:
                    new_line = [s, 'SOD', ids, 'Bottom', SOD_pos[i], frame, 10]
                    writes = csv.writer(q)
                    writes.writerow(new_line)
                    q.close()
            else:
                with open('{}_Atoms.csv'.format(s), 'a') as q:
                    new_line = [s, 'SOD', ids, 'Top', SOD_pos[i], frame, 10]
                    writes = csv.writer(q)
                    writes.writerow(new_line)
                    q.close()
    csv_Data = pd.read_csv('{}_Atoms.csv'.format(s))
    filename = s + '_Atom.csv'
    sorted_df = csv_Data.sort_values(["ION", "ResID", "Frame"],
                                     ascending=[True, True, True])
    sorted_df.to_csv(filename, index=False)
    os.remove('{}_Atoms.csv'.format(s))

''' this function underneath looks at the ResIds, compares them to make sure they are the same and then counts how many
times the ion flip flops around the boundaries'''

def turn_dict(f):
    read = open(f)
    reader = csv.reader(read, delimiter=",", quotechar='"')
    my_dict = {}
    new_list = []
    for row in reader:
        new_list.append(row)
    for i in range(len(new_list[:])):
        prev = i - 1
        if new_list[i][2] == new_list[prev][2]:
            if new_list[i][3] != new_list[prev][3]:
                if new_list[i][2] in my_dict:
                    my_dict[new_list[i][2]] += 1
                else:
                    my_dict[new_list[i][2]] = 1
    return my_dict

def plot_flips(f):
    dict = turn_dict(f)
    ions = list(dict.keys())
    occ = list(dict.values())
    plt.bar(range(len(dict)), occ, tick_label=ions)
    plt.title("{}".format(s))
    plt.xlabel("Residue ID")
    plt.ylabel("Boundary Crosses")
    plt.savefig('{}_Flip.png'.format(s))

def analyze_time(f, dicts):
    read = open(f)
    reader = csv.reader(read, delimiter=",", quotechar='"')
    new_list = []
    keys = list(dicts.keys())
    time_dict = {}
    pos_matrix = {}
    for row in reader:
        new_list.append(row)
    fields = ['ResID', 'Position', 'Frame']
    with open('{}_A_pos.csv'.format(s), 'a') as k:
        writer = csv.writer(k)
        writer.writerow(fields)
        k.close()
    for i in range(len(new_list[:])):
        if new_list[i][2] in keys:
            with open('{}_A_pos.csv'.format(s), 'a') as k:
                new_line = [new_list[i][2], new_list[i][4], new_list[i][5]]
                writes = csv.writer(k)
                writes.writerow(new_line)
                k.close()
    read = open('{}_A_pos.csv'.format(s))
    reader = csv.reader(read, delimiter=",", quotechar='"')
    time_list = []
    for row in reader:
        time_list.append(row)
    for j in range(len(keys)):
        for i in range(len(time_list[1:])):
            if time_list[i][0] == keys[j]:
                pos_matrix[time_list[i][2]] = time_list[i][1]
        time_dict[keys[j]] = pos_matrix
    return time_dict

window_dict = {}
for filename in os.listdir(Directory):
    s = filename.split('.dcd')[0]
    fors = s + '.txt'
    topos = '/Users/hxb51/Desktop/Q_prof/Displacement_Charge/topo.psf'
    if filename.endswith('.dcd'):
        print('We are starting with {} \n '.format(s))
        u = mda.Universe(topos, filename)
        Positions(filename, topos)
        name = s + '_Atom.csv'
        plot_flips(name)
        window_dict[s] = turn_dict(name)
        continue

time_an_dict = {}
for filename in os.listdir(Directory):
    if filename.endswith('.csv'):
        q = filename.split('.csv')[0]
        s = q.split('_')[0]
        if s in window_dict:
            name = s + '_Atom.csv'
            time_an_dict[s] = analyze_time(name, window_dict[s])

for filename in os.listdir(Directory):
    if filename.endswith('.csv'):
        q = filename.split('.csv')[0]
        s = q.split('_')[0]
        if s in time_an_dict:
            atom = list(time_an_dict[s])
            ion = time_an_dict[s]
            for f in time_an_dict[s]:
                x_val = []
                y_val = []
                fz = ion[f]
                for i in time_an_dict[s][f]:
                    pos = (fz[i])
                    frame = i
                    y_val.append(frame)
                    x_val.append(pos)
            '''ions = atom
            frame = frames
            position = pos
            plt.plot(frame, position, label = frames)
            plt.xlabel("Frame")
            plt.ylabel("Position")
            plt.show()
            #plt.savefig('{}_Pos.png'.format(s))'''
Everything here runs well except the last block of code, which tries to make a graph from a nested dictionary. Any help would be appreciated!
Thanks!
I figured out the answer:
for filename in os.listdir(Directory):
    if filename.endswith('_Atom.csv'):
        q = filename.split('.csv')[0]
        s = q.split('_')[0]
        if s in window_dict:
            name = s + '_Atom.csv'
            time_an_dict[s] = analyze_time(name, window_dict[s])
            new = '{}_A_pos.csv'.format(s)
            ions = list(time_an_dict[s])
            plt.yticks(np.arange(-50, 50, 5))
            plt.xlabel('Frame')
            plt.ylabel('Z axis position(Ang)')
            plt.title([s])
            for i in ions:
                x_value = []
                y_value = []
                time_frame = len(time_an_dict[s][i]) + 1
                for frame in range(1, time_frame):
                    frame = str(frame)
                    x_value.append(int(frame))
                    y_value.append(float(time_an_dict[s][i][frame]))
                plt.plot(x_value, y_value, label=[i])
            plt.xticks(np.arange(1, 11, 1))
            plt.legend()
            plt.savefig('{}_Positions.png'.format(s))
            plt.clf()
            os.remove("{}_A_pos.csv".format(s))
From there, combined with the other parts of the code, it produces these graphs, one set per file, for as many '.dcd' files as there are.
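For reference, a minimal sketch of the idea behind this fix, with made-up data shaped like the nested dictionary above: cast the string keys and values to numbers, then plot one line per inner key:
import matplotlib.pyplot as plt

# Hypothetical data shaped like {title: {line_id: {frame: position}}}
data = {'140.10': {'46': {'1': '-49.5', '2': '-50.2', '3': '49.8'},
                   '28': {'1': '-49.5', '2': '-50.2', '3': '49.8'}}}
for title, lines in data.items():
    plt.title(title)
    for line_id, series in lines.items():
        frames = [int(f) for f in series]                 # x axis: frame numbers
        positions = [float(v) for v in series.values()]   # y axis: positions
        plt.plot(frames, positions, label=line_id)
    plt.legend()
    plt.show()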

UnicodeDecodeError: 'gbk' codec can't decode byte 0xa5 in position 6: illegal multibyte sequence

Running the following Python 2.x code under Python 3.8, I get an error:
import pandas as pd
from Crypto.Cipher import AES
from Crypto.Hash import MD5
import time

def build_codeword(ID, trapdoor):
    ID_index = MD5.new()
    ID_index.update(str.encode(ID))
    ECB_cipher = AES.new(trapdoor, AES.MODE_ECB)
    return ECB_cipher.encrypt(ID_index.digest()).encode("hex")

def search_index(document, trapdoor):
    search_result = []
    data_index = pd.read_csv(document)
    data_index = data_index.values
    # start_time = time.time()
    for row in range(data_index.shape[0]):
        if build_codeword(row, trapdoor) in data_index[row]:
            search_result.append(row)
    # print time.time() - start_time
    return search_result

if __name__ == "__main__":
    index_file_name = input("Please input the index file you want to search: ")
    keyword_trapdoor = input("Please input the file stored the trapdoor you want to search: ")
    keyword_trapdoor = open(keyword_trapdoor).read().strip()
    search_result = search_index(index_file_name, keyword_trapdoor)
    print("The identifiers of files that contain the keyword are: \n", search_result)
Traceback (most recent call last):
  File "E:/Searchable_Encryption-master/Searchable_Encryption-master/sse_search.py", line 28, in <module>
    keyword_trapdoor = open(keyword_trapdoor).read().strip()
UnicodeDecodeError: 'gbk' codec can't decode byte 0xa5 in position 6: illegal multibyte sequence
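The traceback points at the open() call: Python 3 opens text files with the locale's default codec (here 'gbk'), so any file that is not GBK-encoded fails to decode. A sketch of the usual fix, assuming the trapdoor file is UTF-8 text (or raw bytes):
# Pass an explicit encoding instead of relying on the locale default ('gbk'):
keyword_trapdoor = open(keyword_trapdoor, encoding="utf-8").read().strip()
# Or, if the trapdoor file holds raw key bytes, open it in binary mode:
keyword_trapdoor = open(keyword_trapdoor, "rb").read().strip()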

TypeError: 'NoneType' object is not iterable (Python3 with Oracle 19c)

Python 3.6.3 /
Oracle 19c
The following script runs fine until it hits upc_id, = cur.fetchone(). Could someone explain, please, what may cause it? If I run the query in the database, I get the result back (see below). Is there a way to see exactly what Oracle runs, after variable substitution? I suspect single quotes are not in place for bind variables, but how can I confirm?
import datetime
import cx_Oracle
import db

line_item_sku = 'Y35FLPQ_034Y_M'
x = line_item_sku.split("_")
print(x)
print("Split-list len: " + str(len(x)))
if len(x) == 3:
    sku_with_dimension = False
elif len(x) == 4:
    sku_with_dimension = True
print("SKU with dimension: " + str(sku_with_dimension))
style_id = x[0]
color_id = x[1]
size_id = x[2]
if sku_with_dimension:
    dimension_id = x[3]
print("Style: " + style_id)
print("Color: " + color_id)
print("Size: " + size_id)
conn = db.connect('Galo')
print("Connected to: " + conn.version)
cur = conn.cursor()
upc_id = cur.var(str)
print("Assigned return value")
if sku_with_dimension:
    sql = ("""
        select upc_id
        from sku
        where business_unit_id = '81'
        and style_id = :1
        and color_id = :2
        and identifier_id = 'EA'
        and size_id = :3
        and dimension_id = :4
        """)
    cur.execute(sql, (style_id, color_id, size_id, dimension_id))
else:
    sql = ("""
        select upc_id
        from sku
        where business_unit_id = '81'
        and style_id = :1
        and color_id = :2
        and identifier_id = 'EA'
        and size_id = :3
        """)
    cur.execute(sql, (style_id, color_id, size_id))
print("Determined which query to run")
upc_id, = cur.fetchone()
print(upc_id)
db.disconnect(conn, cur)
Here is the output:
['Y35FLPQ', '034Y', 'M']
Split-list len: 3
SKU with dimension: False
Style: Y35FLPQ
Color: 034Y
Size: M
Connected to: 19.0.0.0.0
Assigned return value
Determined which query to run
Traceback (most recent call last):
File "c:/Python/code/test.py", line 66, in <module>
upc_id, = cur.fetchone()
TypeError: 'NoneType' object is not iterable
If I run the query in the database, I receive a result back.
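Note that cur.fetchone() returns None when the query matches no rows, and unpacking None raises exactly this TypeError. A sketch of a guard that makes the failure visible:
# fetchone() returns None when no row matched the bind values
row = cur.fetchone()
if row is None:
    print("No SKU found for style={}, color={}, size={}".format(
        style_id, color_id, size_id))
else:
    upc_id, = row
    print(upc_id)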

object has no attribute error with python3

I get an error when trying to call the calculate_similarity2 function, which is in the DocSim.py file, from my notebook.
The error message is: 'DocSim' object has no attribute 'calculate_similarity2'
Here is the content of my DocSim file:
import numpy as np

class DocSim(object):
    def __init__(self, w2v_model, stopwords=[]):
        self.w2v_model = w2v_model
        self.stopwords = stopwords

    def vectorize(self, doc):
        """Identify the vector values for each word in the given document"""
        doc = doc.lower()
        words = [w for w in doc.split(" ") if w not in self.stopwords]
        word_vecs = []
        for word in words:
            try:
                vec = self.w2v_model[word]
                word_vecs.append(vec)
            except KeyError:
                # Ignore, if the word doesn't exist in the vocabulary
                pass
        # Assuming that document vector is the mean of all the word vectors
        # PS: There are other & better ways to do it.
        vector = np.mean(word_vecs, axis=0)
        return vector

    def _cosine_sim(self, vecA, vecB):
        """Find the cosine similarity distance between two vectors."""
        csim = np.dot(vecA, vecB) / (np.linalg.norm(vecA) * np.linalg.norm(vecB))
        if np.isnan(np.sum(csim)):
            return 0
        return csim

    def calculate_similarity(self, source_doc, target_docs=[], threshold=0):
        """Calculates & returns similarity scores between given source document & all
        the target documents."""
        if isinstance(target_docs, str):
            target_docs = [target_docs]
        source_vec = self.vectorize(source_doc)
        results = []
        for doc in target_docs:
            target_vec = self.vectorize(doc)
            sim_score = self._cosine_sim(source_vec, target_vec)
            if sim_score > threshold:
                results.append({
                    'score': sim_score,
                    'sentence': doc
                })
        # Sort results by score in desc order
        results.sort(key=lambda k: k['score'], reverse=True)
        return results

    def calculate_similarity2(self, source_doc=[], target_docs=[], threshold=0):
        """Calculates & returns similarity scores between given source document & all the target documents."""
        if isinstance(source_doc, str):
            target_docs = [source_doc]
        if isinstance(target_docs, str):
            target_docs = [target_docs]
        #source_vec = self.vectorize(source_doc)
        results = []
        for doc in source_doc:
            source_vec = self.vectorize(doc)
            for doc1 in target_docs:
                target_vec = self.vectorize(doc)
                sim_score = self._cosine_sim(source_vec, target_vec)
                if sim_score > threshold:
                    results.append({
                        'score': sim_score,
                        'source sentence': doc,
                        'target sentence': doc1
                    })
        # Sort results by score in desc order
        results.sort(key=lambda k: k['score'], reverse=True)
        return results
Here is the code in my notebook where I try to call the function:
# Create the DocSim object
ds = DocSim(word2vec_model,stopwords=stopwords)
sim_scores = ds.calculate_similarity2(source_doc, target_docs)
The error message is:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-54-bb0bd1e0e0ad> in <module>()
----> 1 sim_scores = ds.calculate_similarity2(source_doc, target_docs)
AttributeError: 'DocSim' object has no attribute 'calculate_similarity2'
I don't understand how to resolve this problem. I can access every function except calculate_similarity2.
Can you help me please?
Thanks
You have defined the calculate_similarity2 function inside the __init__ scope. Try moving it out of there.
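In other words, a sketch of the difference (bodies stubbed out with pass):
# Broken layout: the def is nested inside __init__, so it is only a local
# function there and never becomes an attribute of the instance.
class DocSim(object):
    def __init__(self, w2v_model, stopwords=[]):
        self.w2v_model = w2v_model
        self.stopwords = stopwords

        def calculate_similarity2(self, source_doc=[], target_docs=[], threshold=0):
            pass  # unreachable as ds.calculate_similarity2

# Fixed layout: the def sits at class level, so it is a normal method.
class DocSim(object):
    def __init__(self, w2v_model, stopwords=[]):
        self.w2v_model = w2v_model
        self.stopwords = stopwords

    def calculate_similarity2(self, source_doc=[], target_docs=[], threshold=0):
        pass  # ds.calculate_similarity2(source_doc, target_docs) now resolves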

Bigquery CSV file load fail

google.api_core.exceptions.BadRequest: 400 Error while reading data, error message: CSV table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.
I am trying to run a Python script that loads the data into a CSV file, but I am getting this error. Can anyone explain this error to me?
import csv
# Imports the Google Cloud BigQuery client library
from google.cloud import bigquery
from google.cloud.bigquery import Dataset
from google.cloud.bigquery import Table
from google.cloud.bigquery import LoadJobConfig
from google.cloud.bigquery import SchemaField

filename = 'events.csv'
idNeeded = 0

# Instantiates a client
bigquery_client = bigquery.Client()

# Runs a query from BigQuery
def runBigQueryQuery(query, filename, idNeeded):
    if idNeeded == 1:
        i = 1
        query_job = bigquery_client.query(query)
        results = query_job.result()
        with open(filename, 'w', newline='') as f:  # Create CSV file
            write = csv.writer(f, dialect='excel', lineterminator='\n')
            try:
                for row in results:
                    print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{} '.format(
                        row.EventId,
                        row.ScheduleId,
                        row.Date,
                        row.TimeFrom,
                        row.Description,
                        row.TimeTo,
                        row.ResourceId,
                        row.EmployeeId,
                        row.MovementTypeId,
                        row.Capacity,
                        row.CanBook,
                        row.NonMemberFlag,
                        row.MemberAmount,
                        row.NonMemberAmount,
                        row.Attendance))
                    write.writerow([i,
                                    row.EventId,
                                    row.ScheduleId,
                                    row.Date,
                                    row.TimeFrom,
                                    row.Description,
                                    row.TimeTo,
                                    row.ResourceId,
                                    row.EmployeeId,
                                    row.MovementTypeId,
                                    row.Capacity,
                                    row.CanBook,
                                    row.NonMemberFlag,
                                    row.MemberAmount,
                                    row.NonMemberAmount,
                                    row.Attendance])  # write rows to CSV
                    i = i + 1
            except AttributeError as error:
                print('An error occured: {0}'.format(error))
    else:
        query_job = bigquery_client.query(query)
        results = query_job.result()
        with open(filename, 'w', newline='') as f:  # Create CSV file
            write = csv.writer(f, dialect='excel', lineterminator='\n')
            try:
                for row in results:
                    print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{} '.format(
                        row.EventId,
                        row.ScheduleId,
                        row.Date,
                        row.TimeFrom,
                        row.Description,
                        row.TimeTo,
                        row.ResourceId,
                        row.EmployeeId,
                        row.MovementTypeId,
                        row.Capacity,
                        row.CanBook,
                        row.NonMemberFlag,
                        row.MemberAmount,
                        row.NonMemberAmount,
                        row.Attendance))
                    write.writerow([row.EventId,
                                    row.ScheduleId,
                                    row.Date,
                                    row.TimeFrom,
                                    row.Description,
                                    row.TimeTo,
                                    row.ResourceId,
                                    row.EmployeeId,
                                    row.MovementTypeId,
                                    row.Capacity,
                                    row.CanBook,
                                    row.NonMemberFlag,
                                    row.MemberAmount,
                                    row.NonMemberAmount,
                                    row.Attendance])  # write rows to CSV
            except AttributeError as error:
                print('An error occured: {0}'.format(error))
    return
# Creates a dataset in BigQuery
def createDataset(datasetname):
    dataset_ref = bigquery_client.dataset(datasetname)
    dataset = Dataset(dataset_ref)
    dataset.location = 'US'
    dataset = bigquery_client.create_dataset(dataset)
    return

def getDataset(datasetname):
    dataset = bigquery_client.dataset(datasetname)
    return dataset

def createTable(tablename, global_dataset_ref):
    schema = [
        # Enter Schema here.
        # SchemaField('url', 'STRING', mode='required'),
        # SchemaField('views', 'INTEGER', mode='required')
    ]
    table_ref = global_dataset_ref.table(tablename)
    table = Table(table_ref, schema=schema)
    table = bigquery_client.create_table(table)
    assert table.table_id == tablename
    return

def getTable(tablename, global_dataset_ref):
    table_ref = global_dataset_ref.table(tablename)
    table = bigquery_client.get_table(table_ref)
    # print(table.table_id)
    print(table.schema)
    # print(table.description)
    # print(table.num_rows)
    return table

def getTableSchema(tablename, global_dataset_ref):
    table_ref = global_dataset_ref.table(tablename)
    table = bigquery_client.get_table(table_ref)
    schema = table.schema
    return schema

def loadDataFromCSV(tablename, global_dataset_ref, filename):
    schema = getTableSchema(tablename, global_dataset_ref)
    table_ref = global_dataset_ref.table(tablename)
    load_config = LoadJobConfig()
    load_config.source_format = bigquery.SourceFormat.CSV
    load_config.schema = schema
    load_config.autodetect = True
    load_config.allow_quoted_newlines = True
    with open(filename, 'rb') as readable:
        job = bigquery_client.load_table_from_file(readable, table_ref, location='US', job_config=load_config)
    job.result()
    print('Loaded {} rows into {}:{}.'.format(job.output_rows, global_dataset_ref, table_ref.table_id))
    return
# Testing
if __name__ == '__main__':
    datasetname = 'Data_Layer'
    tablename = 'Events'
    sqlquery = '''SELECT
      null as EventId,
      sc.scheduleid AS ScheduleId,
      NULL AS Description,
      sc.scheduledatefrom AS Date,
      sc.timestart AS TimeFrom,
      sc.timeduration AS TimeTo,
      r.resourceid AS ResourceId,
      sp.employeeid AS EmployeeId,
      NULL AS MovementTypeId,
      r.configheight AS Capacity,
      CASE
        WHEN st.schedulestatus IN (1, 3) THEN '1'
        ELSE '0'
      END CanBook,
      CASE
        WHEN sv.nonmembermayenroll = TRUE THEN '1'
        ELSE '0'
      END NonMemberFlag,
      COALESCE(ProgramPrice.pricemember,
        ServicePrice.pricemember,
        0) AS MemberAmount,
      COALESCE(ProgramPrice.pricenonmember,
        ServicePrice.pricenonmember,
        0) AS NonMemberAmount,
      'N/A' AS Attendance
    FROM
      AloomaTest.SCSESSIONS s
    LEFT JOIN
      AloomaTest.SCSESSION_PROVIDERS sp
    ON
      sp.sessionid = s.sessionid
    LEFT JOIN
      AloomaTest.SCSESSION_RESOURCES sr
    ON
      sr.sessionid = s.sessionid
    LEFT JOIN
      AloomaTest.SCSCHEDULES sc
    ON
      sc.scheduleid = s.scheduleid
    LEFT JOIN
      AloomaTest._SCSCHEDULESTATUS ST
    ON
      ST.schedulestatus = sc.schedulestatus
    LEFT JOIN
      AloomaTest.SCRESOURCES r
    ON
      r.resourceid = sr.resourceid
    LEFT JOIN
      AloomaTest.SCSERVICES sv
    ON
      sv.serviceid = sc.serviceid
    LEFT JOIN
      AloomaTest.SCPROGREG_SEMCOURSES semc
    ON
      semc.serviceid = sc.serviceid
      AND semc.semesterid = sc.semesterid
    LEFT JOIN
      AloomaTest.SCPROGREG_PRICES ProgramPrice
    ON
      ProgramPrice.scheduleid = sc.scheduleid
    LEFT JOIN
      AloomaTest.SCPROGREG_PRICES ServicePrice
    ON
      ServicePrice.semcourseid = semc.semcourseid
    WHERE
      COALESCE(ProgramPrice.feetypeid, 0) = 0
      AND COALESCE(ServicePrice.feetypeid, 0) = 0
      AND sc.scheduleid IN (31207,
        25936,
        5761094,
        832794,
        9825,
        17912)
    '''
    #createDataset(datasetname)  # Successfully tested this code 2018-09-24
    global_dataset_ref = getDataset(datasetname)  # Successfully tested this code 2018-09-24
    #createTable(tablename, global_dataset_ref)  # Successfully tested this code 2018-09-24
    getTable(tablename, global_dataset_ref)  # Successfully tested this code 2018-09-24
    runBigQueryQuery(sqlquery, filename, idNeeded)  # Successfully tested this code 2018-09-24
    loadDataFromCSV(tablename, global_dataset_ref, filename)  # Successfully tested this code 2018-09-24
Sample data:
,25936,2009-06-01 18:30:00,1110,M1PO - M1 PT Full,60,,254,,,1,0,0,0,N/A
,17912,2009-04-22 06:15:00,375,Pil Ptnr - Pilates Partner,60,47,398,,10,1,1,0,0,N/A
,31207,2009-06-22 19:00:00,1140,D390-2 - 1 1/2 Hour Massage,90,107,548,,20,0,0,0,0,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,583,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,591,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,585,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,584,2349,,20,0,1,20,50,N/A
,832794,2012-02-21 14:30:00,870,Comp Member One/One,60,,2963,,,1,0,0,0,N/A
The error message indicates that there is only one row in your CSV; you might be missing newlines while writing it.
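To see the per-row details that "error stream" refers to, one option (a sketch, reusing the job object from loadDataFromCSV) is to catch the exception and print the load job's errors attribute:
from google.api_core.exceptions import BadRequest

try:
    job.result()
except BadRequest:
    for err in job.errors:  # each entry describes one rejected row or field
        print(err)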
