Compare two faces using python3 module face_recognition? - python-3.x
Sorry for my bad English. I am trying to compare two faces using the python3 module face_recognition.
Here is an example of calculating the Euclidean distance in Python:
pdist([vector1, vector2], 'euclidean')
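For reference, a self-contained version of that snippet (a sketch; it assumes NumPy and SciPy are installed, and the two 128-element arrays stand in for real face encodings):

import numpy as np
from scipy.spatial.distance import pdist

vector1 = np.zeros(128)  # placeholder face encodings
vector2 = np.ones(128)

# pdist returns a condensed distance matrix; for exactly two
# vectors it holds a single value: their Euclidean distance
print(pdist([vector1, vector2], 'euclidean')[0])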
I want to calculate the Euclidean distance in the SQL query itself, because all faces (their vectors) will be stored in my database, but I do not know how to do this in SQL.
Information:
MariaDB version: 10.5.11
Python: 3.9.2
#!/usr/bin/env python3
import cv2
import face_recognition
import mysql.connector as mysql

def get_image_hash(image):
    # Open the image and compute its 128-dimensional face encoding
    img = face_recognition.load_image_file(image)
    # Optionally convert to grayscale (disabled)
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    vector = face_recognition.face_encodings(img)[0]
    # cur.execute() expects an iterable of parameters, so wrap the string in a tuple
    vector = (str(vector),)
    return vector

# Open DB
conn = mysql.connect(
    host='127.0.0.1',
    user='user',
    passwd='password'
)
cur = conn.cursor()
cur.execute("SHOW DATABASES")
# Check if the 'test' database already exists
db_found = False
for db in cur:
    if 'test' in db:
        db_found = True
if not db_found:
    cur.execute("CREATE DATABASE IF NOT EXISTS test;")
    conn.commit()
cur.execute("USE test;")
cur.execute("""CREATE TABLE IF NOT EXISTS faces(id_face BIGINT PRIMARY KEY NOT NULL AUTO_INCREMENT, face_hash TEXT)""")

# Add a face (its encoding) to the DB
new_image = get_image_hash('test.jpg')
cur.execute('''INSERT INTO faces (face_hash) VALUES(%s)''', new_image)
conn.commit()

# Upload a picture for search
find_me_image = get_image_hash('findme.jpg')
#print('d: ', find_me_image[0])

# How should I compare these two arrays in my SQL query to find a similar face?
cur.execute("SELECT * FROM faces WHERE ..... ;")
cur.close()

print('find_me_image: ', str(find_me_image))
print('new_image: ', str(new_image))
Result:
Find_me_image: ('[-0.04221933 0.04460172 0.10287622 -0.14319997 -0.13808066 0.00552465\n -0.04414323 -0.07157505 0.23200855 -0.12091423 0.16892464 -0.16992114\n -0.2487883 0.09141497 -0.14198568 0.1824664 -0.11484738 -0.1130986\n -0.14396232 -0.06075872 -0.00201617 0.07473749 -0.01706937 0.05610432\n -0.11021845 -0.30173326 -0.02712429 -0.10394925 -0.05155517 -0.21909578\n 0.03083897 0.16680503 -0.09715255 -0.0407755 -0.01714687 0.08432341\n -0.01913652 -0.13662203 0.21924476 0.04394831 -0.20848413 -0.03259828\n 0.04784738 0.30321479 0.22730266 -0.02372641 -0.01165112 -0.12765107\n 0.13877977 -0.3403039 0.0424962 0.10813272 0.0511388 0.12078771\n 0.04942191 -0.13038178 0.02736722 0.15339687 -0.24367541 0.10453884\n 0.13450858 -0.09997959 0.01744595 -0.10602434 0.2614505 0.10681546\n -0.12075276 -0.12065229 0.195976 -0.11606392 -0.0447496 0.08198876\n -0.13573587 -0.18409243 -0.19127932 0.01680213 0.35644779 0.16652581\n -0.12988403 -0.00341757 -0.15569599 -0.09128557 -0.03799717 0.09235845\n 0.06296059 -0.07972728 0.00744779 0.07452074 0.23394027 -0.0726112\n -0.00072305 0.2978259 -0.01452125 -0.06529554 -0.08694689 0.01903715\n -0.14941891 0.10714116 -0.1096215 0.00143995 0.00146057 0.00348109\n 0.06795555 0.10826397 -0.18627991 0.21965174 -0.04136307 -0.01491791\n 0.03774849 -0.07495191 -0.03808937 -0.02331351 0.29242265 -0.23740929\n 0.13265632 0.1274993 0.17672779 0.11845816 0.01477844 0.07670261\n 0.11437597 -0.03779818 -0.21296507 0.03480547 0.06180557 -0.01749492\n -0.023851 0.11586148]',)
New_image: ('[-0.04221933 0.04460172 0.10287622 -0.14319997 -0.13808066 0.00552465\n -0.04414323 -0.07157505 0.23200855 -0.12091423 0.16892464 -0.16992114\n -0.2487883 0.09141497 -0.14198568 0.1824664 -0.11484738 -0.1130986\n -0.14396232 -0.06075872 -0.00201617 0.07473749 -0.01706937 0.05610432\n -0.11021845 -0.30173326 -0.02712429 -0.10394925 -0.05155517 -0.21909578\n 0.03083897 0.16680503 -0.09715255 -0.0407755 -0.01714687 0.08432341\n -0.01913652 -0.13662203 0.21924476 0.04394831 -0.20848413 -0.03259828\n 0.04784738 0.30321479 0.22730266 -0.02372641 -0.01165112 -0.12765107\n 0.13877977 -0.3403039 0.0424962 0.10813272 0.0511388 0.12078771\n 0.04942191 -0.13038178 0.02736722 0.15339687 -0.24367541 0.10453884\n 0.13450858 -0.09997959 0.01744595 -0.10602434 0.2614505 0.10681546\n -0.12075276 -0.12065229 0.195976 -0.11606392 -0.0447496 0.08198876\n -0.13573587 -0.18409243 -0.19127932 0.01680213 0.35644779 0.16652581\n -0.12988403 -0.00341757 -0.15569599 -0.09128557 -0.03799717 0.09235845\n 0.06296059 -0.07972728 0.00744779 0.07452074 0.23394027 -0.0726112\n -0.00072305 0.2978259 -0.01452125 -0.06529554 -0.08694689 0.01903715\n -0.14941891 0.10714116 -0.1096215 0.00143995 0.00146057 0.00348109\n 0.06795555 0.10826397 -0.18627991 0.21965174 -0.04136307 -0.01491791\n 0.03774849 -0.07495191 -0.03808937 -0.02331351 0.29242265 -0.23740929\n 0.13265632 0.1274993 0.17672779 0.11845816 0.01477844 0.07670261\n 0.11437597 -0.03779818 -0.21296507 0.03480547 0.06180557 -0.01749492\n -0.023851 0.11586148]',)
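As an aside: the two dumps above are byte-for-byte identical, so face_recognition's own comparison helpers would report a perfect match here. A minimal in-Python sketch using the library's API (0.6 is its documented default tolerance):

import face_recognition

enc_a = face_recognition.face_encodings(face_recognition.load_image_file('test.jpg'))[0]
enc_b = face_recognition.face_encodings(face_recognition.load_image_file('findme.jpg'))[0]

# face_distance returns the Euclidean distance to each known encoding;
# compare_faces thresholds it (tolerance=0.6 by default)
print(face_recognition.face_distance([enc_a], enc_b)[0])
print(face_recognition.compare_faces([enc_a], enc_b)[0])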
Update. Here is my new code, which does the comparison in Python for now:
#!/usr/bin/env python3
import cv2
import json
import face_recognition
import mysql.connector as mysql

# DB
conn = mysql.connect(
    host='localhost',
    user='',
    passwd=''
)

def load(str_data):
    # Parse the str(numpy_array) dump back into a list of rows of floats
    str_data = str_data.replace("[", "").replace("]", "")
    result = []
    for i, line in enumerate(str_data.split("\n")):
        result.append([])
        for element in line.replace("  ", " ").split(" "):
            try:
                result[i].append(float(element))
            except ValueError:
                pass
    return result

def distance(model, test):
    # Sum the Euclidean distances of the corresponding rows
    distance = 0
    for i, line in enumerate(model):
        dist_line = 0
        for j, element in enumerate(line):
            dist_line += (element - test[i][j]) ** 2
        distance += dist_line ** 0.5
    return distance

def get_image_hash(image):
    # Open the image and compute its face encoding
    img = face_recognition.load_image_file(image)
    # Optionally convert to grayscale (disabled)
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    vector = face_recognition.face_encodings(img)[0]
    # cur.execute() expects an iterable of parameters
    vector = (str(vector),)
    return vector

cur = conn.cursor(buffered=True)
cur.execute("SHOW DATABASES")
# Check if the 'test' database already exists
db_found = False
for db in cur:
    if 'test' in db:
        db_found = True
if not db_found:
    cur.execute("CREATE DATABASE IF NOT EXISTS test;")
    conn.commit()
cur.execute("USE test;")
cur.execute("""CREATE TABLE IF NOT EXISTS faces(id_face BIGINT PRIMARY KEY NOT NULL AUTO_INCREMENT, face_hash TEXT)""")

# Add a face to the DB
new_image = get_image_hash('test.jpg')
print('new_image debug: ', new_image)
cur.execute('''INSERT INTO faces (face_hash) VALUES(%s)''', new_image)
conn.commit()

# Find the added face
find_me_image = get_image_hash('findme.jpg')
print('debug find_me_image: ', find_me_image)

# Get data from DB
cur.execute("SELECT * FROM faces;")
face_data = cur.fetchall()

# Check
for x in face_data:
    print('1: ', load(find_me_image[0]))
    print('2: ', load(x[1]))
    # x[1] == the face_hash column
    compare_result = distance(load(find_me_image[0]), load(x[1]))
    #print('Result: ', compare_result)

# Got this error:
'''
Traceback (most recent call last):
  File "/home/user/Desktop/parser_steam/image_recognition/test/./test.py", line 102, in <module>
    compare_result = distance(load(find_me_image[0]), load(x[1]))
  File "/home/user/Desktop/parser_steam/image_recognition/test/./test.py", line 35, in distance
    dist_line += (element - test[i][j]) ** 2
IndexError: list index out of range
'''
cur.close()
Here is what you need!
import json

def load(str_data):
    # Convert the str(numpy_array) dump into a list of rows of floats
    str_data = str_data.replace("[", "").replace("]", "")
    result = []
    for i, line in enumerate(str_data.split("\n")):
        result.append([])
        for element in line.replace("  ", " ").split(" "):
            try:
                result[i].append(float(element))
            except ValueError:
                pass
    return result

def distance(model, test):
    # Sums the Euclidean distance of each row of the dump; 0 for
    # identical encodings, growing as the encodings diverge
    distance = 0
    for i, line in enumerate(model):
        dist_line = 0
        for j, element in enumerate(line):
            dist_line += (element - test[i][j]) ** 2
        distance += dist_line ** 0.5
    return distance
# Test with the two encoding dumps from the question:
Find_me_image = '[-0.04221933 0.04460172 0.10287622 -0.14319997 -0.13808066 0.00552465\n -0.04414323 -0.07157505 0.23200855 -0.12091423 0.16892464 -0.16992114\n -0.2487883 0.09141497 -0.14198568 0.1824664 -0.11484738 -0.1130986\n -0.14396232 -0.06075872 -0.00201617 0.07473749 -0.01706937 0.05610432\n -0.11021845 -0.30173326 -0.02712429 -0.10394925 -0.05155517 -0.21909578\n 0.03083897 0.16680503 -0.09715255 -0.0407755 -0.01714687 0.08432341\n -0.01913652 -0.13662203 0.21924476 0.04394831 -0.20848413 -0.03259828\n 0.04784738 0.30321479 0.22730266 -0.02372641 -0.01165112 -0.12765107\n 0.13877977 -0.3403039 0.0424962 0.10813272 0.0511388 0.12078771\n 0.04942191 -0.13038178 0.02736722 0.15339687 -0.24367541 0.10453884\n 0.13450858 -0.09997959 0.01744595 -0.10602434 0.2614505 0.10681546\n -0.12075276 -0.12065229 0.195976 -0.11606392 -0.0447496 0.08198876\n -0.13573587 -0.18409243 -0.19127932 0.01680213 0.35644779 0.16652581\n -0.12988403 -0.00341757 -0.15569599 -0.09128557 -0.03799717 0.09235845\n 0.06296059 -0.07972728 0.00744779 0.07452074 0.23394027 -0.0726112\n -0.00072305 0.2978259 -0.01452125 -0.06529554 -0.08694689 0.01903715\n -0.14941891 0.10714116 -0.1096215 0.00143995 0.00146057 0.00348109\n 0.06795555 0.10826397 -0.18627991 0.21965174 -0.04136307 -0.01491791\n 0.03774849 -0.07495191 -0.03808937 -0.02331351 0.29242265 -0.23740929\n 0.13265632 0.1274993 0.17672779 0.11845816 0.01477844 0.07670261\n 0.11437597 -0.03779818 -0.21296507 0.03480547 0.06180557 -0.01749492\n -0.023851 0.11586148]'
New_image = '[-0.04221933 0.04460172 0.10287622 -0.14319997 -0.13808064 0.00552465\n -0.04414323 -0.07157505 0.23200855 -0.12091423 0.16892464 -0.16992114\n -0.2487883 0.09141497 -0.14198568 0.18246 -0.11484738 -0.1130986\n -0.14396232 -0.06075872 -0.0020117 0.07473749 -0.01706937 0.05610432\n -0.11021845 -0.30173326 -0.02712429 -0.10394925 -0.05155517 -0.21909578\n 0.03083897 0.16680503 -0.09715255 -0.0407755 -0.01714687 0.08432341\n -0.01913652 -0.13662203 0.21924476 0.04394831 -0.20848413 -0.03259828\n 0.04784738 0.30321479 0.22730266 -0.02372641 -0.0116112 -0.12765107\n 0.13877977 -0.3403039 0.0424962 0.10813272 0.0511388 0.12078771\n 0.04942191 -0.13038178 0.02736722 0.15339687 -0.24367541 0.10453884\n 0.13450858 -0.09997959 0.01744595 -0.10602434 0.2614505 0.10681546\n -0.12075276 -0.12065229 0.195976 -0.11606392 -0.0447496 0.08198876\n -0.1357387 -0.18409243 -0.19127932 0.01680213 0.35644779 0.16652581\n -0.12988403 -0.00341757 -0.15569599 -0.09128557 -0.03799717 0.09235845\n 0.06296059 -0.07972728 0.00744779 0.07452074 0.23394027 -0.0726112\n -0.00072305 0.2978259 -0.01452125 -0.06529554 -0.08694689 0.0193715\n -0.14941891 0.10714116 -0.1096215 0.00143995 0.00146057 0.00348109\n 0.06795555 0.10826397 -0.18627991 0.21965174 -0.04136307 -0.01491791\n 0.03774849 -0.07495191 -0.03808937 -0.02331351 0.29242265 -0.23740929\n 0.13265632 0.1274993 0.1762779 0.11845816 0.01477844 0.07670261\n 0.11437597 -0.03779818 -0.21296507 0.03480547 0.0618057 -0.01749492\n -0.023851 0.1158648]'
print(distance(
    load(Find_me_image),
    load(New_image)
))
You first need to convert your data with the load function, then calculate the distance with the distance function.
Since your two dumps are identical, I modified the New_image data slightly to test the function.
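If you do want the comparison to run inside MariaDB itself, as the question asks, one possible approach (an untested sketch, not from the original answer) is to store the 128 numbers as a JSON array with json.dumps(vector.tolist()) instead of str(vector), and let SQL index into it. Here vector stands for a face_recognition encoding as in get_image_hash, and seq_0_to_127 is a sequence table provided by MariaDB's built-in SEQUENCE engine:

import json

# Store the encoding as JSON so SQL can address each element
cur.execute("INSERT INTO faces (face_hash) VALUES (%s)",
            (json.dumps(vector.tolist()),))

# Euclidean distance computed entirely in SQL; closest face first
probe = json.dumps(vector.tolist())
cur.execute("""
    SELECT f.id_face,
           SQRT(SUM(POW(
               JSON_EXTRACT(f.face_hash, CONCAT('$[', s.seq, ']')) -
               JSON_EXTRACT(%s,          CONCAT('$[', s.seq, ']')), 2))) AS dist
    FROM faces f
    JOIN seq_0_to_127 s
    GROUP BY f.id_face
    ORDER BY dist
    LIMIT 1""", (probe,))
print(cur.fetchone())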
Related
Plotting multiple lines with a Nested Dictionary, and unknown variables to Line Graph
I was able to find somewhat of an answer to my question, but it was not as nested as my dictionary, and so I am really unsure how to proceed as I am still very new to Python. I currently have a nested dictionary like:

{'140.10': {'46': {'1': '-49.50918', '2': '-50.223637', '3': '49.824406'}, '28': {'1': '-49.50918', '2': '-50.223637', '3': '49.824406'}}}

I want to plot it so that '140.10' becomes the title of the graph, '46' and '28' become the individual lines, key '1' (for example) is on the y axis, and the x axis is the final number (in this case '-49.50918'). Essentially a graph like this: [image: desired line graph]. I generated this graph from a csv file that is written at another part of the code, just with Excel: [image: graph produced in Excel]

The problem I am running into is that these keys are autogenerated from a larger csv file and I will not know their exact value until the code has been run, since each of the keys is autogenerated in an earlier part of the script. I will be running it over various files named after the graph, and each file will have different values for:

{key1: {key2_1: {key3_1: value1, key3_2: value2, key3_3: value3}, key2_2: ...}}

I have tried to do something like this:

for filename in os.listdir(Directory):
    if filename.endswith('.csv'):
        q = filename.split('.csv')[0]
        s = q.split('_')[0]
        if s in time_an_dict:
            atom = list(time_an_dict[s])
            ion = time_an_dict[s]
            for f in time_an_dict[s]:
                x_val = []
                y_val = []
                fz = ion[f]
                for i in time_an_dict[s][f]:
                    pos = (fz[i])
                    frame = i
                    y_val.append(frame)
                    x_val.append(pos)
            '''ions = atom
            frame = frames
            position = pos
            plt.plot(frame, position, label = frames)
            plt.xlabel("Frame")
            plt.ylabel("Position")
            plt.show()
            #plt.savefig('{}_Pos.png'.format(s))'''

But it has not run as intended. I have also tried:

for filename in os.listdir(Directory):
    if filename.endswith('_Atom.csv'):
        q = filename.split('.csv')[0]
        s = q.split('_')[0]
        if s in window_dict:
            name = s + '_Atom.csv'
            time_an_dict[s] = analyze_time(name, window_dict[s])
            new = '{}_A_pos.csv'.format(s)
            ions = list(time_an_dict.values())[0].keys()
            for i in ions:
                x_axis_values = []
                y_axis_values = []
                frame = list(time_an_dict[s][i])
                x_axis_values.append(frame)
                empty = []
                print(x_axis_values)
                for x in frame:
                    values = time_an_dict[s][i][x]
                    empty.append(values)
                y_axis_values.append(empty)
                plt.plot(x_axis_values, y_axis_values, label = x)
                plt.show()

But keep getting the error:

Traceback (most recent call last):
  File "Atoms_pos.py", line 175, in <module>
    plt.plot(x_axis_values, y_axis_values, label = x)
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/pyplot.py", line 2840, in plot
    return gca().plot(
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_axes.py", line 1743, in plot
    lines = [*self._get_lines(*args, data=data, **kwargs)]
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_base.py", line 273, in __call__
    yield from self._plot_args(this, kwargs)
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_base.py", line 394, in _plot_args
    self.axes.xaxis.update_units(x)
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axis.py", line 1466, in update_units
    default = self.converter.default_units(data, self)
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py", line 107, in default_units
    axis.set_units(UnitData(data))
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py", line 176, in __init__
    self.update(data)
  File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py", line 209, in update
    for val in OrderedDict.fromkeys(data):
TypeError: unhashable type: 'numpy.ndarray'

Here is the remainder of the other parts of the code that generate the files and dictionaries I am using. I was told in another question I asked that this could be helpful.

# importing dependencies
import math
import sys
import pandas as pd
import MDAnalysis as mda
import os
import numpy as np
import csv
import matplotlib.pyplot as plt

Directory = '/Users/hxb51/Desktop/Q_prof/Displacement_Charge/Blah'
os.chdir(Directory)

'''We are only looking at the positions of the CLAs and SODs and not the DRUDE
counterparts. We are assuming the DRUDE are very close and it is not something
that needs to be concerned with'''

def Positions(dcd, topo):
    fields = ['Window', 'ION', 'ResID', 'Location', 'Position', 'Frame', 'Final']
    with open('{}_Atoms.csv'.format(s), 'a') as d:
        writer = csv.writer(d)
        writer.writerow(fields)
        d.close()
    CLAs = u.select_atoms('segid IONS and name CLA')
    SODs = u.select_atoms('segid IONS and name SOD')
    CLA_res = len(CLAs)
    SOD_res = len(SODs)
    frame = 0
    for ts in u.trajectory[-10:]:
        frame += 1
        CLA_pos = CLAs.positions[:,2]
        SOD_pos = SODs.positions[:,2]
        for i in range(CLA_res):
            ids = i + 46
            if CLA_pos[i] < 0:
                with open('{}_Atoms.csv'.format(s), 'a') as q:
                    new_line = [s, 'CLA', ids, 'Bottom', CLA_pos[i], frame, 10]
                    writes = csv.writer(q)
                    writes.writerow(new_line)
                    q.close()
            else:
                with open('{}_Atoms.csv'.format(s), 'a') as q:
                    new_line = [s, 'CLA', ids, 'Top', CLA_pos[i], frame, 10]
                    writes = csv.writer(q)
                    writes.writerow(new_line)
                    q.close()
        for i in range(SOD_res):
            ids = i
            if SOD_pos[i] < 0:
                with open('{}_Atoms.csv'.format(s), 'a') as q:
                    new_line = [s, 'SOD', ids, 'Bottom', SOD_pos[i], frame, 10]
                    writes = csv.writer(q)
                    writes.writerow(new_line)
                    q.close()
            else:
                with open('{}_Atoms.csv'.format(s), 'a') as q:
                    new_line = [s, 'SOD', ids, 'Top', SOD_pos[i], frame, 10]
                    writes = csv.writer(q)
                    writes.writerow(new_line)
                    q.close()
    csv_Data = pd.read_csv('{}_Atoms.csv'.format(s))
    filename = s + '_Atom.csv'
    sorted_df = csv_Data.sort_values(["ION", "ResID", "Frame"], ascending=[True, True, True])
    sorted_df.to_csv(filename, index=False)
    os.remove('{}_Atoms.csv'.format(s))

'''this function underneath looks at the ResIds, compares them to make sure
they are the same and then counts how many times the ion flip flops around
the boundaries'''

def turn_dict(f):
    read = open(f)
    reader = csv.reader(read, delimiter=",", quotechar='"')
    my_dict = {}
    new_list = []
    for row in reader:
        new_list.append(row)
    for i in range(len(new_list[:])):
        prev = i - 1
        if new_list[i][2] == new_list[prev][2]:
            if new_list[i][3] != new_list[prev][3]:
                if new_list[i][2] in my_dict:
                    my_dict[new_list[i][2]] += 1
                else:
                    my_dict[new_list[i][2]] = 1
    return my_dict

def plot_flips(f):
    dict = turn_dict(f)
    ions = list(dict.keys())
    occ = list(dict.values())
    plt.bar(range(len(dict)), occ, tick_label=ions)
    plt.title("{}".format(s))
    plt.xlabel("Residue ID")
    plt.ylabel("Boundary Crosses")
    plt.savefig('{}_Flip.png'.format(s))

def analyze_time(f, dicts):
    read = open(f)
    reader = csv.reader(read, delimiter=",", quotechar='"')
    new_list = []
    keys = list(dicts.keys())
    time_dict = {}
    pos_matrix = {}
    for row in reader:
        new_list.append(row)
    fields = ['ResID', 'Position', 'Frame']
    with open('{}_A_pos.csv'.format(s), 'a') as k:
        writer = csv.writer(k)
        writer.writerow(fields)
        k.close()
    for i in range(len(new_list[:])):
        if new_list[i][2] in keys:
            with open('{}_A_pos.csv'.format(s), 'a') as k:
                new_line = [new_list[i][2], new_list[i][4], new_list[i][5]]
                writes = csv.writer(k)
                writes.writerow(new_line)
                k.close()
    read = open('{}_A_pos.csv'.format(s))
    reader = csv.reader(read, delimiter=",", quotechar='"')
    time_list = []
    for row in reader:
        time_list.append(row)
    for j in range(len(keys)):
        for i in range(len(time_list[1:])):
            if time_list[i][0] == keys[j]:
                pos_matrix[time_list[i][2]] = time_list[i][1]
        time_dict[keys[j]] = pos_matrix
    return time_dict

window_dict = {}
for filename in os.listdir(Directory):
    s = filename.split('.dcd')[0]
    fors = s + '.txt'
    topos = '/Users/hxb51/Desktop/Q_prof/Displacement_Charge/topo.psf'
    if filename.endswith('.dcd'):
        print('We are starting with {} \n '.format(s))
        u = mda.Universe(topos, filename)
        Positions(filename, topos)
        name = s + '_Atom.csv'
        plot_flips(name)
        window_dict[s] = turn_dict(name)
        continue

time_an_dict = {}
for filename in os.listdir(Directory):
    if filename.endswith('.csv'):
        q = filename.split('.csv')[0]
        s = q.split('_')[0]
        if s in window_dict:
            name = s + '_Atom.csv'
            time_an_dict[s] = analyze_time(name, window_dict[s])

for filename in os.listdir(Directory):
    if filename.endswith('.csv'):
        q = filename.split('.csv')[0]
        s = q.split('_')[0]
        if s in time_an_dict:
            atom = list(time_an_dict[s])
            ion = time_an_dict[s]
            for f in time_an_dict[s]:
                x_val = []
                y_val = []
                fz = ion[f]
                for i in time_an_dict[s][f]:
                    pos = (fz[i])
                    frame = i
                    y_val.append(frame)
                    x_val.append(pos)
            '''ions = atom
            frame = frames
            position = pos
            plt.plot(frame, position, label = frames)
            plt.xlabel("Frame")
            plt.ylabel("Position")
            plt.show()
            #plt.savefig('{}_Pos.png'.format(s))'''

Everything here runs well except this last bottom block of code, which tries to make a graph from the nested dictionary. Any help would be appreciated! Thanks!
I figured out the answer:

for filename in os.listdir(Directory):
    if filename.endswith('_Atom.csv'):
        q = filename.split('.csv')[0]
        s = q.split('_')[0]
        if s in window_dict:
            name = s + '_Atom.csv'
            time_an_dict[s] = analyze_time(name, window_dict[s])
            new = '{}_A_pos.csv'.format(s)
            ions = list(time_an_dict[s])
            plt.yticks(np.arange(-50, 50, 5))
            plt.xlabel('Frame')
            plt.ylabel('Z axis position(Ang)')
            plt.title([s])
            for i in ions:
                x_value = []
                y_value = []
                time_frame = len(time_an_dict[s][i]) + 1
                for frame in range(1, time_frame):
                    frame = str(frame)
                    x_value.append(int(frame))
                    y_value.append(float(time_an_dict[s][i][frame]))
                plt.plot(x_value, y_value, label=[i])
            plt.xticks(np.arange(1, 11, 1))
            plt.legend()
            plt.savefig('{}_Positions.png'.format(s))
            plt.clf()
            os.remove("{}_A_pos.csv".format(s))

From there, in combination with the other parts of the code, it produces these graphs: [image: position-vs-frame graphs]. This works for more than one file, as long as there are more '.dcd' files.
UnicodeDecodeError: 'gbk' codec can't decode byte 0xa5 in position 6: illegal multibyte sequence
Running Python 2.x code on Python 3.8, I get an error when running the following code:

import pandas as pd
from Crypto.Cipher import AES
from Crypto.Hash import MD5
import time

def build_codeword(ID, trapdoor):
    ID_index = MD5.new()
    ID_index.update(str.encode(ID))
    ECB_cipher = AES.new(trapdoor, AES.MODE_ECB)
    return ECB_cipher.encrypt(ID_index.digest()).encode("hex")

def search_index(document, trapdoor):
    search_result = []
    data_index = pd.read_csv(document)
    data_index = data_index.values
    # start_time = time.time()
    for row in range(data_index.shape[0]):
        if build_codeword(row, trapdoor) in data_index[row]:
            search_result.append(row)
    # print time.time() - start_time
    return search_result

if __name__ == "__main__":
    index_file_name = input("Please input the index file you want to search: ")
    keyword_trapdoor = input("Please input the file stored the trapdoor you want to search: ")
    keyword_trapdoor = open(keyword_trapdoor).read().strip()
    search_result = search_index(index_file_name, keyword_trapdoor)
    print("The identifiers of files that contain the keyword are: \n", search_result)

Traceback (most recent call last):
  File "E:/Searchable_Encryption-master/Searchable_Encryption-master/sse_search.py", line 28, in <module>
    keyword_trapdoor = open(keyword_trapdoor).read().strip()
UnicodeDecodeError: 'gbk' codec can't decode byte 0xa5 in position 6: illegal multibyte sequence
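No answer is recorded here, but the traceback shows open() falling back to the platform default encoding ('gbk' on a Chinese-locale Windows). A hedged sketch of the usual fixes, not from the original thread; note too that bytes.encode("hex") is Python 2 only, and in Python 3 ciphertext.hex() replaces it:

# If the trapdoor file is text, name its encoding explicitly instead of
# relying on the locale default:
keyword_trapdoor = open(keyword_trapdoor, encoding="utf-8").read().strip()

# If it actually holds raw AES key bytes, read it in binary mode instead:
keyword_trapdoor = open(keyword_trapdoor, "rb").read().strip()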
TypeError: 'NoneType' object is not iterable (Python3 with Oracle 19c)
Python 3.6.3 / Oracle 19c. The following script runs fine till it hits upc_id, = cur.fetchone(). Could someone explain, please, what may cause this? If I run the query in the database, I get the result back (see below). Is there a way to see exactly what Oracle runs after variable substitution? I suspect single quotes are not in place for bind variables, but how can I confirm?

import datetime
import cx_Oracle
import db

line_item_sku = 'Y35FLPQ_034Y_M'
x = line_item_sku.split("_")
print(x)
print("Split-list len: " + str(len(x)))
if len(x) == 3:
    sku_with_dimension = False
elif len(x) == 4:
    sku_with_dimension = True
print("SKU with dimension: " + str(sku_with_dimension))
style_id = x[0]
color_id = x[1]
size_id = x[2]
if sku_with_dimension:
    dimension_id = x[3]
print("Style: " + style_id)
print("Color: " + color_id)
print("Size: " + size_id)

conn = db.connect('Galo')
print("Connected to: " + conn.version)
cur = conn.cursor()
upc_id = cur.var(str)
print("Assigned return value")
if sku_with_dimension:
    sql = ("""
        select upc_id
        from sku
        where business_unit_id = '81'
        and style_id = :1
        and color_id = :2
        and identifier_id = 'EA'
        and size_id = :3
        and dimension_id = :4
        """)
    cur.execute(sql, (style_id, color_id, size_id, dimension_id))
else:
    sql = ("""
        select upc_id
        from sku
        where business_unit_id = '81'
        and style_id = :1
        and color_id = :2
        and identifier_id = 'EA'
        and size_id = :3
        """)
    cur.execute(sql, (style_id, color_id, size_id))
print("Determined which query to run")
upc_id, = cur.fetchone()
print(upc_id)
db.disconnect(conn, cur)

Here is the output:

['Y35FLPQ', '034Y', 'M']
Split-list len: 3
SKU with dimension: False
Style: Y35FLPQ
Color: 034Y
Size: M
Connected to: 19.0.0.0.0
Assigned return value
Determined which query to run
Traceback (most recent call last):
  File "c:/Python/code/test.py", line 66, in <module>
    upc_id, = cur.fetchone()
TypeError: 'NoneType' object is not iterable

If I run the query in the database, I receive a result back:
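No accepted answer appears here, but the traceback itself is unambiguous: cur.fetchone() returns None when the query matches no rows, and unpacking None with upc_id, = ... raises exactly this TypeError. Bind variables need no extra quoting (cx_Oracle passes them typed rather than substituting them into the SQL text), so the usual move is to guard the fetch; a hedged sketch:

row = cur.fetchone()
if row is None:
    # No matching row: check the bind values (e.g. trailing blanks
    # in CHAR columns are a classic cause of silent non-matches)
    print("No UPC found for", style_id, color_id, size_id)
else:
    upc_id, = row
    print(upc_id)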
object has no attribute error with python3
I have an error when trying to call the calculate_similarity2 function, which is in the DocSim.py file, from my notebook. The error message is:

'DocSim' object has no attribute 'calculate_similarity2'

Here is the content of my DocSim file:

import numpy as np

class DocSim(object):
    def __init__(self, w2v_model, stopwords=[]):
        self.w2v_model = w2v_model
        self.stopwords = stopwords

    def vectorize(self, doc):
        """Identify the vector values for each word in the given document"""
        doc = doc.lower()
        words = [w for w in doc.split(" ") if w not in self.stopwords]
        word_vecs = []
        for word in words:
            try:
                vec = self.w2v_model[word]
                word_vecs.append(vec)
            except KeyError:
                # Ignore, if the word doesn't exist in the vocabulary
                pass
        # Assuming that document vector is the mean of all the word vectors
        # PS: There are other & better ways to do it.
        vector = np.mean(word_vecs, axis=0)
        return vector

    def _cosine_sim(self, vecA, vecB):
        """Find the cosine similarity distance between two vectors."""
        csim = np.dot(vecA, vecB) / (np.linalg.norm(vecA) * np.linalg.norm(vecB))
        if np.isnan(np.sum(csim)):
            return 0
        return csim

    def calculate_similarity(self, source_doc, target_docs=[], threshold=0):
        """Calculates & returns similarity scores between given source document & all the target documents."""
        if isinstance(target_docs, str):
            target_docs = [target_docs]
        source_vec = self.vectorize(source_doc)
        results = []
        for doc in target_docs:
            target_vec = self.vectorize(doc)
            sim_score = self._cosine_sim(source_vec, target_vec)
            if sim_score > threshold:
                results.append({'score': sim_score, 'sentence': doc})
        # Sort results by score in desc order
        results.sort(key=lambda k: k['score'], reverse=True)
        return results

    def calculate_similarity2(self, source_doc=[], target_docs=[], threshold=0):
        """Calculates & returns similarity scores between given source document & all the target documents."""
        if isinstance(source_doc, str):
            target_docs = [source_doc]
        if isinstance(target_docs, str):
            target_docs = [target_docs]
        #source_vec = self.vectorize(source_doc)
        results = []
        for doc in source_doc:
            source_vec = self.vectorize(doc)
            for doc1 in target_docs:
                target_vec = self.vectorize(doc)
                sim_score = self._cosine_sim(source_vec, target_vec)
                if sim_score > threshold:
                    results.append({'score': sim_score, 'source sentence': doc, 'target sentence': doc1})
        # Sort results by score in desc order
        results.sort(key=lambda k: k['score'], reverse=True)
        return results

Here is the notebook code where I try to call the function. To create the DocSim object:

ds = DocSim(word2vec_model, stopwords=stopwords)
sim_scores = ds.calculate_similarity2(source_doc, target_docs)

The error message is:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-54-bb0bd1e0e0ad> in <module>()
----> 1 sim_scores = ds.calculate_similarity2(source_doc, target_docs)

AttributeError: 'DocSim' object has no attribute 'calculate_similarity2'

I don't understand how to resolve this problem. I can access all the functions except calculate_similarity2. Can you help me please? Thanks.
You have defined the calculate_similarity2 function inside the __init__ scope. Try moving it out of there.
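A minimal illustration of the difference the answer describes (trimmed, with hypothetical placeholder bodies):

# Nested inside __init__ -- just a local function that vanishes
# when __init__ returns, so ds.calculate_similarity2 fails:
class DocSim(object):
    def __init__(self, w2v_model, stopwords=[]):
        self.w2v_model = w2v_model
        self.stopwords = stopwords

        def calculate_similarity2(self, source_doc=[], target_docs=[], threshold=0):
            ...

# At class level -- a real method, found by attribute lookup:
class DocSim(object):
    def __init__(self, w2v_model, stopwords=[]):
        self.w2v_model = w2v_model
        self.stopwords = stopwords

    def calculate_similarity2(self, source_doc=[], target_docs=[], threshold=0):
        ...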
Bigquery CSV file load fail
google.api_core.exceptions.BadRequest: 400 Error while reading data, error message: CSV table encountered too many errors, giving up. Rows: 1; errors: 1. Please look into the error stream for more details.

I am trying to run a Python script that loads the data into CSV but I am getting this error. Can anyone explain this error to me?

import csv
# Imports the Google Cloud BigQuery client library
from google.cloud import bigquery
from google.cloud.bigquery import Dataset
from google.cloud.bigquery import Table
from google.cloud.bigquery import LoadJobConfig
from google.cloud.bigquery import SchemaField

filename = 'events.csv'
idNeeded = 0

# Instantiates a client
bigquery_client = bigquery.Client()

# Runs a query from BigQuery
def runBigQueryQuery(query, filename, idNeeded):
    if idNeeded == 1:
        i = 1
        query_job = bigquery_client.query(query)
        results = query_job.result()
        with open(filename, 'w', newline='') as f:  # Create CSV file
            write = csv.writer(f, dialect='excel', lineterminator='\n')
            try:
                for row in results:
                    print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{} '.format(row.EventId, row.ScheduleId, row.Date, row.TimeFrom, row.Description, row.TimeTo, row.ResourceId, row.EmployeeId, row.MovementTypeId, row.Capacity, row.CanBook, row.NonMemberFlag, row.MemberAmount, row.NonMemberAmount, row.Attendance))
                    # write rows to CSV
                    write.writerow([i, row.EventId, row.ScheduleId, row.Date, row.TimeFrom, row.Description, row.TimeTo, row.ResourceId, row.EmployeeId, row.MovementTypeId, row.Capacity, row.CanBook, row.NonMemberFlag, row.MemberAmount, row.NonMemberAmount, row.Attendance])
                    i = i + 1
            except AttributeError as error:
                print('An error occured: {0}'.format(error))
    else:
        query_job = bigquery_client.query(query)
        results = query_job.result()
        with open(filename, 'w', newline='') as f:  # Create CSV file
            write = csv.writer(f, dialect='excel', lineterminator='\n')
            try:
                for row in results:
                    print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{} '.format(row.EventId, row.ScheduleId, row.Date, row.TimeFrom, row.Description, row.TimeTo, row.ResourceId, row.EmployeeId, row.MovementTypeId, row.Capacity, row.CanBook, row.NonMemberFlag, row.MemberAmount, row.NonMemberAmount, row.Attendance))
                    # write rows to CSV
                    write.writerow([row.EventId, row.ScheduleId, row.Date, row.TimeFrom, row.Description, row.TimeTo, row.ResourceId, row.EmployeeId, row.MovementTypeId, row.Capacity, row.CanBook, row.NonMemberFlag, row.MemberAmount, row.NonMemberAmount, row.Attendance])
            except AttributeError as error:
                print('An error occured: {0}'.format(error))
    return

# Creates a dataset in BigQuery
def createDataset(datasetname):
    dataset_ref = bigquery_client.dataset(datasetname)
    dataset = Dataset(dataset_ref)
    dataset.location = 'US'
    dataset = bigquery_client.create_dataset(dataset)
    return

def getDataset(datasetname):
    dataset = bigquery_client.dataset(datasetname)
    return dataset

def createTable(tablename, global_dataset_ref):
    schema = [
        # Enter schema here, e.g.:
        # SchemaField('url', 'STRING', mode='required'),
        # SchemaField('views', 'INTEGER', mode='required')
    ]
    table_ref = global_dataset_ref.table(tablename)
    table = Table(table_ref, schema=schema)
    table = bigquery_client.create_table(table)
    assert table.table_id == tablename
    return

def getTable(tablename, global_dataset_ref):
    table_ref = global_dataset_ref.table(tablename)
    table = bigquery_client.get_table(table_ref)
    # print(table.table_id)
    print(table.schema)
    # print(table.description)
    # print(table.num_rows)
    return table

def getTableSchema(tablename, global_dataset_ref):
    table_ref = global_dataset_ref.table(tablename)
    table = bigquery_client.get_table(table_ref)
    schema = table.schema
    return schema

def loadDataFromCSV(tablename, global_dataset_ref, filename):
    schema = getTableSchema(tablename, global_dataset_ref)
    table_ref = global_dataset_ref.table(tablename)
    load_config = LoadJobConfig()
    load_config.source_format = bigquery.SourceFormat.CSV
    load_config.schema = schema
    load_config.autodetect = True
    load_config.allow_quoted_newlines = True
    with open(filename, 'rb') as readable:
        job = bigquery_client.load_table_from_file(readable, table_ref, location='US', job_config=load_config)
    job.result()
    print('Loaded {} rows into {}:{}.'.format(job.output_rows, global_dataset_ref, table_ref.table_id))
    return

# Testing
if __name__ == '__main__':
    datasetname = 'Data_Layer'
    tablename = 'Events'
    sqlquery = '''SELECT null as EventId,
        sc.scheduleid AS ScheduleId,
        NULL AS Description,
        sc.scheduledatefrom AS Date,
        sc.timestart AS TimeFrom,
        sc.timeduration AS TimeTo,
        r.resourceid AS ResourceId,
        sp.employeeid AS EmployeeId,
        NULL AS MovementTypeId,
        r.configheight AS Capacity,
        CASE WHEN st.schedulestatus IN (1, 3) THEN '1' ELSE '0' END CanBook,
        CASE WHEN sv.nonmembermayenroll = TRUE THEN '1' ELSE '0' END NonMemberFlag,
        COALESCE(ProgramPrice.pricemember, ServicePrice.pricemember, 0) AS MemberAmount,
        COALESCE(ProgramPrice.pricenonmember, ServicePrice.pricenonmember, 0) AS NonMemberAmount,
        'N/A' AS Attendance
        FROM AloomaTest.SCSESSIONS s
        LEFT JOIN AloomaTest.SCSESSION_PROVIDERS sp ON sp.sessionid = s.sessionid
        LEFT JOIN AloomaTest.SCSESSION_RESOURCES sr ON sr.sessionid = s.sessionid
        LEFT JOIN AloomaTest.SCSCHEDULES sc ON sc.scheduleid = s.scheduleid
        LEFT JOIN AloomaTest._SCSCHEDULESTATUS ST ON ST.schedulestatus = sc.schedulestatus
        LEFT JOIN AloomaTest.SCRESOURCES r ON r.resourceid = sr.resourceid
        LEFT JOIN AloomaTest.SCSERVICES sv ON sv.serviceid = sc.serviceid
        LEFT JOIN AloomaTest.SCPROGREG_SEMCOURSES semc
               ON semc.serviceid = sc.serviceid
              AND semc.semesterid = sc.semesterid
        LEFT JOIN AloomaTest.SCPROGREG_PRICES ProgramPrice
               ON ProgramPrice.scheduleid = sc.scheduleid
        LEFT JOIN AloomaTest.SCPROGREG_PRICES ServicePrice
               ON ServicePrice.semcourseid = semc.semcourseid
        WHERE COALESCE(ProgramPrice.feetypeid, 0) = 0
          AND COALESCE(ServicePrice.feetypeid, 0) = 0
          and sc.scheduleid in (31207, 25936, 5761094, 832794, 9825, 17912)
        '''
    #createDataset(datasetname)  # Successfully tested this code 2018-09-24
    global_dataset_ref = getDataset(datasetname)  # Successfully tested this code 2018-09-24
    #createTable(tablename, global_dataset_ref)  # Successfully tested this code 2018-09-24
    getTable(tablename, global_dataset_ref)  # Successfully tested this code 2018-09-24
    runBigQueryQuery(sqlquery, filename, idNeeded)  # Successfully tested this code 2018-09-24
    loadDataFromCSV(tablename, global_dataset_ref, filename)  # Successfully tested this code 2018-09-24

Sample data:

,25936,2009-06-01 18:30:00,1110,M1PO - M1 PT Full,60,,254,,,1,0,0,0,N/A
,17912,2009-04-22 06:15:00,375,Pil Ptnr - Pilates Partner,60,47,398,,10,1,1,0,0,N/A
,31207,2009-06-22 19:00:00,1140,D390-2 - 1 1/2 Hour Massage,90,107,548,,20,0,0,0,0,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,583,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,591,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,585,2349,,20,0,1,20,50,N/A
,5761094,2018-10-05 00:00:00,1140,Fr 7:00-9:00p Adult Paddle Mixer,120,584,2349,,20,0,1,20,50,N/A
,832794,2012-02-21 14:30:00,870,Comp Member One/One,60,,2963,,,1,0,0,0,N/A
The error message indicates that there is only one row in your CSV; you might be missing newlines while writing it.
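When this load fails, the "error stream" the message refers to is exposed on the job object; a short sketch (assuming the job variable from loadDataFromCSV above) that prints it before re-raising:

try:
    job.result()
except Exception:
    # job.errors holds the per-row error stream the 400 message points to
    for err in job.errors or []:
        print(err)
    raise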