Run the code When all Threads are done (Python ThreadPoolExecutor)

Run the code When all Threads are done (Python ThreadPoolExecutor) - multithreading

I want to find a way to run a code when all Threads are done using ThreadPoolExecutor.
Here is a slice of my code (Making GUI with Tkinter).
I want to config the label when all threads are done.
How can I achieve this?
def logic(labels):
length = len(files)
global datalist
global future1
global future2
global future3
datalist = []
for i in range(length):
name = files[i]
ext = name.split('.')[-1]
file_name = os.path.basename(name)
if file_name.startswith('!v3'):
raw_data = pd.read_excel(name, sheet_name=None)
raw_data = pd.concat(raw_data,ignore_index=True)
elif (ext == 'xlsx') or (ext == 'xls'):
raw_data = pd.read_excel(name)
else:
raw_data = pd.read_csv(name, encoding='CP949')
file_name = os.path.basename(name)
if file_name.startswith('!v1'):
raw_data['업장명'] = file_name.split('_')[1]
with concurrent.futures.ThreadPoolExecutor() as executor:
future1 = executor.submit(data_processing1, raw_data)
final1 = future1.result()
datalist.append(final1)
elif file_name.startswith('!v2'):
if '지점명' not in raw_data.columns :
raw_data['지점명'] = file_name.split('_')[2]
raw_data['업장명'] = file_name.split('_')[1]
with concurrent.futures.ThreadPoolExecutor() as executor:
future2 = executor.submit(data_processing2, raw_data)
final2 = future2.result()
datalist.append(final2)
elif file_name.startswith('!v3'):
raw_data['업장명']=file_name.split('_')[1]
with concurrent.futures.ThreadPoolExecutor() as executor:
future3 = executor.submit(data_processing3, raw_data)
final3 = future3.result()
datalist.append(final3)
else:
space()
label = Label(root, text = name + '파일 이름 에러')
label.pack()
labels.config(text='Complete')

I want to config the label when all threads are done.
How can I solve this?
You can do it with concurrent.futures.wait - here's a sketch based on your code:
def logic(labels):
length = len(files)
global datalist
datalist = []
with concurrent.futures.ThreadPoolExecutor(length) as executor:
fs = [] # list of the Future instances created
for i in range(length):
…
if file_name.startswith('!v1'):
raw_data['업장명'] = file_name.split('_')[1]
fs.append(executor.submit(data_processing1, raw_data))
elif file_name.startswith('!v2'):
if '지점명' not in raw_data.columns:
raw_data['지점명'] = file_name.split('_')[2]
raw_data['업장명'] = file_name.split('_')[1]
fs.append(executor.submit(data_processing2, raw_data))
elif file_name.startswith('!v3'):
raw_data['업장명'] = file_name.split('_')[1]
fs.append(executor.submit(data_processing3, raw_data))
else:
…
concurrent.futures.wait(fs) # wait for all threads
for f in fs: datalist.append(f.result())
labels.config(text='Complete')
Note that although you could do it with multiple Executor instances, I used only one.

Related

How to convert a binary tree to a Newick tree using Python?

I have created a Tree object with the following structure:
class Tree:
def __init__(self, data=None):
self.data = data
self.left_child = None
self.right_child = None
An instance of this object is:
tree = Tree("A")
tree.left_child = Tree("B")
tree.right_child = Tree("C")
tree.left_child.left_child = Tree("D")
tree.left_child.right_child = Tree("E")
tree.right_child.left_child = Tree("F")
tree.right_child.right_child = Tree("G")
Its Newick format should be ((G,F)C,(E,D)B)A;
How can I convert any instance of Tree object to its Newick format?

Thanks to Blckknght for his hint.
def to_newick(tree):
newick = ""
newick = traverse(tree, newick)
newick = f"{newick};"
return newick
def traverse(tree, newick):
if tree.left_child and not tree.right_child:
newick = f"(,{traverse(tree.left_child, newick)}){tree.data}"
elif not tree.left_child and tree.right_child:
newick = f"({traverse(tree.right_child, newick)},){tree.data}"
elif tree.left_child and tree.right_child:
newick = f"({traverse(tree.right_child, newick)},{traverse(tree.left_child, newick)}){tree.data}"
elif not tree.left_child and not tree.right_child:
newick = f"{tree.data}"
else:
pass
return newick

I just thought you might want something not recursive, iterative implementations usually run faster.
from typing import List
class Tree:
def __init__(self, data=None):
self.data: str = data
self.left_child: Tree = None
self.right_child: Tree = None
def newick(self) -> str:
# Recursive version
# Practically a postorder tree traversal
if not self.left_child and not self.right_child:
return self.data
left_child = self.left_child.newick() if self.left_child else ""
right_child = self.right_child.newick() if self.right_child else ""
return f"({right_child},{left_child}){self.data}"
def newick_iter(self) -> str:
# Iterative version
# https://www.geeksforgeeks.org/iterative-postorder-traversal-using-stack/
res: str = ""
traverse_stack: List[Tree] = []
curr: Tree = self
while True:
while curr:
if curr.left_child:
traverse_stack.append(curr.left_child)
res += '('
traverse_stack.append(curr)
curr = curr.right_child
curr = traverse_stack.pop()
if curr.left_child and (traverse_stack and curr.left_child == traverse_stack[-1]):
tmp = traverse_stack.pop()
traverse_stack.append(curr)
curr = tmp
if res[-1] == ')':
res = res[:-1]
res += ','
else:
res += curr.data + ')'
curr = None
if not traverse_stack:
break
res = res[:-1]
return res
def main():
tree = Tree("A")
tree.left_child = Tree("B")
tree.right_child = Tree("C")
tree.left_child.left_child = Tree("D")
tree.left_child.right_child = Tree("E")
tree.right_child.left_child = Tree("F")
tree.right_child.right_child = Tree("G")
print(tree.newick_iter())
print(tree.newick())
if __name__ == '__main__':
main()

ValueError: ctypes objects containing pointers cannot be pickled

I am new to Python (2 month of programing/learining exp in total).
In this code all i do is get some data from MSSQL database and transfer it to DynamDB.
I just want to know why i getting this error: ValueError: ctypes objects containing pointers cannot be pickled
Its happaning it this line : p.map(self.import_sample_data_dynamo, list_to_batch).
import_sample_data_dynamo: its a function for Dynamo batch.
list_to_batch: its a list of dictionaries.
can some one plz tell me what I am doint wrong.
class GetSensorsSamplesSetToDynamoDBTable:
def __init__(self):
self.client = None
self.db = None
# MSSQL
self.connection = None
self.cursor: Cursor = None
def init(self):
# MSSQL
connect_string = 'Driver={SQL Server};' \
'Server=xxxx;' 'Database=xxx;' \
'uid=xxx;pwd=xxx'
self.connection = pypyodbc.connect(connect_string)
self.cursor = self.connection.cursor()
# DynamoDB
dynamodb = boto3.resource('dynamodb')
self.table = dynamodb.Table('xxx')
def cleanup(self):
# MSSQL
self.cursor.close()
self.connection.close()
def do_work(self):
self.init()
data = []
samples = self.get_files_received_by_ftp_prod2_data()
for sample in samples:
sample_id = sample['id']
project_id = sample['projectid']
sensor_id = sample['sensorid']
sample_time = sample['sampletime']
row = {"_id": sample_id, 'ProjectID': project_id, 'SensorID': sensor_id,
'Sample_Time': sample_time,
'Z_Fields': sample}
data.append(row)
self.split_for_batch(data)
# self.import_sample_data_dynamo(data)
def get_files_received_by_ftp_prod2_data(self):
sql_cmd = f"SELECT TOP (1000) * FROM FilesReceivedByFTP_Prod2"
self.cursor.execute(sql_cmd)
records = self.cursor.fetchall()
samples = []
records = list(records)
for res in records:
samples_data = {self.cursor.description[i][0]: res[i] for i in range(len(res))}
self.fix_bad_fields(samples_data)
samples.append(samples_data)
return samples
def split_for_batch(self, data):
temp_list = []
list_to_batch = []
while len(data) > 0:
temp_list.append(data[0])
data.pop(0)
if len(temp_list) > 24 or len(data) is 0:
list_to_batch.append(temp_list)
temp_list = []
print(len(data))
print(len(list_to_batch))
start_time = time.time()
num_workers = multiprocessing.cpu_count()
p = Pool(num_workers - 1)
p.map(self.import_sample_data_dynamo, list_to_batch)
p.close()
p.join()
elapsed = time.time() - start_time
print(f"read_all_samples elapsed {elapsed:.0F} Seconds")
def import_sample_data_dynamo(self, data):
with self.table.batch_writer() as batch:
for item in data:
ddb_data = json.loads(json.dumps(item, default=json_util.default),
parse_float=Decimal, object_hook=json_util.object_hook)
batch.put_item(Item=ddb_data)
return True
def fix_bad_fields(self, data):
for k, v in data.items():
if v == '':
data[k] = '---'
# elif type(v) == type(datetime.datetime.now()):
# # data[k] = v.strftime("%d/%m/%Y, %H:%M:%S")
# data[k] = v.timestamp()
elif type(v) is bytearray:
data[k] = "bytearray"
if __name__ == '__main__':
freeze_support()
worker = GetSensorsSamplesSetToDynamoDBTable()
worker.do_work()

How can I get tuple values (from a class) in my main function?

I am using multiprocesssing in Python 3.5.
#Multiprocessing
def main():
p1 = multiprocessing.Process(name="p1", target=datac)
p2 = multiprocessing.Process(name="p2", target=test)
p2.start()
p1.start()
if __name__ == "__main__":
main()
There are two processes=>
1.test
2.datac
Test:
def test():
#a=0
while 1:
#if 'gaze_point' in gaze_data_callback(gaze_data):
# data = gaze_data_callback['gaze_point']
# if s == 0:
# x = data[0] * 1920
# y = data[1] * 1080
# return [x, y]
#a+=1
for x in range(0, 10):
print("We're on time %d" % (x))
print('Waiting..')
#time.sleep(5)
Datac:
def datac():
while 1:
tmp = eye.__new__(eye)
tmp.__init__()
print(tmp)
This is the class:
class eye(object):
def gaze_data_callback(gaze_data):
left_3d = gaze_data['left_gaze_point_in_user_coordinate_system']
right_3d = gaze_data['right_gaze_point_in_user_coordinate_system']
#Get the gaze point of both eyes
gaze_point = ((left_3d), (right_3d))
gaze_point = tuple(mean(gaze_point, axis=0))
print("3d gaze:",gaze_point)
my_eyetracker.subscribe_to(tr.EYETRACKER_GAZE_DATA, gaze_data_callback, as_dictionary=True)
time.sleep(5)
my_eyetracker.unsubscribe_from(tr.EYETRACKER_GAZE_DATA, gaze_data_callback)
return (gaze_point)
I want to print the gaze point but it is not being printed. I am getting the object location

Kruskal algorithm in python

import heapq
from collections import defaultdict
a = list(map(int, input().split()))
nodes = a[0]
disjoint_set = [-1]*(nodes+1)
rank_set = [0]*(nodes+1)
edges = a[1]
heap = []
def get_parent(u):
if disjoint_set[u] == -1:
return u
return get_parent(disjoint_set[u])
def make_union(x, y):
x_parent = get_parent(x)
y_parent = get_parent(y)
if rank_set[x_parent] == rank_set[y_parent]:
disjoint_set[x_parent] = y_parent
rank_set[x_parent] +=1
elif rank_set[x_parent] > rank_set[y_parent]:
disjoint_set[x_parent] = y_parent
else:
disjoint_set[y_parent] = x_parent
def not_cycle(*item):
x_parent = get_parent(item[1])
y_parent = get_parent(item[2])
if x_parent == y_parent:
return False;
make_union(x_parent, y_parent)
return True
while(edges!=0):
edge = list(map(int, input().split()))
heapq.heappush(heap, [edge[2], edge[0], edge[1]])
edges-=1
cnt = 0
total = 0
while(cnt!=nodes-1):
item = heapq.heappop(heap)
if(not_cycle(*item) is True):
total+= item[0]
cnt+=1
print(total)
I implemented the kruskal algorthm in python. I am getting RecursionError:maximum recursion depth exceeded in comparison error. make_union and get_parent are method of disjoint set algorithm. I am getting the error in get_parent method. How to solve this?

In not_cycle you are passing the parents to make_union but then in make_union you are trying to get the parents again. After the first change the parents will no longer be -1 and you will recurse "forever"[1]
[1] "forever" in this case is until the maximum depth of your stack.

-
aa,bb=list(map(int,input().split()))
c=[] for i in range(bb):
z=list(map(int,input().split()))
c.append(z) c.sort(key=lambda x: x[2])
a=[]
b=[]
for i in c:
a.append((i[0]-1,i[1]-1))
b.append(i[2])
arr=[]
size=[]
for i in range(len(b)):
arr.append(i)
size.append(1)
def root(i):
while arr[i]!=i:
i=arr[i]
return i
def unions(arr,size,p,q):
root_a=root(p)
root_b=root(q)
if size[root_a]>size[root_b]:
arr[root_b]=arr[root_a]
size[root_a]+=size[root_b]
else:
arr[root_a]=arr[root_b]
size[root_b]+=size[root_a]
def kruskals(b,a,aa):
te=[]
i=0
while (len(te))<aa-1:
(p,q)=a[i]
if root(p)!=root(q):
te.append(b[i])
unions(arr,size,p,q)
i+=1
return sum(te)
print(kruskals(b,a,aa))

Runtime/Resource Warning error in Python

I was trying to run this code and encountered a run time error. I am not able to debug the code. I do believe that the error lies in functions huffman_encode and huffman_decode. The error showing is the resource warning error. Here is the code:
from linked_list import *
from huffman_bits_io import HuffmanBitsWriter as writer, HuffmanBitsReader as reader
import unittest
class Leaf:
'''class that implements Leaf'''
def __init__(self, parent, value, code, frequency):
self.parent = parent
self.frequency = frequency
self.value = value
self.code = code
def __eq__(self, other):
return type(other) == Leaf and self.parent == other.parent and self.frequency ==other.frequency and self.value==other.value and self.code==other.code
def __repr__(self):
return "[ {}, frequency = {} ]".format(self.code, self.frequency)
class Node:
'''class that implements Node'''
def __init__(self, parent, code, lchild, rchild, frequency):
self.parent = parent
self.code = code
self.frequency = frequency
self.lchild = lchild
self.rchild = rchild
def __eq__(self, other):
return type(other) == Node and self.parent==other.parent and self.code == other.code and self.frequency == other.frequency and self.lchild == other.lchild and self.rchild == other.rchild
def __repr__(self):
return "{}, freq = {}\n\left = {}\n\right = {}".format(self.code, self.frequency, self.lchild.___repr__(), self.rchild.__repr__())
def strip(string, seq):
'''this function cuts sequence from beginning of string if possible and returns result '''
if len(seq) > len(string):
return string
for i in range(len(seq)):
if seq[i] != string[i]:
return string
else:
return string[len(seq):]
def find(lst, item):
'''this function finds index of first occurrence of given element in the list and returns it or raise error if there is no such element'''
for i in range(lst.length):
if get(lst, i).value[0] == item:
return i
else:
raise ValueError
def string_traverse(node):
'''this function returns string representation of tree in pre-order traversal'''
lst = empty_list()
traverse(node, lst) #calls traverse
result_string = ''
for i in range(lst.length): #accumulate string from lst list
result_string += chr(get(lst, i).value)
return result_string
def traverse(node, code):
'''this function traverse the try and return list of leaf's value(helper for string_traverse)'''
if type(node) == Leaf:
code = add(code, node.value, code.length) #if node is Leaf than terminate recursion and return character
else:
traverse(node.lchild, code) #recursive call
traverse(node.rchild, code) #recursive call
def count_occurrences(file_name):
'''this function returns list that represent occurrence of every character of given string'''
with open(file_name) as file: #reads file
data = file.read()
lst = list()
for i in range(len(data)): #creates list of integer representation of string
lst.append(ord(data[i]))
data = lst
lst = empty_list()
for char in data: #this loop calculates occurrences of characters in the string
try:
index = find(lst, char)
lst = set(lst, index, (char, get(lst, index).value[1] + 1))
except ValueError:
lst = add(lst, (char, 1), 0)
lst = sort(lst, lambda x: x.value[1], False) #sorts occurrences
return lst
def comes_before(el1, el2):
'''this function returns True if el1 leaf should come before el2 leaf in Huffman tree meaning'''
if el1[1] < el2[1] or (el1[1] == el2[1] and type(el1[0]) is int and type(el2[0]) is int and el1[0] < el2[0]):
return True
else:
return False
def build_tree(occurrences):
'''this function returns Huffman tree based on given list of occurrences'''
if occurrences.length == 1: #if we have only one character returns Leaf with this character and code '0'
return Leaf(None, get(occurrences, 0).value[0], '0', get(occurrences, 0).value[1])
while occurrences.length != 1: #algorith described in the task
el1, occurrences = remove(occurrences, 0)
el2, occurrences = remove(occurrences, 0)
el1, el2 = el1.value, el2.value
if not comes_before(el1, el2): #finds order of elements in the tree
el1, el2 = el2, el1
new = Node(None, '', None, None, el1[1] + el2[1]) #creates new node
if type(el1[0]) is Node:
el1[0].code = '0' #sets up code for node
el1[0].parent = new
new.lchild = el1[0]
else:
new.lchild = Leaf(new, el1[0], '0', el1[1]) #if el1 is character not Node we will create leaf for that character
if type(el2[0]) is Node:
el2[0].code = '1' #sets up code for node
el2[0].parent = new
new.rchild = el2[0]
else:
new.rchild = Leaf(new, el2[0], '1', el2[1]) #if el2 is character not Node we will create leaf for that character
occurrences = insert_sorted(occurrences, (new, new.frequency), comes_before) #inserts new node
return get(occurrences, 0).value[0]
def collect_code(node, code = ''):
'''this function traverse Huffman tree and collect code for each leaf and returns them as nested list(helper for create_code)'''
if type(node) == Leaf:
lst = empty_list()
return add(lst, (node.value, code + node.code), 0) #if node is Leaf terminates recursion and returns code for the leaf
else:
lst = empty_list()
lst = add(lst, collect_code(node.lchild, code + node.code), 0) #recursive call
lst = add(lst, collect_code(node.rchild, code + node.code), 0) #recursive call
return lst
def create_code(tree):
'''this function unpack result of calling collect_code and return Huffman code as a list of tuples'''
code = collect_code(tree) #calls collect code
i = 0
while i < code.length: #this loop unpacks list
if type(get(code, i).value) is not tuple:
item, code = remove(code, i)
for j in range(item.value.length):
code = add(code, get(item.value, j).value, i)
continue
i += 1
return code
def huffman_encode(input_file, output_file):
'''task describe this function'''
occurrences = count_occurrences(input_file)
tree = build_tree(occurrences)
string = empty_list()
t = traverse(tree, string)
code = create_code(tree)
with open(input_file) as file:
string = file.read()
result_string = ''
for i in range(len(string)): #this loop encodes string using code produced by create_code function
for j in range(code.length):
temp = get(code, j).value
if string[i] == chr(temp[0]):
result_string += temp[1]
break
for i in range(occurrences.length):
temp = get(occurrences, i).value
occurrences = set(occurrences, i, (chr(temp[0]), temp[1]))
occurrences = sort(occurrences, lambda x: x.value[0], False)
file = writer(output_file)
file.write_int(code.length)
for i in range(occurrences.length):
temp = get(occurrences, i).value
file.write_byte(ord(temp[0]))
file.write_int(temp[1])
file.write_code(result_string)
file.close()
return string_traverse(tree)
def huffman_decode(input_file, output_file):
'''task describe this function'''
file = reader(input_file)
number_of_codes = file.read_int()
occurrences = empty_list()
for i in range(number_of_codes):
char = file.read_byte()
number = file.read_int()
occurrences = add(occurrences, (char, number), 0)
occurrences = sort(occurrences, lambda x: x.value[1], False)
tree = build_tree(occurrences)
code = sort(create_code(tree), lambda x: x.value[0], False)
occurrences = sort(occurrences, lambda x: x.value[0], False)
quantity_of_bits = 0
for i in range(code.length):
quantity_of_bits += get(occurrences, i).value[1]*len(get(code, i).value[1])
occurrences = sort(occurrences, lambda x: x.value[1], False)
bit_string = ''
for i in range(quantity_of_bits):
bit_string = bit_string + ('1' if file.read_bit() else '0')
result_string = ''
while bit_string: #this loop decodes string using code produced by create_code function
for j in range(code.length):
temp = get(code, j).value
stripped = strip(bit_string, temp[1])
if len(stripped) < len(bit_string):
result_string += chr(temp[0])
bit_string = stripped
break
with open(output_file, 'w') as file:
file.write(result_string)
file.close()
class Test(unittest.TestCase):
def test_strip1(self):
self.assertEqual(strip('123456', '123'), '456')
def test_strip2(self):
self.assertEqual(strip('123', '4567'), '123')
def test_strip3(self):
self.assertEqual(strip('123', '456'), '123')
def test_find(self):
lst = empty_list()
lst = add(lst, (1, 'b'), 0)
lst = add(lst, (2, 'a'), 1)
self.assertEqual(find(lst, 2), 1)
def test_find_raise(self):
lst = empty_list()
lst = add(lst, (1, 'b'), 0)
lst = add(lst, (2, 'a'), 1)
self.assertRaises(ValueError, find, lst, 5)
def test_occurrences(self):
lst = empty_list()
lst = add(lst, (97, 5), 0)
lst = add(lst, (98, 3), 0)
lst = add(lst , (99, 7), 2)
self.assertEqual(str(count_occurrences(r'test2.txt')), str(lst))
def test_create_code_and_tree_build(self):
occurrences = count_occurrences(r'test2.txt')
tree = build_tree(occurrences)
code = create_code(tree)
code = sort(code, lambda x: x.value[0], False)
self.assertEqual(str(code), "[(97, '11'), (98, '10'), (99, '0')]")
def test_huffman_encode_decode(self):
string = huffman_encode(r'test1.txt', r'test_out.txt')
huffman_decode(r'test_out.txt', r'test_decode.txt')
self.assertEqual(string, 'a')
with open(r'test1.txt') as file1:
with open(r'test_decode.txt') as file2:
self.assertEqual(file1.read(), file2.read())
file2.close()
file1.close()
def test_huffman_encode_decode3(self):
string = huffman_encode(r'test2.txt', r'test2_out.txt')
huffman_decode(r'test2_out.txt', r'test2_decode.txt')
self.assertEqual(string, 'cba')
with open(r'test2.txt') as file1:
with open(r'test2_decode.txt') as file2:
self.assertEqual(file1.read(), file2.read())
file2.close()
file1.close()
def test_huffman_encode_decode2(self):
string = huffman_encode(r'test3.txt', r'test3_out.txt')
huffman_decode(r'test3_out.txt', r'test3_decode.txt')
self.assertEqual(string, 'edcba')
with open(r'test3.txt') as file1:
with open(r'test3_decode.txt') as file2:
self.assertEqual(file1.read(), file2.read())
file2.close()
file1.close()
if __name__ == '__main__':
unittest.main()
And following is the error:
...
Warning (from warnings module):
File "C:\Users\Vikas\Documents\fwdregardingprojectdevelopment\huffman.py", line 212
with open(output_file, 'w') as file:
ResourceWarning: unclosed file <_io.BufferedReader name='test_out.txt'>
.
Warning (from warnings module):
File "C:\Users\Vikas\Documents\fwdregardingprojectdevelopment\huffman.py", line 212
with open(output_file, 'w') as file:
ResourceWarning: unclosed file <_io.BufferedReader name='test3_out.txt'>
.
Warning (from warnings module):
File "C:\Users\Vikas\Documents\fwdregardingprojectdevelopment\huffman.py", line 212
with open(output_file, 'w') as file:
ResourceWarning: unclosed file <_io.BufferedReader name='test2_out.txt'>
.....
----------------------------------------------------------------------
Ran 10 tests in 0.272s
OK

it seems somewhere in your code file 'out_file' is opened and not closed
find where it is opened and close it :
out_file.close()

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Run the code When all Threads are done (Python ThreadPoolExecutor) - multithreading

Related

How to convert a binary tree to a Newick tree using Python?

ValueError: ctypes objects containing pointers cannot be pickled

How can I get tuple values (from a class) in my main function?

Kruskal algorithm in python

Runtime/Resource Warning error in Python

Categories

Resources