cx_freeze creates multiple instances of program
I'm trying to freeze some Python 3.3 code using cx_Freeze and, after building, the resulting test.exe creates an indefinite number of instances of the program, making my Windows 7 system unstable. It works as intended when run directly in Python, but misbehaves once frozen. Here are the imports in my main script:
import sys
from multiprocessing import Pool, Queue
from threading import Thread
from time import sleep, time
from inspect import getmembers
from PyQt5 import QtWidgets, QtCore, QtGui
from main_ui import Ui_MainWindow # Generated UI from pyuic, imports
# QtWidgets, QtCore, and QtGui
from devices import Device1, Device2 # Both are serial.Serial objects
The setup.py script:
import sys
from cx_Freeze import setup, Executable

product_name = 'Product Name'

# Raw strings so the backslashes in the Windows paths are not treated as escapes
path_platforms = (r"C:\Python33\Lib\site-packages\PyQt5\plugins\platforms\qwindows.dll",
                  r"platforms\qwindows.dll")

includes = ['PyQt5.QtWidgets', 'PyQt5.QtCore', 'PyQt5.QtGui']
include_files = [path_platforms]
excludes = ['_gtkagg', '_tkagg', 'bsddb', 'curses', 'email', 'pywin.debugger',
            'pywin.debugger.dbgcon', 'pywin.dialogs', 'tcl',
            'Tkconstants', 'Tkinter']
packages = ['os']
path = []

bdist_msi_options = {'add_to_path': False}

build_exe_options = {'includes': includes,
                     'include_files': include_files,
                     'excludes': excludes,
                     'packages': packages,
                     'path': path,
                     'silent': True}

base = None
if sys.platform == 'win32':
    base = 'Win32GUI'

exe = Executable(script='main.pyw',
                 base=base,
                 targetName='test.exe')

setup(name=product_name,
      version='1.0',
      description='The Test Program',
      executables=[exe],
      options={'bdist_msi': bdist_msi_options, 'build_exe': build_exe_options})
And when I run python setup.py build, the following missing-module warnings are reported:
Missing modules:
? System imported from serial.serialcli
? TERMIOS imported from serial.serialposix
? __main__ imported from bdb
? _gestalt imported from platform
? _posixsubprocess imported from subprocess
? clr imported from serial.serialcli
Despite these warnings, a test.exe file is still generated. When I execute it, it spawns a seemingly infinite number of windows, and the only way to stop it is to hard-reset the computer. Again, the main script works just fine under Python, but fails once frozen. Any help would be greatly appreciated!
EDIT: As requested, here is my main script:
import sys
from multiprocessing import Pool, Queue, freeze_support
from threading import Thread
from time import sleep, time
from inspect import getmembers
from PyQt5 import QtWidgets, QtCore, QtGui
from main_ui import Ui_MainWindow # Generated by pyuic
import parts # Imports time.sleep, datetime.datetime, and threading.Thread
from devices import GwPowerSupply, DataQ # Imports time.sleep and serial.Serial
# GwPowerSupply is a serial.Serial object to handle communications with a GwInstek PSP-603
# DataQ is also a serial.Serial object to handle communications with a DataQ-155

def file_logger(message):
    logging = True
    if logging:
        with open('log.txt', 'a') as f:
            f.write('{}: {}\n'.format(time(), message))

def compute():
    """
    A function, designed as an independent process, to gather data from the DataQ and Power Supply
    input queues, convert to human values, and output as a single queue
    """
    compute.running = True
    compute.paused = False
    # The initial dict to pass on to the queue
    data_dict = {'upstream': 0, 'downstream': 0, 'high_flow': 0, 'low_flow': 0, 'voltage': 0, 'current': 0, 'offset': 0}
    while compute.running:
        if compute.paused or compute.input_queue.empty():
            continue
        # Get the raw voltage data and convert to pressure/flow
        analog_input = compute.input_queue.get()
        file_logger('Compute received {}'.format(analog_input))
        if analog_input is None:
            continue
        # Four items come from the DataQ for pressures and flow
        if len(analog_input) == 4:
            # Pressure transducers are both 1-11V, 0-500 PSI
            if isinstance(analog_input[0], (float, int)):
                data_dict['upstream'] = (analog_input[0]-1) * 50
            if isinstance(analog_input[1], (float, int)):
                data_dict['downstream'] = (analog_input[1]-1) * 50
            # High flow is 0-5V, 0-1000 liters/min
            if isinstance(analog_input[2], (float, int)):
                data_dict['high_flow'] = (analog_input[2]*200) * .035147  # Convert SLM to SCFM
            # Low flow is 0-5V, 0-5 liters/min
            if isinstance(analog_input[3], (float, int)):
                data_dict['low_flow'] = analog_input[3] * 1000  # Convert SLM to SCCM
        # Two items are from the power supply for voltage and current
        elif len(analog_input) == 2:
            if isinstance(analog_input[0], (float, int)):
                data_dict['voltage'] = analog_input[0]
            if isinstance(analog_input[1], (float, int)):
                data_dict['current'] = analog_input[1]
        # A single item is the offset from the Valve program
        elif len(analog_input) == 1:
            data_dict['offset'] = analog_input[0]
        else:
            return
        compute.output_queue.put(data_dict)
        file_logger('Compute put out {}'.format(data_dict))

def data_q_producer():
    """
    A function, designed as an independent process, to gather data from the DataQ and feed it
    to the computing process
    """
    # Initialize COM port
    data_q = DataQ('COM4')
    data_q.start()
    # Continuously gather data
    while True:
        if not data_q.paused and not data_q.stopped:
            # Gather data and put to queue, either for response or normal
            file_logger('Getting Data from DataQ')
            if data_q.response:
                data = data_q.get_response_data()
                data_q_producer.response_queue.put(data)
            else:
                data = data_q.get_data()
                data_q_producer.queue.put(data)
            file_logger('Got {} from DataQ'.format(data))
        # If a command is received, such as to energize a relay, handle it
        if not data_q_producer.output.empty():
            output = data_q_producer.output.get()
            file_logger('Sending {} to DataQ'.format(output))
            # Strings are to stop, run response, etc.
            if isinstance(output, str):
                if output == 'stop':
                    data_q.set_output(0, 0, 0, 0)
                    data_q.stop()
                    data_q.close()
                    data_q_producer.queue.put([])
                    return
                elif output == 'start resp':
                    data_q.response = True
                    data_q.pause()
                    data_q.start_resp()
                    data_q.start()
                elif output == 'stop resp':
                    print('Stopping Response Test')
                    data_q.pause()
                    data_q.setup()
                    data_q.start()
                    data_q.response = False
            # If a single float is received, it is the new leakage offset
            elif isinstance(output, float):
                data_q_producer.queue.put([output, ])
            # A list is to set the digital outputs
            elif isinstance(output, list):
                data_q.set_output(output[0], output[1], output[2], output[3])

def pps_producer():
    """
    A function, designed as an independent process, to gather data from the Power Supply and feed it
    to the computing process
    """
    # Initialize COM port
    pps = GwPowerSupply('COM1')
    pps.set_relay(True)
    # Continuously gather voltage and current readings
    while True:
        file_logger('Getting Data from Power Supply')
        voltage = pps.get_value('V')
        current = pps.get_value('A')
        file_logger('Got {}V, {}A from power supply'.format(voltage, current))
        pps_producer.queue.put([voltage, current])
        # If a command is received to change voltage, current, etc., handle it
        if not pps_producer.output.empty():
            output = pps_producer.output.get()
            file_logger('Got {} for Power Supply'.format(output))
            # Bool is to set the relay on or off
            if isinstance(output, bool):
                pps.set_relay(output)
            # String is primarily to stop the power supply (set the relay to off)
            elif isinstance(output, str) and output == 'stop':
                pps.set_relay(False)
                pps.close()
                pps_producer.queue.put([])
                return
            # A tuple is changing a power supply output setting
            else:
                pps.set_value(output[0], output[1])

def pool_init(input_queue, output_queue, data_q_out, pps_out, response_queue):
    """
    Initializes the above functions with external queue variables.
    See http://stackoverflow.com/a/3843313/852994 for more details
    """
    compute.output_queue = output_queue
    compute.input_queue = input_queue
    data_q_producer.queue = input_queue
    data_q_producer.output = data_q_out
    data_q_producer.response_queue = response_queue
    pps_producer.queue = input_queue
    pps_producer.output = pps_out

class MainGui(QtWidgets.QMainWindow):
    """
    The main interface builder for the program
    """
    def __init__(self):
        # Initialize MainGui and create the window
        super(MainGui, self).__init__()
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        # The current valve part being tested
        self.valve = None
        # Disables the 'Energize' button when running ATP
        self.auto_mode = False
        # The current measured leakage offset based on the current run's test
        self.measured_offset = 0
        # The leakage offset table based on initial testing
        # TODO: retest offsets and go to 450 PSI
        self.offset_table = ((-50, 30), (0, 31), (50, 44), (100, 37), (150, 41), (200, 44),
                             (250, 49), (300, 54), (350, 63), (400, 72), (450, 81))
        # A table of calculated leakage offsets to give single-increment points interpolated
        # from the tested values above
        self.calculated_offsets = []
        for i in range(len(self.offset_table)-1):
            for x in range(self.offset_table[i][0], self.offset_table[i+1][0]):
                x1 = self.offset_table[i][0]
                x2 = self.offset_table[i+1][0]
                y1 = self.offset_table[i][1]
                y2 = self.offset_table[i+1][1]
                y = ((x-x1) * (y2-y1)) / (x2-x1) + y1
                self.calculated_offsets.append(y)
        # Connect UI clicks and presses to commands
        self.ui.btn_all.clicked.connect(lambda: self.select_all_tests(True))
        self.ui.btn_none.clicked.connect(lambda: self.select_all_tests(False))
        self.ui.comboBox.currentTextChanged.connect(self.select_part)
        self.ui.btn_energize.clicked.connect(self.energize)
        self.ui.btn_start.clicked.connect(self.start_tests)
        self.ui.btn_skip.clicked.connect(self.skip_press)
        # Select the initial part
        self.select_part()
        # Initialize queues
        self.input_queue = Queue(10)
        self.output_queue = Queue(10)
        self.data_q_out = Queue(10)
        self.pps_out = Queue(10)
        self.response_queue = Queue(400)
        self.test_queue = Queue(5)
        self.log_queue = Queue(10)
        # Initialize timer to update on-screen values
        self.timer = QtCore.QTimer()
        self.timer.timeout.connect(self.update_data)
        self.timer.start(25)
        # Initialize process pool
        self.pool = Pool(processes=4, initializer=pool_init,
                         initargs=(self.input_queue, self.output_queue, self.data_q_out,
                                   self.pps_out, self.response_queue))
        # Place the data producing functions into the process pool
        self.pool.apply_async(func=data_q_producer)
        self.pool.apply_async(func=compute)
        self.pool.apply_async(func=pps_producer)

    def closeEvent(self, *args, **kwargs):
        # Verify COM ports are closed properly before exiting
        file_logger('Attempting Exit')
        self.timer.stop()
        self.test_queue.put('ESC')
        self.data_q_out.put('stop')
        self.pps_out.put('stop')
        sleep(.5)
        file_logger('Exited')

    def keyPressEvent(self, event):
        file_logger('Keypress Event: {}'.format(event.key()))
        # Capture different key presses for different functions
        if event.key() == QtCore.Qt.Key_Return:
            self.test_queue.put(float(self.ui.lineEdit.text()))
        elif event.key() == QtCore.Qt.Key_Backspace:
            self.test_queue.put('ESC')
        elif event.key() == QtCore.Qt.Key_S:
            self.test_queue.put('SKIP')

    def skip_press(self):
        file_logger('Skip press Event')
        self.test_queue.put('SKIP')

    def print_to_log(self, text):
        # Enter a line into the log with auto-scrolling
        self.ui.log_output.append(text)
        cursor = self.ui.log_output.textCursor()
        cursor.movePosition(QtGui.QTextCursor.End)
        self.ui.log_output.setTextCursor(cursor)

    def update_data(self):
        # Update status boxes
        if not self.output_queue.empty():
            file_logger('Update Interface Event')
            data_dict = self.output_queue.get()
            # Before calculating corrected leakage, get the offset
            self.measured_offset = data_dict['offset']
            # Modify low flow with offset
            data_dict['low_flow'] -= self.measured_offset - self.calculated_offsets[int(data_dict['upstream'])]
            # Update the status on the UI
            self.ui.upstream_pressure.setText('{:.1f}'.format(data_dict['upstream']))
            self.ui.downstream_pressure.setText('{:.1f}'.format(data_dict['downstream']))
            self.ui.flow_sensor.setText('{:.2f}'.format(data_dict['high_flow']))
            self.ui.leakage_sensor.setText('{:.0f}'.format(data_dict['low_flow']))
            self.ui.voltage.setText('{:.2f}'.format(data_dict['voltage']))
            self.ui.current.setText('{:.3f}'.format(data_dict['current']))
            # Pass the values on to the test queue so the ATP process can use them
            self.test_queue.put(data_dict)
            if self.test_queue.full():
                self.test_queue.get()
            file_logger('Updated Interface')
        # Update log
        if not self.log_queue.empty():
            text = self.log_queue.get()
            file_logger('Printing to log: {}'.format(text))
            # For the countdown timer, delete the previous line, but not the first count!
            if isinstance(text, int) and text != 1:
                cursor = self.ui.log_output.textCursor()
                cursor.movePosition(QtGui.QTextCursor.End, QtGui.QTextCursor.MoveAnchor)
                cursor.movePosition(QtGui.QTextCursor.StartOfLine, QtGui.QTextCursor.KeepAnchor)
                cursor.removeSelectedText()
                # Delete the last newline character so the number doesn't print on the next line
                cursor.deletePreviousChar()
            self.print_to_log(str(text))
            file_logger('Printed to log: {}'.format(text))

    def select_all_tests(self, state=True):
        # Select (or deselect if state is False) all tests
        for i in range(len(self.ui.listWidget)):
            self.ui.listWidget.item(i).setSelected(state)

    def select_part(self):
        # Update the test list with a new part every time the combo box is changed
        part_name = self.ui.comboBox.currentText()
        for name, obj in getmembers(parts):
            # Get only the objects labeled as 'Part'
            if 'Part' in name:
                # Get the object with a part name that corresponds to the selected part
                if part_name in obj().part_name:
                    self.valve = obj()
                    # Clear out the current contents of the test list
                    self.select_all_tests(False)
                    self.ui.listWidget.clear()
                    # Update the test list with the new tests
                    for test in self.valve.procedure:
                        self.ui.listWidget.addItem(test[0])
                    # Pre-select all tests
                    self.select_all_tests()
                    # Set coils up properly; if there is only one coil in the unit, disable the second coil
                    self.ui.coil_1.setChecked(True)
                    if self.valve.coils < 2:
                        self.ui.coil_2.setChecked(False)
                        self.ui.coil_2.setEnabled(False)
                    else:
                        self.ui.coil_2.setEnabled(True)
                        self.ui.coil_2.setChecked(True)
                    return

    def energize(self):
        # Energize function for the energize button, but only if not running any test!
        if self.auto_mode:
            pass
        else:
            if self.ui.btn_energize.isChecked():
                coil1 = int(self.ui.coil_1.checkState() / 2)
                coil2 = int(self.ui.coil_2.checkState() / 2)
                self.data_q_out.put([coil1, coil2, 2, 2])
            else:
                self.data_q_out.put([0, 0, 2, 2])

    def start_tests(self):
        file_logger('Starting Tests')
        # Start the testing thread
        self.ui.log_output.setHtml('')
        t = Thread(target=self.run_tests)
        t.daemon = True
        t.start()

    def run_tests(self):
        # Don't let the user try to start while running nor change the part number mid-test!
        self.ui.btn_start.setEnabled(False)
        self.ui.comboBox.setEnabled(False)
        line = '-----------------------------------------------'
        for test in self.valve.procedure:
            # Verify the test is selected to run by iterating through all the test items in
            # the test list and, if matching the current test name, verify the checked state
            for i in range(len(self.ui.listWidget)):
                if test[0] == self.ui.listWidget.item(i).text() and self.ui.listWidget.item(i).isSelected():
                    file_logger('Testing {}'.format(test[0]))
                    self.log_queue.put('<b>{1}\r\nRunning {0}\r\n{1}</b> '.format(test[0], line))
                    test[1](self.log_queue, self.test_queue, self.pps_out, self.data_q_out, self.response_queue)
                    # Tell the user of an escape or a skip
                    if self.valve.escape:
                        file_logger('Escaped {}'.format(test[0]))
                        self.log_queue.put('<b><font color="blue">Escaped</font></b> ')
                        self.ui.btn_start.setEnabled(True)
                        self.ui.comboBox.setEnabled(True)
                        self.valve.escape = False
                        # If escaping, break out of all loops
                        return
                    elif self.valve.skip:
                        file_logger('Skipped {}'.format(test[0]))
                        self.log_queue.put('<b><font color="orange">Skipped</font></b> ')
                        self.valve.skip = False
                    else:
                        file_logger('Test Successful')
                    # Once the test is found, break out of the test name matching loop
                    break
                # If the test is not selected, notify the user by displaying 'Skipping'
                elif test[0] == self.ui.listWidget.item(i).text():
                    self.log_queue.put('<b>{1}</b>\r\nSkipping {0}'.format(test[0], line))
                    break
        # Re-enable starting tests and selecting part numbers
        self.ui.btn_start.setEnabled(True)
        self.ui.comboBox.setEnabled(True)

if __name__ == '__main__':
    freeze_support()
    #input_queue = Queue(10)
    #output_queue = Queue(10)
    #data_q_out = Queue(10)
    #pps_out = Queue(10)
    #response_queue = Queue(400)
    ## Initialize process pool
    #pool = Pool(processes=4, initializer=pool_init,
    #            initargs=(input_queue, output_queue, data_q_out, pps_out, response_queue))
    #
    ## Place the data producing functions into the process pool
    #pool.apply_async(func=data_q_producer)
    #pool.apply_async(func=compute)
    #pool.apply_async(func=pps_producer)
    file_logger('####### NEW RUN #######\n')
    app = QtWidgets.QApplication(sys.argv)
    window = MainGui()
    window.show()
    file_logger('####### END RUN #######\n')
    sys.exit(app.exec_())
You need to add the following code to your main, before anything else:

from multiprocessing import freeze_support

freeze_support()

The reason: Windows has no fork, so multiprocessing starts each worker process by launching your executable again. In a frozen program, freeze_support() is what recognizes those relaunches and diverts them into the worker code. If it isn't reached before the rest of your startup code, every worker runs the full GUI startup, creates its own Pool, and spawns yet more processes, which is exactly the window storm you are seeing. See this stackoverflow post.
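
For illustration, here is a minimal sketch of an entry point with the guard in place (MainGui stands in for the window class from the question; the only point being made is that freeze_support() executes before the QApplication or the Pool is created):

import sys
from multiprocessing import freeze_support
from PyQt5 import QtWidgets

if __name__ == '__main__':
    # Must be the first statement under the guard: in a frozen exe this call
    # detects when the process was launched as a multiprocessing worker and
    # runs the worker's code instead of falling through to the GUI startup below
    freeze_support()
    app = QtWidgets.QApplication(sys.argv)
    window = MainGui()  # the question's window class; its __init__ creates the Pool
    window.show()
    sys.exit(app.exec_())

On any operating system other than Windows, freeze_support() has no effect, so the call is safe to leave in cross-platform code.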