How to run a function in 'background' - python-3.x

I'm parsing the last line of a continuously updating log file. If it matches, I want to append the match to a list and start another function using that data. I need to keep watching for new entries and parse them even while the new function runs.
I've been working on this from a few different angles for about a week with varying success. I tried threading, but ran into issues getting the return value; I tried using a global variable but couldn't get it working. I'm now trying asyncio, but I'm having even more issues getting that to work.
def tail():
    global match_list
    f.seek(0, os.SEEK_END)
    while True:
        line = f.readline()
        if not line:
            time.sleep(0.1)
            continue
        yield line

def thread():
    while True:
        tail()

def somefun(list):
    global match_list
    # do things here
    pass

def main():
    match_list = []
    f = open(r'file.txt')
    thread = threading.Thread(target=thread, args=(f,))
    thread.start()
    while True:
        if len(match_list) >= 1:
            somefun(match_list)

if __name__ == '__main__':
    main()
I wrote the above from memory.
I want tail() to return the line to a list that somefun() can use.
I'm having trouble getting it to work; I'll take threading, asyncio, anything to get it running at this point.

In asyncio you might use two coroutines: one that reads from the file, and another that processes it. Since they communicate using a queue, they don't need a global variable. For example:
import os, asyncio

async def tail(f, queue):
    f.seek(0, os.SEEK_END)
    while True:
        line = f.readline()
        if not line:
            await asyncio.sleep(0.1)
            continue
        await queue.put(line)

async def consume(queue):
    lines = []
    while True:
        next_line = await queue.get()
        lines.append(next_line)
        # it is not clear if you want somefun to receive the next
        # line or *all* lines, but it's easy to do either
        somefun(next_line)

def somefun(line):
    # do something with line
    print(f'line: {line!r}')

async def main():
    queue = asyncio.Queue()
    with open('file.txt') as f:
        await asyncio.gather(tail(f, queue), consume(queue))

if __name__ == '__main__':
    asyncio.run(main())
    # or, on Python older than 3.7:
    # asyncio.get_event_loop().run_until_complete(main())
The beauty of an asyncio-based solution is that you can easily start an arbitrary number of such coroutines in parallel (e.g. you could start gather(main1(), main2()) in an outer coroutine, and run that), and have them all share the same thread.
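For instance, here is a sketch (file names hypothetical) that reuses tail() and consume() from above to run two independent pipelines on one event loop:

import asyncio

async def pipeline(path):
    # one queue per producer/consumer pair
    queue = asyncio.Queue()
    with open(path) as f:
        await asyncio.gather(tail(f, queue), consume(queue))

async def supervisor():
    # both pipelines share the same thread and event loop
    await asyncio.gather(pipeline('file1.txt'), pipeline('file2.txt'))

asyncio.run(supervisor())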

With a few small fixes you can almost run this :) (comments inline)
import os
import threading
import time

match_list = []  # should be at module scope so both threads see it

def tail(f):
    f.seek(0, os.SEEK_END)
    while True:
        line = f.readline()
        if not line:
            time.sleep(0.1)
            continue
        yield line

def thread(f):
    for line in tail(f):
        match_list.append(line)  # append the line
    print("thread DONE!")

def somefun(match_list):
    # do things here
    while match_list:
        line = match_list.pop(0)
        print(line)

def main():
    f = open(r'file.txt')  # open the file here and pass it to the thread
    t = threading.Thread(target=thread, args=(f,))
    t.start()
    while True:
        if match_list:
            somefun(match_list)
        time.sleep(0.1)  # <-- don't burn the CPU :)
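For what it's worth, here is a thread-safe variant (my own sketch, not part of the answer above) that replaces the shared list with queue.Queue, removing both the global and the busy-wait:

import os
import queue
import threading
import time

def tail(f, q):
    f.seek(0, os.SEEK_END)
    while True:
        line = f.readline()
        if not line:
            time.sleep(0.1)
            continue
        q.put(line)  # thread-safe hand-off to the consumer

def main():
    q = queue.Queue()
    f = open('file.txt')
    threading.Thread(target=tail, args=(f, q), daemon=True).start()
    while True:
        line = q.get()  # blocks until a line arrives; no polling needed
        print('got:', line.rstrip())

if __name__ == '__main__':
    main()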


python global variable changes unexpectedly

I have this code that is supposed to log to a memory variable (the_message_lines):
import logging
from thompcoutils import log_utils

the_message_lines = []

class MemoryHandler(logging.StreamHandler):
    """
    Handler that keeps all log messages in memory until the beginning of the day
    or the size exceeds a value
    """
    def emit(self, record: logging.LogRecord):
        global the_message_lines
        try:
            msg = self.format(record)
            the_message_lines.append(msg)
            self.flush()
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            self.handleError(record)

    @staticmethod
    def get_lines():
        return the_message_lines

    @staticmethod
    def reset_lines():
        global the_message_lines
        the_message_lines.clear()

if __name__ == '__main__':
    log_utils.load_log_config('logging.ini')
    logger = log_utils.get_logger()
    logger.warning('beginning')
    for i in range(3):
        lines = MemoryHandler.get_lines()
        logger.warning('remaining %d seconds', i, extra={'same_line': True})
    logger.warning('end')
    for line in MemoryHandler.get_lines():
        print(line)
It behaves as it should, but the_message_lines seems to come and go(?).
If I put a breakpoint in the emit method and observe the_message_lines, it behaves as it should, accumulating log messages every time.
If I put a breakpoint in the loop that is logging, the_message_lines is empty every time!
So, between log requests, the_message_lines appears to delete itself, but inside emit it is fine.
At the end of the main block, get_lines() returns an empty list.
What am I missing?

Emulating the read(bytes) function with readline() on a Python subprocess pipe

I'm trying to do a non-blocking read from a subprocess pipe on Windows with Python 3.
I made an implementation that pipes an FFMPEG video stream. It works on Linux, but since fcntl doesn't work on Windows I'm looking at other ways of avoiding read/write blocking.
Since readline() isn't blocking the read stream, this approach should work; however, I need to recreate the read(bytes) function using readline(). I need to be able to read entire frames of size X (1920*1080*3) from the stream with readline. I'm guessing '\n' occurrences need to be handled as well.
The code so far:
def enqueue_output(self, out, queue):
    # This function should read(SIZE) from stream and add it to queue
    # IT DOES NOT CURRENTLY WORK
    temp = bytes()
    SIZE = 1920 * 1080 * 3
    while True:
        for line in iter(out.readline, b''):
            if len(temp) >= SIZE:
                queue.put(line)
            temp += line

ffmpeg = sp.Popen(ffmpegCmd, stdin=sp.PIPE,
                  stdout=sp.PIPE, stderr=sp.PIPE, close_fds=ON_POSIX, shell=True)
q = Queue()
t = Thread(target=self.enqueue_output, args=(ffmpeg.stdout, q))
t.daemon = True  # thread dies with the program
t.start()
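For reference, a minimal sketch (my assumption of what the function should do, not working code from the question) that reads fixed-size frames with read() instead of readline(), since raw video bytes can contain b'\n' anywhere:

def enqueue_frames(out, queue, frame_size=1920 * 1080 * 3):
    # read() blocks only this worker thread; loop because a pipe
    # may return fewer bytes than requested in a single read
    while True:
        buf = b''
        while len(buf) < frame_size:
            chunk = out.read(frame_size - len(buf))
            if not chunk:  # pipe closed, ffmpeg exited
                return
            buf += chunk
        queue.put(buf)  # exactly one full frame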
The read function (follow-up question: would it be better to have the byte-assembly code in the read function?):
img_thread = threading.Thread(target=self.reader, args=(q,))
img_thread.start()

def reader(self, q):  # TODO: catch exceptions
    s_buf = ''
    while True:
        # read line without blocking
        try:
            line = q.get_nowait()  # or q.get(timeout=.1)
        except Empty:
            print('no output yet')
        else:  # got line
            print(line)
            try:
                img = np.frombuffer(line, dtype=np.uint8)
                img = img.reshape((1080, 1920, 3))
            except Exception as e:
                print(e)
            else:
                cv2.imshow("test", img)
                cv2.waitKey(1)
Some example data of how it looks when I just use readline():
b'\x05\x07\t\x06\x08\n'
b'\x05\x07\t\x06\x08\n'
b'\x07\t\x0b\x05\x07\t\t\x0b\r\x06\x08\n'
b'\x00\x02\x04\x01\x03\x05\x05\x07\t\x05\x07\t\x05\x07\t\x05\x07\t\x01\x03\x05\x00\x02\x04\t\x0b\r\x06\x08\n'
b'\x05\x08\x08\x05\x08\x08\x03\x06\x06\x01\x04\x04\x02\x05\x05\x06\t\t\x05\x08\x08\x05\x08\x08\x05\x08\x08\x05\x08\x08\x05\x08\x08\x05\x08\x08\x05\x08\x08\x05\x08\x08\x02\x04\x06\x05\x07\t\x05\x07\t\x02\x04\x06\x05\x07\t\x05\x07\t\x05\x07\t\x05\x07\t\x05\x07\t\x06\x08\n'
b'\x05\x07\t\x00\x00\x02\x07\t\x0b\x03\x05\x07\x00\x00\x00\x00\x00\x00\x03\x02\x04\x02\x01\x03\t\x08\n'
b'\x0c\x0b\r\x08\x07\t\x06\x05\x07\x04\x03\x05\x00\x00\x00\x05\x04\x06\x05\x04\x06\x05\x04\x06\x05\x04\x06\x06\x05\x07\x06\x05\x07\x06\x05\x07\x06\x05\x07\x06\x05\x07\x05\x04\x06\x06\x05\x07\x06\x05\x07\x05\x04\x06\x05\x04\x06\x05\x04\x06\x06\x05\x07\x05\x04\x06\x08\x07\t\t\x08\n'
b'\t\x08\n'
b'\x08\x07\t\x06\x05\x07\x05\x04\x06\x06\x05\x07\x08\x07\t\x02\x01\x03\x02\x01\x03\x08\x07\t\x06\x05\x07\x06\x05\x07\x06\x05\x07\x08\x07\t\x06\x05\x07\x06\x05\x07\x06\x05\x07\x06\x05\x07\x05\x04\x06\x05\x04\x06\x05\x04\x06\x05\x04\x06\x04\x04\x04\x05\x05\x05\x04\x04\x04\x04\x04\x04\x05\x05\x05\x05\x05\x05\x04\x04\x04\x05\x05\x05\x03\x03\x03\x03\x03\x03\x05\x05\x05\x06\x06\x06\x08\x08\x08\x06\x06\x06\x05\x05\x05\x02\x02\x02\x00\x00\x00\x04\x00\x03\x08\x04\x07\n'
b'\x06\t\x08\x04\x07\x07\x03\x06\x07\x03\x06\x07\x03\x06\x07\x03\x06\x07\x03\x06\x06\x02\x05\x05\x01\x04\x06\x02\x05\x06\x02\x05\x07\x03\x06\x08\x04\x07\x03\x03\x03\n'
b'\n'
b'\n'
b'\t\t\t\x06\x06\x06\t\t\t\x05\x05\x05\x04\x04\x04\t\t\t\x06\x06\x06\x05\x05\x05\x05\x05\x05\x06\x06\x06\x06\x06\x06\x08\x08\x08\x08\x08\x08\x08\x08\x08\x06\x06\x06\x06\x06\x06\x05\x05\x05\x04\x04\x04\x04\x04\x04\x05\x05\x05\x05\x05\x05\x05\x05\x05\x01\x01\x01\x05\x05\x05\x05\x05\x05\x02\x02\x02\x03\x03\x03\x08\x08\x08\x08\x08\x08\x04\x04\x04\x06\x03\x04\x06\x03\x04\x05\x02\x03\x05\x02\x03\x06\x03\x04\x06\x03\x04\x05\x02\x03\x05\x02\x03\x05\x02\x03\x06\x03\x04\x06\x03\x04\x05\x02\x03\x05\x02\x03\x05\x02\x03\x05\x02\x03\x04\x01\x02\x01\x01\x01\x00\x00\x00\x03\x03\x03\x05\x05\x05\x06\x06\x06\x08\x08\x08\x06\x06\x06\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x04\x04\x04\x05\x05\x05\x05\x05\x05\x05\x05\x05\x06\x06\x06\x06\x06\x06\x05\x05\x05\x05\x05\x05\x04\x04\x04\x04\x04\x04\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x06\x06\x06\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x04\x04\x04\x04\x04\x04\x03\x03\x03\x03\x03\x03\x06\x06\x06\x08\x08\x08\x02\x02\x02\x05\x05\x05\r\r\r\x11\x11\x11\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x07\x04\x05\x06\x03\x04\x06\x03\x04\x04\x01\x02\x06\x03\x04\x0b\x08\t\x08\x05\x06\x04\x01\x02\x07\x04\x05\x06\x03\x04\x06\x03\x04\x06\x03\x04\x06\x03\x04\x06\x03\x04\x06\x03\x04\x06\x03\x04\x04\x04\x04\x04\x04\x04\x00\x03\x03\x00\x03\x03\x00\x03\x03\x00\x03\x03\x03\x06\x06\x03\x06\x06\x02\x05\x05\x02\x05\x05\x01\x04\x04\x01\x04\x04\x01\x04\x04\x01\x04\x04\x04\x04\x04\x04\x04\x04\x05\x02\x03\x05\x02\x03\x05\x02\x03\x05\x02\x03\x06\x03\x04\x06\x03\x04\x06\x03\x04\x08\x05\x06\x08\x05\x06\x08\x05\x06\x04\x01\x02\x04\x01\x02\x05\x02\x03\x06\x03\x04\x07\x04\x05\x07\x04\x05\x05\x05\x05\x05\x05\x05\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x04\x04\x04\x06\x06\x06\x05\x05\x05\x04\x04\x04\x03\x03\x03\x04\x04\x04\x05\x05\x05\x05\x05\x05\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x04\x01\x04\x04\x01\x04\x04\x01\x04\x04\x00\x03\x03\x02\x05\x05\x00\x01\x01\x00\x01\x01\x02\x05\x05\x03\x06\x06\x02\x05\x05\x00\x02\x02\x00\x03\x03\x00\x03\x03\x01\x04\x04\x02\x05\x05\x02\x05\x05\x01\x03\x05\x01\x03\x05\x00\x01\x06\x00\x01\x06\x01\x02\x07\x01\x02\x07\x01\x02\x07\x02\x03\x08\x02\x03\x08\x02\x03\x08\x00\x01\x06\x00\x01\x06\x01\x04\x04\x01\x04\x04\x01\x04\x04\x02\x05\x05\x02\x05\x05\x03\x06\x06\x02\x05\x05\x02\x05\x05\x00\x03\x03\x00\x03\x03\x00\x03\x03\x00\x03\x03\x00\x03\x03\x00\x03\x03\x00\x02\x02\x00\x02\x02\x00\x01\x01\x00\x02\x02\x02\x05\x05\x01\x04\x04\x04\x04\x04\x03\x03\x03\x04\x04\x04\x03\x03\x03\x04\x04\x04\x04\x04\x04\x02\x05\x05\x02\x05\x05\x01\x04\x04\x05\x08\x08\x07\x0c\x0c\x06\x0b\x0b\x04\t\t\x06\x0b\x0b\x05\n'
b'\n'
b'\x00\x02\x02\x03\x08\x08\x03\x08\x08\x03\x08\x08\x03\x08\x08\x03\x08\x08\x03\x08\x08\x01\x06\x06\x01\x06\x06\x03\x08\x08\x03\x08\x08\x04\t\t\x04\t\t\x03\x08\x08\x03\x08\x08\x03\x08\x08\x03\x08\x08\x04\t\t\x04\t\t\x04\t\t\x04\t\t\x04\t\t\x04\t\t\x04\t\t\x04\t\t\x03\x08\x08\x03\x08\x08\x03\x08\x08\x03\x08\x08\x04\t\t\x04\t\t\x04\t\t\x05\n'
b'\n'
b'\t\x0c\x0c\t\x0c\x0c\x03\x06\x06\x02\x05\x05\x03\x06\x06\x06\t\t\t\x0c\x0c\x08\x0b\x0b\x08\x0b\x0b\x06\t\t\x05\x08\x08\x02\x05\x05\x00\x03\x03\x03\x06\x06\x06\t\t\x03\x06\x06\x06\t\t\x01\x04\x04\x01\x04\x04\x05\x08\x08\x06\t\t\x02\x05\x05\x05\x08\x08\n'
b'\r\r\x02\x05\x05\x06\t\t\x08\x0b\x0b\x06\t\t\x05\x08\x08\x07\n'
Any help would be appreciated!
If anyone has better advice on how to avoid the read/write blocking on subprocess pipes on Windows, that would be great too.

How to find out how long a search for files will take in Python?

So I have a little app that searches for all XML files on my PC, copying the files that have 44 digits as the filename to the "output" folder.
The problem is that the end user needs an indication of the progress and remaining time of the task.
This is the module to copy files:
xml_search.py
import os
import re
from threading import Thread
from datetime import datetime
import time
import shutil
import winsound

os.system('cls')

def get_drives():
    response = os.popen("wmic logicaldisk get caption")
    list1 = []
    t1 = datetime.now()
    for line in response.readlines():
        line = line.strip("\n")
        line = line.strip("\r")
        line = line.strip(" ")
        if line == "Caption" or line == "":
            continue
        list1.append(line + '\\')
    return list1

def search1(drive):
    for root, dirs, files in os.walk(drive):
        for file in files:
            if re.match(r"\d{44}\.xml", file):
                filename = os.path.join(root, file)
                try:
                    shutil.copy(filename, os.path.join('output', file))
                except Exception as e:
                    pass

def exec_(callback):
    t1 = datetime.now()
    list2 = []  # empty list is created
    list1 = get_drives()
    for each in list1:
        process1 = Thread(target=search1, args=(each,))
        process1.start()
        list2.append(process1)
    for t in list2:
        t.join()  # wait for the threads to finish
    t2 = datetime.now()
    total = str(t2 - t1)
    print(total, file=open('times.txt', 'a'), end="\n")
    for x in range(3):
        winsound.Beep(2000, 100)
        time.sleep(.1)
    callback()

if __name__ == "__main__":
    exec_(lambda: None)  # exec_ expects a callback; pass a no-op here
The code below uses the progressbar library, and it shows an indication of the progress and remaining time of the task:
import progressbar
from time import sleep

bar = progressbar.ProgressBar(maxval=1120,
                              widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.ETA()])
bar.start()
for i in range(1120):
    bar.update(i + 1)
    sleep(0.1)
bar.finish()
You would need to adapt the above code to yours. In your case, count the number of files first, pass that count as the ProgressBar constructor's maxval argument, and remove the sleep call, as in the sketch below.
The suggested progress-bar solution works with one thread; you would need to figure out how to initiate the progress bar and where to put the updates if you insist on working with multiple threads.
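Here is that sketch (hypothetical and single-threaded): count the matching files in a first pass so maxval is known, then update the bar as each file is copied:

import os
import re
import shutil
import progressbar

PATTERN = re.compile(r"\d{44}\.xml")

def find_matches(drive):
    # first pass: collect paths so we know maxval up front
    matches = []
    for root, dirs, files in os.walk(drive):
        for name in files:
            if PATTERN.match(name):
                matches.append(os.path.join(root, name))
    return matches

def copy_with_progress(matches):
    bar = progressbar.ProgressBar(
        maxval=len(matches),
        widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.ETA()])
    bar.start()
    for i, path in enumerate(matches):
        shutil.copy(path, os.path.join('output', os.path.basename(path)))
        bar.update(i + 1)
    bar.finish()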
Try to implement a timer decorator like the following:
import time

def mytimer(func):
    def wrapper():
        t1 = time.time()
        result = func()
        t2 = time.time()
        print(f"The function {func.__name__} was run {t2 - t1} seconds")
        return result
    return wrapper

@mytimer
def TimeConsumingFunction():
    time.sleep(3)
    print("Hello timers")

TimeConsumingFunction()
Output:
/usr/bin/python3.7 /home/user/Documents/python-workspace/timers/example.py
Hello timers
The function TimeConsumingFunction was run 3.002610206604004 seconds
Process finished with exit code 0

How to implement a "background" or concurrent thread?

I figured it out, I think? It runs as expected. I'm not able to view the original code, so I wrote this new version. Is there a better way to do this?
import time
import threading

def threadee():
    f = open(r'log.txt')
    for line in f:
        print(line)
        time.sleep(0.2)

def threader():
    while True:
        threadee()

def main():
    thread = threading.Thread(target=threader)
    thread.start()
    while True:
        print('main thread running')
        print(threading.enumerate())
        time.sleep(1)

if __name__ == '__main__':
    main()
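One possible refinement (a sketch, assuming you want the worker to stop when the program does): mark the thread as a daemon and use a threading.Event so main() can signal a clean shutdown instead of looping forever:

import threading
import time

stop = threading.Event()

def threader():
    with open('log.txt') as f:
        while not stop.is_set():
            line = f.readline()
            if line:
                print(line, end='')
            else:
                time.sleep(0.2)  # no new data yet; poll again

def main():
    worker = threading.Thread(target=threader, daemon=True)
    worker.start()
    try:
        while True:
            print('main thread running')
            time.sleep(1)
    except KeyboardInterrupt:
        stop.set()    # ask the worker to exit cleanly
        worker.join()

if __name__ == '__main__':
    main()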

Python Multithreading missing data

I am working on a Python script to check if a URL is working. The script writes the URL and response code to a log file.
To speed up the check, I am using threading and a queue.
The script works well if the number of URLs to check is small, but when that increases to hundreds, some URLs are simply missing from the log file.
Is there anything I need to fix?
My script is:
#!/usr/bin/env python
import Queue
import threading
import urllib2, urllib, sys, cx_Oracle, os
import time
from urllib2 import HTTPError, URLError

queue = Queue.Queue()
##print_queue = Queue.Queue()

class NoRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        infourl.code = code
        return infourl
    http_error_300 = http_error_302
    http_error_301 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302

class ThreadUrl(threading.Thread):
    # Threaded Url Grab
##    def __init__(self, queue, print_queue):
    def __init__(self, queue, error_log):
        threading.Thread.__init__(self)
        self.queue = queue
##        self.print_queue = print_queue
        self.error_log = error_log

    def do_something_with_exception(self, idx, url, error_log):
        exc_type, exc_value = sys.exc_info()[:2]
##        self.print_queue.put([idx, url, exc_type.__name__])
        with open(error_log, 'a') as err_log_f:
            err_log_f.write("{0},{1},{2}\n".format(idx, url, exc_type.__name__))

    def openUrl(self, pair):
        try:
            idx = pair[1]
            url = 'http://' + pair[2]
            opener = urllib2.build_opener(NoRedirectHandler())
            urllib2.install_opener(opener)
            request = urllib2.Request(url)
            request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1')
            # open urls of hosts
            resp = urllib2.urlopen(request, timeout=10)
##            self.print_queue.put([idx, url, resp.code])
            with open(self.error_log, 'a') as err_log_f:
                err_log_f.write("{0},{1},{2}\n".format(idx, url, resp.code))
        except:
            self.do_something_with_exception(idx, url, self.error_log)

    def run(self):
        while True:
            # grabs host from queue
            pair = self.queue.get()
            self.openUrl(pair)
            # signals to queue job is done
            self.queue.task_done()

def readUrlFromDB(queue, connect_string, column_name, table_name):
    try:
        connection = cx_Oracle.Connection(connect_string)
        cursor = cx_Oracle.Cursor(connection)
        query = 'select ' + column_name + ' from ' + table_name
        cursor.execute(query)
        # Count lines in the file
        rows = cursor.fetchall()
        total = cursor.rowcount
        # Loop through returned urls
        for row in rows:
            #print row[1], row[2]
##            url = 'http://' + row[2]
            queue.put(row)
        cursor.close()
        connection.close()
        return total
    except cx_Oracle.DatabaseError, e:
        print e[0].context
        raise

def main():
    start = time.time()
    error_log = "D:\\chkWebsite_Error_Log.txt"
    # Check if the error_log file exists; if it does, delete it
    if os.path.isfile(error_log):
        os.remove(error_log)
    # spawn a pool of threads, and pass them the queue instance
    for i in range(10):
        t = ThreadUrl(queue, error_log)
        t.setDaemon(True)
        t.start()
    connect_string, column_name, table_name = "user/pass#db", "*", "T_URL_TEST"
    tn = readUrlFromDB(queue, connect_string, column_name, table_name)
    # wait on the queue until everything has been processed
    queue.join()
##    print_queue.join()
    print "Total retrived: {0}".format(tn)
    print "Elapsed Time: %s" % (time.time() - start)

main()
Python's threading module doesn't give you true parallelism because of the global interpreter lock (http://wiki.python.org/moin/GlobalInterpreterLock); as such, you should use multiprocessing (http://docs.python.org/library/multiprocessing.html) if you really want to take advantage of multiple cores.
Also, you seem to be accessing a file simultaneously from multiple threads:

with open(self.error_log, 'a') as err_log_f:
    err_log_f.write("{0},{1},{2}\n".format(idx, url, resp.code))

This is really bad, AFAIK: if two threads try to write to the same file at the same time, or almost at the same time, the behavior tends to be undefined (imagine one thread writing while another has just closed the file).
Anyway, you would need a third queue to handle writing to the file, as sketched below.
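A sketch of that idea (my own illustration, written in Python 3 style): a single writer thread owns the file, and the workers put log rows on a queue instead of opening the file themselves:

import queue
import threading

log_queue = queue.Queue()

def writer(path):
    # the only thread that touches the file, so writes never interleave
    with open(path, 'a') as f:
        while True:
            row = log_queue.get()
            if row is None:  # sentinel: shut down
                break
            f.write("{0},{1},{2}\n".format(*row))
            f.flush()

writer_thread = threading.Thread(target=writer, args=('chkWebsite_Error_Log.txt',))
writer_thread.start()

# workers call log_queue.put((idx, url, code)) instead of writing directly;
# once all work is done:
log_queue.put(None)
writer_thread.join()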
At first glance this looks like a race condition, since many threads are trying to write to the log file at the same time. See this question for some pointers on how to lock a file for writing (so only one thread can access it at a time).
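Within a single process, a threading.Lock around the write achieves the same effect (a sketch, assuming all threads share the lock object):

import threading

log_lock = threading.Lock()

def log_result(path, idx, url, code):
    # only one thread may open and append to the file at a time
    with log_lock:
        with open(path, 'a') as f:
            f.write("{0},{1},{2}\n".format(idx, url, code))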
