how can I run multiple functions of a module in different threads? - multithreading

I am working on a project called monte carlo tree search(MCTS). In my project, there is a main loop which is time limited. The psudo code for MCTS module goes like this:
import time
class mctsagent:
def select_node(self):
# do sth
def expand(self):
# do sth
def roll_out(self):
# do sth
def backup(self):
# do sth
def search(self, time_budget):
# main loop
startTime = time.clock()
num_rollouts = 0
while (time.clock() - startTime < time_budget):
result1 = self.select_node()
result2 = self.expand(result1)
result3 = self.roll_out(result2)
self.backup(result3)
num_rollouts += 1
I want to use threads to have as many as iterations possible.
The question is that how can I develope a class which could make use of threading module to run the search function in different threads.
for example develope a module like this:
class MCTS_Thread:
def __init__(self, agent, time_budget):
self.agent = agent # MCTS module
self.time_budget = time_budget # The time for running loop
def run(self):
self.agent.search(self.time_budget)
and then instead of search function (# main loop) I use this:
def search(self, time_budget):
threads = []
for i in range(3):
# 3 threads running search
t = MCTS_Thread(self, time_budget)
threads.append(t)
for thread in threads:
thread.start()
for thread in threads:
thread.join()
The only thing matters here is that the functions in search function have to run in the same order in each thread.

Related

Getting returning value from multithreading in python 3

I'm trying to get one or several returning values from a thread in a multithreading process. The code I show get cycled with no way to interrupt it with Ctrl-C, Ctrl+D.
import queue as Queue
import threading
class myThread (threading.Thread):
def __init__(self, threadID, name, region):
threading.Thread.__init__(self)
self.threadID = threadID
self.name = name
self.region = region
def run(self):
GetSales(self.region)
def GetSales(strReg):
print("Thread-" + strReg)
return "Returning-" + strReg
def Main():
RegionList = []
RegionList.append("EMEA")
RegionList.append("AP")
RegionList.append("AM")
# Create threads
threads = []
x = 0
for region in RegionList:
x += 1
rthread = myThread(x, "Thread-" + region, region) # Create new thread
rthread.start() # Start new thread
threads.append(rthread) # Add new thread to threads list
que = Queue.Queue()
# Wait for all threads to complete
for t in threads:
t.join()
result = que.get()
print(t.name + " -> Done")
Main()
If I comment line "result = que.get()" the program runs with no issues.
What you are looking for is future and async management.
Firstly, your program loop indefinitely because of the line que.get(), because there is nothing in the queue, it wait that something happen, which will never happen. You don't use it.
What you want to do is an async task and get the result :
import asyncio
async def yourExpensiveTask():
// some long calculation
return 42
async main():
tasks = []
tasks += [asyncio.create_task(yourExpensiveTask())]
tasks += [asyncio.create_task(yourExpensiveTask())]
for task in tasks:
result = await task
print(result)
See also https://docs.python.org/3/library/asyncio-task.html

call method on running process from parent process

I'm trying to write a program that interfaces with hardware via pyserial according to this diagram https://github.com/kiyoshi7/Intrument/blob/master/Idea.gif . my problem is that I don't know how to tell the child process to run a method.
I tried reducing my problem down to the essence of what I am trying to do can call the method request() from the main script. I just dont know how to handle two way communication like this, in examples using queue i just see data shared or i cant understand the examples
import multiprocessing
from time import sleep
class spawn:
def __init__(self, _number, _max):
self._number = _number
self._max = _max
self.Update()
def request(self, x):
print("{} was requested.".format(x))
def Update(self):
while True:
print("Spawned {} of {}".format(self._number, self._max))
sleep(2)
if __name__ == '__main__':
p = multiprocessing.Process(target=spawn, args=(1,1))
p.start()
sleep(5)
p.request(2) #here I'm trying to run the method I want
update thanks to Carcigenicate
import multiprocessing
from time import sleep
from operator import methodcaller
class Spawn:
def __init__(self, _number, _max):
self._number = _number
self._max = _max
# Don't call update here
def request(self, x):
print("{} was requested.".format(x))
def update(self):
while True:
print("Spawned {} of {}".format(self._number, self._max))
sleep(2)
if __name__ == '__main__':
spawn = Spawn(1, 1) # Create the object as normal
p = multiprocessing.Process(target=methodcaller("update"), args=(spawn,)) # Run the loop in the process
p.start()
while True:
sleep(1.5)
spawn.request(2) # Now you can reference the "spawn"
You're going to need to rearrange things a bit. I would not do the long running (infinite) work from the constructor. That's generally poor practice, and is complicating things here. I would instead initialize the object, then run the loop in the separate process:
from operator import methodcaller
class Spawn:
def __init__(self, _number, _max):
self._number = _number
self._max = _max
# Don't call update here
def request(self, x):
print("{} was requested.".format(x))
def update(self):
while True:
print("Spawned {} of {}".format(self._number, self._max))
sleep(2)
if __name__ == '__main__':
spawn = Spawn(1, 1) # Create the object as normal
p = multiprocessing.Process(target=methodcaller("update"), args=(spawn,)) # Run the loop in the process
p.start()
spawn.request(2) # Now you can reference the "spawn" object to do whatever you like
Unfortunately, since Process requires that it's target argument is pickleable, you can't just use a lambda wrapper like I originally had (whoops). I'm using operator.methodcaller to create a pickleable wrapper. methodcaller("update") returns a function that calls update on whatever is given to it, then we give it spawn to call it on.
You could also create a wrapper function using def:
def wrapper():
spawn.update()
. . .
p = multiprocessing.Process(target=wrapper) # Run the loop in the process
But that only works if it's feasible to have wrapper as a global function. You may need to play around to find out what works best, or use a multiprocessing library that doesn't require pickleable tasks.
Note, please use proper Python naming conventions. Class names start with capitals, and method names are lowercase. I fixed that up in the code I posted.

Coroutine to mimic a OS's scheduler

I am following the :A Curious Course on Coroutines and Concurrency to learn coroutine, encounter problem to get the following codes running:
The code mimic an operating system to schedule tasks
from queue import Quue
class Task:
taskid = 0
def __init__(self, target):
Task.taskid += 1 #count the task
self.tid = Task.taskid
self.tartet = target
self.sendval = None
def run(self):
return self.target.send(self.sendval)
class Scheduler:
def __init__(self):
self.ready = Queue() # a queue of tasks that are ready to run.
self.taskmap = {} #dictionary that keeps track of all active tasks (each task has a unique integer task ID)
def new(self, target): #introduce a new task to the scheduler
newtask = Task(target)
self.taskmap[newtask.tid] = newtask
def schedule(self, task):
self.ready.put(task)
def mainloop(self):
while self.taskmap: #I think the problem is here
task = self.ready.get() #I think it should be while self.ready
result = task.run()
self.schedule(task)
Test it with
def foo():
while True:
print("I'm foo")
yield
def bar():
while True:
print("I'm bar")
yield
It pending instead of return value
In [85]: schedule.new(foo())
In [86]: schedule.new(bar())
In [87]: schedule.mainloop()
^C---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
I review the codes and find problem with
def mainloop(self):
while self.taskmap: #I think the problem is here
task = self.ready.get() #I think it should be while self.ready
result = task.run()
self.schedule(task)
while self.taskmap, but there is no methods to remove elements, so it is an infinite loop
I changed it to
def mainloop(self):
while self.taskmap: #I think the problem is here
task = self.ready.get() #I think it should be while self.ready
result = task.run()
self.schedule(task)
However, it still not work.
What's the problem with my code.

python apply_async does not call method

I have a method which needs to process through a large database, that would take hours/days to dig through
The arguments are stored in a (long) list of which max X should be processed in one batch. The method does not need to return anything, yet i return "True" for "fun"...
The function is working perfectly when I'm iterating through it linearly (generating/appending the results in other tables not seen here), yet I am unable to get apply_async or map_async work. (it worked before in other projects)
Any hint of what might I be doing wrong would be appreciated, thanks in advance!
See code below:
import multiprocessing as mp
class mainClass:
#loads of stuff
def main():
multiprocess = True
batchSize = 35
mC = mainClass()
while True:
toCheck = [key for key, value in mC.lCheckSet.items()] #the tasks are stored in a dictionary, I'm referring to them with their keys, which I turn to a list here for iteration.
if multiprocess == False:
#this version works perfectly fine
for i in toCheck[:batchSize]:
mC.check(i)
else:
#the async version does not, either with apply_async...
with mp.Pool(processes = 8) as pool:
temp = [pool.apply_async(mC.check, args=(toCheck[n],)) for n in range(len(toCheck[:batchSize]))]
results = [t.get() for t in temp]
#...or as map_async
pool = mp.Pool(processes = 8)
temp = pool.map_async(mC.check, toCheck[:batchSize])
pool.close()
pool.join()
if __name__=="__main__":
main()
The "smell" here is that you are instantiating your maincClass on the main Process, just once, and then trying to call a method on it on the different processes - but note that when you pass mC.check to your process pool, it is a method already bound to the class instantiated in this process.
I'd guess there is where your problem lies. Although that could possibly work - and it does - I made this simplified version and it works as intended :
import multiprocessing as mp
import random, time
class MainClass:
def __init__(self):
self.value = 1
def check(self, arg):
time.sleep(random.uniform(0.01, 0.3))
print(id(self),self.value, arg)
def main():
mc = MainClass()
with mp.Pool(processes = 4) as pool:
temp = [pool.apply_async(mc.check, (i,)) for i in range(8)]
results = [t.get() for t in temp]
main()
(Have you tried just adding some prints to make sure the method is not running at all?)
So, the problem lies likely in some complex state in your MainClass that does not make it to the parallel processes in a good way. A possible work-around is to instantiate your mainclasses inside each process - that can be easily done since MultiProcessing allow you to get the current_process, and use this object as a namespace to keep data in the process instantiated in the worker Pool, across different calls to apply async.
So, create a new check function like the one bellow - and instead of instantiating your mainclass in the mainprocess, instantiate it inside each process in the pool:
import multiprocessing as mp
import random, time
def check(arg):
process = mp.current_process
if not hasattr(process, "main_class"):
process.main_class = MainClass()
process.main_class.check(arg)
class MainClass:
def __init__(self):
self.value = random.randrange(100)
def check(self, arg):
time.sleep(random.uniform(0.01, 0.3))
print(id(self),self.value, arg)
def main():
mc = MainClass()
with mp.Pool(processes = 2) as pool:
temp = [pool.apply_async(check, (i,)) for i in range(8)]
results = [t.get() for t in temp]
main()
I got to this question with the same problem, my apply_async calls not called at all, but the reason on my case was that the parameters number on apply_async call was different to the number on function declaration

Threaded result not giving same result as un-threaded result (python)

I have created a program to generate data points of functions that I later plot. The program takes a class which defines the function, creates a data outputting object which when called generates the data to a text file. To make the whole process faster I put the jobs in threads, however when I do, the data generated is not always correct. I have attached a picture to show what I mean:
Here are some of the relevant bits of code:
from queue import Queue
import threading
import time
queueLock = threading.Lock()
workQueue = Queue(10)
def process_data(threadName, q, queue_window, done):
while not done.get():
queueLock.acquire() # check whether or not the queue is locked
if not workQueue.empty():
data = q.get()
# data is the Plot object to be run
queueLock.release()
data.parent_window = queue_window
data.process()
else:
queueLock.release()
time.sleep(1)
class WorkThread(threading.Thread):
def __init__(self, threadID, q, done):
threading.Thread.__init__(self)
self.ID = threadID
self.q = q
self.done = done
def get_qw(self, queue_window):
# gets the queue_window object
self.queue_window = queue_window
def run(self):
# this is called when thread.start() is called
print("Thread {0} started.".format(self.ID))
process_data(self.ID, self.q, self.queue_window, self.done)
print("Thread {0} finished.".format(self.ID))
class Application(Frame):
def __init__(self, etc):
self.threads = []
# does some things
def makeThreads(self):
for i in range(1, int(self.threadNum.get()) +1):
thread = WorkThread(i, workQueue, self.calcsDone)
self.threads.append(thread)
# more code which just processes the function etc, sorts out the gui stuff.
And in a separate class (as I'm using tkinter, so the actual code to get the threads to run is called in a different window) (self.parent is the Application class):
def run_jobs(self):
if self.running == False:
# threads are only initiated when jobs are to be run
self.running = True
self.parent.calcsDone.set(False)
self.parent.threads = [] # just to make sure that it is initially empty, we want new threads each time
self.parent.makeThreads()
self.threads = self.parent.threads
for thread in self.threads:
thread.get_qw(self)
thread.start()
# put the jobs in the workQueue
queueLock.acquire()
for job in self.job_queue:
workQueue.put(job)
queueLock.release()
else:
messagebox.showerror("Error", "Jobs already running")
This is all the code which relates to the threads.
I don't know why when I run the program with multiple threads some data points are incorrect, whilst running it with just 1 single thread the data is all perfect. I tried looking up "threadsafe" processes, but couldn't find anything.
Thanks in advance!

Resources