I have a function that loops over values from a dictionary. I want to split the dict keys into as many parts as I have CPUs, so each part can be processed in parallel. My function is:
def find_something2(new2, threl=2.0, my_limit=150, far=365):
    """Find stocks that are worth buying."""
    global current_date, total_money, min_date, current_name, dates_dict, mylist, min_date_sell, reduced_stocks
    worthing = list()
    for stock in new2:
        frame = reduced_stocks[stock]
        temp = frame.loc[current_date:end_date]
        if not temp.empty:
            mydate = temp.head(far).Low.idxmin()
            if mydate <= min_date_sell:
                my_min = temp.head(far).Low.min()
                if total_money >= my_min > 0:  # find the min date at four months
                    ans, res, when_sell, total, income = worth_buy(stock, frame, mydate, 'Low',
                                                                   thres=threl, sell_limit=my_limit)
                    if ans:
                        if income > 3 * 10 ** 6:
                            worthing.append([mydate, stock, res, when_sell, total, income])
    if current_date > '1990-01-01':
        return sorted(worthing, key=itemgetter(0))
    elif current_date > '1985-01-01':
        return sorted(worthing, key=itemgetter(0))
    else:
        answer = sorted(worthing, key=itemgetter(5), reverse=True)
        return answer[::11]
So what I have tried is:
import multiprocessing as mp

result_list = []

def log_result(result):
    # This is called whenever find_something2 returns a result.
    # result_list is modified only by the main process, not the pool workers.
    global result_list
    result_list.append(result)

def apply_async_with_callback():
    global reduced_stocks
    temp = list(reduced_stocks.keys())
    temp1 = temp[0:1991]
    temp2 = temp[1991:]
    temp = [temp1, temp2]
    pool = mp.Pool(2)
    for i in temp:
        pool.apply_async(find_something2, args=(i, 1.1, 2200, 1,), callback=log_result)
    pool.close()
    pool.join()
    print(result_list)

if __name__ == '__main__':
    apply_async_with_callback()
Is this the right way? I also tried threads, but the CPU maxes out at 15% even though I am using 12 threads (I have a 6-core Intel CPU):
def pare():
    relist = list(reduced_stocks.keys())
    sublist = [relist[x:x+332] for x in range(0, len(relist), 332)]
    data = [x for x in sublist]
    threads = list()
    from threading import Thread
    for i in range(12):
        process = Thread(target=find_something2, args=(1.4, 2500, 8, data[i], i, results))
        process.start()
        threads.append(process)
    for process in threads:
        process.join()
One way to do multiprocessing is to create a Pool and pass the prepared data to it, wait for the computation to finish, and then process the results. The code below sketches how to do that.
# set up the function so it gets everything from arguments
def find_something2(new2, threl, my_limit, far, current_date, total_money, min_date_sell, reduced_stocks, end_date):
    # ....
    pass

# prepare the data
# replace a1, a2, ... with the actual parameters your function takes
data = [(a1, a2, a3, ...) for your_data in your_dict]

import multiprocessing as mp

with mp.Pool() as pool:
    results = pool.starmap(find_something2, data)
print(results)
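For reference, here is a tiny self-contained sketch of that pattern. The stand-in function find_len and the fake_stocks dict are illustrative only, not part of the original code:

import multiprocessing as mp

def find_len(chunk, factor):
    # stand-in for find_something2: any function of its arguments alone
    return [len(name) * factor for name in chunk]

if __name__ == '__main__':
    fake_stocks = {'AAPL': None, 'MSFT': None, 'GOOG': None, 'IBM': None}
    keys = list(fake_stocks)
    n = mp.cpu_count()
    chunks = [keys[i::n] for i in range(n)]      # one chunk of keys per CPU
    data = [(chunk, 2) for chunk in chunks if chunk]
    with mp.Pool() as pool:
        results = pool.starmap(find_len, data)   # one result list per chunk
    print(results)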
I have a piece of code as shown below:
#!/bin/python3

import math
import os
import random
import re
import sys
import logging

def consumer():
    while True:
        x = yield
        print(x)

def producer(n):
    for _ in range(n):
        x = int(input())
        yield x

def rooter():
    logging.info("Running the rooter")
    while True:
        value = (yield)
        yield math.sqrt(value)

def squarer():
    logging.info("Running the squarer")
    while True:
        value = (yield)
        print("from squarer: {}".format(value))
        yield value * value

def accumulator():
    logging.info("Running the accumulator.")
    running_total = 0
    while True:
        value = (yield)
        running_total += value
        yield running_total

def pipeline(prod, workers, cons):
    logging.info("workers: {}".format(workers))
    for num in prod:
        for i, w in enumerate(workers):
            num = w.send(num)
        cons.send(num)
    for worker in workers:
        worker.close()
    cons.close()

if __name__ == '__main__':
    order = input().strip()
    m = int(input())
    prod = producer(m)
    cons = consumer()
    next(cons)
    root = rooter()
    next(root)
    accumulate = accumulator()
    next(accumulate)
    square = squarer()
    next(square)
    pipeline(prod, eval(order), cons)
Sample input
[square, accumulate]
3 <- Number of inputs coming further
1 <- actual inputs
2
3
Sample output
The output should be:
1
5
14
but it comes to 10 (the sum of the squares of 1 and 3) when it should actually be 14 (the sum of the squares of 1, 2, 3).
So essentially the input 2 is missed (it's second in the line of inputs).
On debugging further I found that this is the case for every alternate iteration, not just for the provided inputs here.
I am not able to decipher what's happening. If it's of any help, the co-routine squarer is the one returning None in the second iteration.
I'd appreciate any help.
I found a solution to this. We need to prime the coroutine again after each use in the pipeline function, so the code becomes as follows (I have marked the added next(w) line with asterisks for reference):
#!/bin/python3

import math
import os
import random
import re
import sys
import logging

def consumer():
    while True:
        x = yield
        print(x)

def producer(n):
    for _ in range(n):
        x = int(input())
        yield x

def rooter():
    logging.info("Running the rooter")
    while True:
        value = (yield)
        yield math.sqrt(value)

def squarer():
    logging.info("Running the squarer")
    while True:
        value = (yield)
        print("from squarer: {}".format(value))
        yield value * value

def accumulator():
    logging.info("Running the accumulator.")
    running_total = 0
    while True:
        value = (yield)
        running_total += value
        yield running_total

def pipeline(prod, workers, cons):
    logging.info("workers: {}".format(workers))
    for num in prod:
        for i, w in enumerate(workers):
            num = w.send(num)
            **next(w)**
        cons.send(num)
    for worker in workers:
        worker.close()
    cons.close()

if __name__ == '__main__':
    order = input().strip()
    m = int(input())
    prod = producer(m)
    cons = consumer()
    next(cons)
    root = rooter()
    next(root)
    accumulate = accumulator()
    next(accumulate)
    square = squarer()
    next(square)
    pipeline(prod, eval(order), cons)
As the PEP 342 specification says, the value of a yield expression is None when the generator is resumed by a normal next() call. So after we explicitly step past the second yield with next(w), the coroutine is paused at value = (yield) again and is ready to handle the next input immediately.
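To see why the extra next() is needed, here is a minimal example of my own (not from the original post) with the same two-yield cycle; without the intermediate next(), every other send() lands on the output yield and its value is lost:

def doubler():
    while True:
        value = (yield)    # point A: receives the input
        yield value * 2    # point B: emits the output

d = doubler()
next(d)            # prime: run to point A
print(d.send(3))   # resumes A, prints 6, leaves it paused at B
next(d)            # step past B so the next send() lands on A again
print(d.send(5))   # prints 10 as expected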
I've drafted the code below for the captioned question, but the returned result is always 0. Could anyone please help me figure out what the problem is here?
Thanks a lot!
import random

dice_sides = 6
frequency_list = []

def roll_dice(times):
    results = []
    for roll_num in range(times):
        result = random.randint(1, dice_sides)
        results.append(result)
    for i in range(dice_sides):
        if results.count(i) != 2:
            frequency = 0
            break
        else:
            frequency = 1
    return frequency

def occurrence(N, times):
    for j in range(N):
        frequency_list.append(roll_dice(times))
    prob = frequency_list.count(1)
    return prob

print(occurrence(10000, 12))
You can try something like this:
Code
import random
from collections import Counter

def roll_dice(n_sides, times):
    if times % n_sides:  # shortcut: bail out early when the rolls can't split evenly across faces
        return 0
    results = []
    for roll_num in range(times):
        result = random.randint(1, n_sides)
        results.append(result)
    # Counter(results) maps each face to its count; taking the set of
    # those counts leaves exactly one element when every rolled face
    # occurs the same number of times. This is more generic than
    # hard-coding a count of 2.
    res_dict = set(Counter(results).values())
    if len(res_dict) == 1:
        return 1
    return 0

def mean(ar):
    return sum(ar) / len(ar)

def occurrence(N, n_sides, times):
    frequency_list = []
    for j in range(N):
        frequency_list.append(roll_dice(n_sides, times))
    prob = mean(frequency_list)
    return prob

if __name__ == '__main__':
    N = 100000  # I intentionally made it 100k
    n_sides = 6
    times = 12
    res_prob = occurrence(N, n_sides, times)
    print(res_prob)
Output
0.00604
[Finished in 3.6s]
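As a sanity check (my own addition, assuming fair dice): the set-of-counts test accepts any outcome in which every face that appears occurs the same number of times, so the exact probability is a sum over the possible numbers of distinct faces k dividing 12:

from math import comb, factorial

# k distinct faces out of 6, each rolled exactly 12/k times
total = 0
for k in (1, 2, 3, 4, 6):  # k = 12 is impossible with only 6 faces
    per_face = 12 // k
    total += comb(6, k) * factorial(12) // factorial(per_face) ** k
print(total / 6 ** 12)  # ~0.0063, in the same ballpark as the simulated 0.00604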
I'm trying to implement multithreading and multiprocessing on a sorting algorithm. The way I implemented it is:
Initialize a list of 10k items
Assign a random value between 0 and 100 to each element

arr = [1] * 10000
for x, i in enumerate(arr):
    arr[x] = random.randint(0, 100)
t_arr = arr
m_arr = arr
s_arr = arr

Create 2 sub-lists -- one for values lower than or equal to 50 and one for the rest
I then used bubble sort on both sub-lists in parallel, once using threads and once using processes. Theoretically both should be faster than the serial version, but only multiprocessing is; multithreading produces the same timings as the serial run. I have already tried different sorting algorithms; the problem persists.
# Threading Version
start_time = time.time()
subarr1 = []
subarr2 = []
# Split array into 2
for i in t_arr:
    if i <= 50:
        subarr1.append(i)
    else:
        subarr2.append(i)
# Sort first array
t1 = threading.Thread(target=bubbleSort, args=(subarr1,))
# Sort second array
t2 = threading.Thread(target=bubbleSort, args=(subarr2,))
t1.start()
t2.start()
t1.join()
t2.join()
end_time = time.time() - start_time
print("--- %s seconds ---" % (end_time))

# Serial Version
start_time = time.time()
subarr1 = []
subarr2 = []
# Split array into 2
for i in s_arr:
    if i <= 50:
        subarr1.append(i)
    else:
        subarr2.append(i)
# Sort first array
bubbleSort(subarr1)
# Sort second array
bubbleSort(subarr2)
end_time = time.time() - start_time
print("--- %s seconds ---" % (end_time))

# Multiprocessing Version
start_time = time.time()
subarr1 = []
subarr2 = []
# Split array into 2
for i in s_arr:
    if i <= 50:
        subarr1.append(i)
    else:
        subarr2.append(i)
# Sort first array
p1 = multiprocessing.Process(target=bubbleSort, args=(subarr1,))
# Sort second array
p2 = multiprocessing.Process(target=bubbleSort, args=(subarr2,))
p1.start()
p2.start()
p1.join()
p2.join()
end_time = time.time() - start_time
print("--- %s seconds ---" % (end_time))
Multithreading: around 6 seconds
Serial: around 6 seconds (similar to threading)
Multiprocessing: around 3 seconds
These results are consistent. Any advice?
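For what it's worth, that pattern is exactly what CPython's GIL predicts for CPU-bound work: only one thread executes Python bytecode at a time, so two sorting threads take turns instead of running in parallel, while two processes each get their own interpreter and GIL. A minimal timing sketch of my own (with a stand-in busy loop rather than bubbleSort):

import time
import threading
import multiprocessing

def busy(n):
    # CPU-bound loop; threads serialize on the GIL, processes do not
    total = 0
    for i in range(n):
        total += i * i
    return total

def timed(label, workers):
    start = time.time()
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print("%s: %.2fs" % (label, time.time() - start))

if __name__ == '__main__':
    n = 10_000_000
    timed("threads", [threading.Thread(target=busy, args=(n,)) for _ in range(2)])
    timed("processes", [multiprocessing.Process(target=busy, args=(n,)) for _ in range(2)])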
I am trying to implement bitonic with the python multiprocessing library and a shared resource array that will be sorted at the end of the program.
The problem I am running into is that when I run the program, I get a prompt that asks "Your program is still running! Are you sure you want to cancel it?", and after I click cancel N - 1 times (where N is the number of processes I am trying to spawn), it just hangs.
When it is run from the command line, it just outputs the unsorted array. Of course, I expect it to be sorted when the program finishes.
I've been using this resource to try and get a firm grasp on how I can mitigate my errors but I haven't had any luck, and now I am here.
ANY help would be appreciated, as I really don't have anywhere else to turn.
I wrote this using Python 3.6 and here is the program in its entirety:
from multiprocessing import Process, Array
import sys
from random import randint

# remember to move this to separate file
def createInputFile(n):
    input_file = open("input.txt", "w+")
    input_file.write(str(n) + "\n")
    for i in range(n):
        input_file.write(str(randint(0, 1000000)) + "\n")

def main():
    # createInputFile(1024)  # uncomment this to create 'input.txt'
    fp = open("input.txt", "r")  # remember to read from sys.argv
    length = int(fp.readline())  # guaranteed to be power of 2 by instructor
    arr = Array('i', range(length))
    nums = fp.read().split()
    for i in range(len(nums)):
        arr[i] = int(nums[i])  # overwrite shared resource values
    num_processes = 8  # remember to read from sys.argv
    process_dict = dict()
    change_in_bounds = len(arr) // num_processes
    low_b = 0  # lower bound
    upp_b = change_in_bounds  # upper bound
    for i in range(num_processes):
        print("Process num: " + str(i))  # are all processes being generated?
        process_dict[i] = Process(target=bitonic_sort, args=(True, arr[low_b:upp_b]))
        process_dict[i].start()
        low_b += change_in_bounds
        upp_b += change_in_bounds
    for i in range(num_processes):
        process_arr[i].join()
    print(arr[:])  # Print our sorted array (hopefully)

def bitonic_sort(up, x):
    if len(x) <= 1:
        return x
    else:
        first = bitonic_sort(True, x[:len(x) // 2])
        second = bitonic_sort(False, x[len(x) // 2:])
        return bitonic_merge(up, first + second)

def bitonic_merge(up, x):
    # assume input x is bitonic, and sorted list is returned
    if len(x) == 1:
        return x
    else:
        bitonic_compare(up, x)
        first = bitonic_merge(up, x[:len(x) // 2])
        second = bitonic_merge(up, x[len(x) // 2:])
        return first + second

def bitonic_compare(up, x):
    dist = len(x) // 2
    for i in range(dist):
        if (x[i] > x[i + dist]) == up:
            x[i], x[i + dist] = x[i + dist], x[i]  # swap

main()
I won't go into all the syntax errors in your code, since I am sure your IDE tells you about those. The problem you have is that you are missing an if __name__ == '__main__' guard. I changed your def main() to def sort() and wrote this:
if __name__ == '__main__':
    sort()
And it worked (after fixing all the syntax errors).
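The reason the guard matters: with the spawn start method (the default on Windows), every child process re-imports the module, so an unguarded top-level call runs again in each child and spawns processes recursively. A minimal sketch of my own showing the safe layout:

from multiprocessing import Process

def work(chunk):
    print(sorted(chunk))

def sort():
    procs = [Process(target=work, args=([3, 1, 2],)) for _ in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()

# without the guard, 'spawn' children re-importing this module
# would call sort() again and spawn processes recursively
if __name__ == '__main__':
    sort()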
At the moment I'm trying to handle results from a calculation that come in very fast. At first I inserted each simulation result into an SQLite database, but it turned out to be the bottleneck of the entire calculation. So I ended up using cursor.executemany instead of cursor.execute, which is much faster.
My problem is now that I'm somehow not able to implement a thread-safe counter.
The executemany task should be run every 1000 calculations. Therefore I implemented an initializer with a multiprocessing.Value. I also tried this solution (http://eli.thegreenplace.net/2012/01/04/shared-counter-with-pythons-multiprocessing), but somehow some values of the counter are duplicated, which ends up running the executemany task too often or not at all.
If anybody has an idea how to solve this issue I'd really appreciate it.
Here's a minimal sample:
import multiprocessing, sqlite3
from multiprocessing import Value, Lock
from itertools import repeat

def worker(Testvalues, TotalValues):
    MP_counter.value += 1
    counter.increment()
    con = sqlite3.connect("Test.db", timeout=30.0)
    cur = con.cursor()
    # Minimum sample:
    Helper = list(range(5))
    Helper = [x * Testvalues for x in Helper]
    GList.append(Helper)
    Execute_Every = 10
    print("Counter class: %d" % (counter.value()))
    print("MP_counter: %d" % (MP_counter.value))
    if counter.value() % Execute_Every == 0 or counter.value() == TotalValues - 1:
        print("Execute query")
        print("Counter class: %d" % (counter.value()))
        print("MP_counter: %d" % (MP_counter.value))
        Helper = [tuple(row) for row in GList[:Execute_Every]]
        del GList[:Execute_Every]
        cur.executemany(
            "INSERT INTO Test (One, Two, Three, Four, Five) VALUES (?, ?, ?, ?, ?);", Helper)
        con.commit()
    con.close()

def setup(t, g, c):
    global MP_counter
    global GList
    global counter
    MP_counter = t
    GList = g
    counter = c

class Counter(object):
    def __init__(self, initval=0):
        self.val = Value('i', initval)
        self.lock = Lock()

    def increment(self):
        with self.lock:
            self.val.value += 1

    def value(self):
        with self.lock:
            return self.val.value

if __name__ == '__main__':
    m = multiprocessing.Manager()
    CPUS = multiprocessing.cpu_count()
    MP_counter = multiprocessing.Value('i', 0)
    GList = m.list([])
    thread_safe_counter = Counter(0)
    l = multiprocessing.Lock()
    WORKERS = multiprocessing.Pool(initializer=setup, initargs=[MP_counter, GList, thread_safe_counter], processes=CPUS)

    con = sqlite3.connect("Test.db", timeout=30.0)
    cur = con.cursor()
    cur.execute('PRAGMA journal_mode=wal')
    SQLCommand = "CREATE TABLE IF NOT EXISTS Test (One INT, Two INT, Three INT, Four INT, Five INT);"
    cur.execute(SQLCommand)
    con.close()

    TotalValues = 100
    Testvalues = list(range(TotalValues))
    WORKERS.starmap(worker, zip(Testvalues, repeat(TotalValues)))
    WORKERS.close()
    WORKERS.join()
    # Check if list is empty
    print(GList)
Thank you guys :)
Your counter has an increment() and a value() method, which are called separately, so to make this safe you would have to hold the lock across both operations. Instead, your increment() method should return the new value after incrementing, and you should use that value without any further calls to value(), e.g.:
class Counter(object):
    def __init__(self, initval=0):
        self.val = Value('i', initval)
        self.lock = Lock()

    def increment(self):
        with self.lock:
            self.val.value += 1
            return self.val.value

...

def worker(Testvalues, TotalValues):
    counter_value = counter.increment()
    # use only counter_value from here on
    ...
Also, a Value is already created with an associated RLock by default, which can be overridden in the constructor call with a different lock type if needed. So you don't really need to allocate your own lock; you could just use:
class Counter(object):
    def __init__(self, initval=0):
        self.val = Value('i', initval)
        # or Value('i', initval, lock=Lock())

    def increment(self):
        with self.val.get_lock():
            self.val.value += 1
            return self.val.value
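Putting it together, here is a minimal runnable sketch of mine (a toy example, not the asker's full program) where the worker keys every decision off the single value returned by increment(), so no second read can interleave with another process:

import multiprocessing
from multiprocessing import Value

class Counter(object):
    def __init__(self, initval=0):
        self.val = Value('i', initval)

    def increment(self):
        with self.val.get_lock():
            self.val.value += 1
            return self.val.value

def setup(c):
    global counter
    counter = c

def worker(_):
    n = counter.increment()
    if n % 10 == 0:  # e.g. flush a batch every 10th task
        print("flush at", n)

if __name__ == '__main__':
    shared = Counter()
    pool = multiprocessing.Pool(initializer=setup, initargs=(shared,))
    pool.map(worker, range(100))
    pool.close()
    pool.join()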