Summing numbers using Threads - multithreading

The code gives me an error. What am I doing wrong here?
#!/usr/bin/env python3
from datetime import datetime

MAX_NUM = 10000
NUMPROCS = 1

# THREADING
from multiprocessing import Process, Queue

start_time = datetime.now()

def sum_range(start, stop, out_q):
    i = start
    counter = 0
    while i < stop:
        counter += i
        i += 1
    out_q.put(counter)

mysums = Queue()
mybounds = [1 + i for i in range(0, MAX_NUM + 1, int(MAX_NUM / NUMPROCS))]

myprocs = []
for i in range(NUMPROCS):
    p = Process(target=sum_range, args=(mybounds[i], mybounds[i + 1], mysums))
    p.start()
    myprocs.append(p)

mytotal = 0
for i in range(NUMPROCS):
    mytotal += mysums.get()

for i in range(NUMPROCS):
    myprocs[i].join()

print(mytotal)

end_time = datetime.now()
print('Time taken : {}'.format(end_time - start_time))
I was trying to write multiprocess code that sums numbers. It gives me the error below:

if __name__ == '__main__':
    freeze_support()
    ...

The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
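This is the standard multiprocessing error on platforms that use the spawn start method (Windows in particular): each child process re-imports the main module, so process creation at module top level runs again in every child. A minimal sketch of the fix, keeping the imports, constants, and sum_range definition from the script above unchanged, is to guard the process-spawning code:

if __name__ == '__main__':
    # Only the parent process executes this block; children that
    # re-import the module skip it.
    mysums = Queue()
    mybounds = [1 + i for i in range(0, MAX_NUM + 1, int(MAX_NUM / NUMPROCS))]

    myprocs = []
    for i in range(NUMPROCS):
        p = Process(target=sum_range, args=(mybounds[i], mybounds[i + 1], mysums))
        p.start()
        myprocs.append(p)

    mytotal = 0
    for i in range(NUMPROCS):
        mytotal += mysums.get()

    for p in myprocs:
        p.join()

    print(mytotal)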

Co-routine returns None for every alternate iteration

I have a piece of code as shown below:
#!/bin/python3
import math
import os
import random
import re
import sys
import logging

def consumer():
    while True:
        x = yield
        print(x)

def producer(n):
    for _ in range(n):
        x = int(input())
        yield x

def rooter():
    logging.info("Running the rooter")
    while True:
        value = (yield)
        yield math.sqrt(value)

def squarer():
    logging.info("Running the squarer")
    while True:
        value = (yield)
        print("from squarer: {}".format(value))
        yield value * value

def accumulator():
    logging.info("Running the accumulator.")
    running_total = 0
    while True:
        value = (yield)
        running_total += value
        yield running_total

def pipeline(prod, workers, cons):
    logging.info("workers: {}".format(workers))
    for num in prod:
        for i, w in enumerate(workers):
            num = w.send(num)
        cons.send(num)
    for worker in workers:
        worker.close()
    cons.close()

if __name__ == '__main__':
    order = input().strip()
    m = int(input())
    prod = producer(m)
    cons = consumer()
    next(cons)
    root = rooter()
    next(root)
    accumulate = accumulator()
    next(accumulate)
    square = squarer()
    next(square)
    pipeline(prod, eval(order), cons)
Sample input
[square, accumulate]
3 <- number of inputs that follow
1 <- actual inputs
2
3
Sample output
The output should be:
1
5
14
but it comes to
10 (sum of the squares of 1 and 3) when it should actually be 14 (sum of the squares of 1, 2, and 3)
So essentially the input 2 is missed (it's second in the line of inputs).
On debugging further I found that this is the case for every alternate iteration, not just for the inputs provided here.
I am not able to decipher what's happening. If it's of any help, the squarer co-routine is the one returning None in the second iteration.
I'd appreciate any help.
I found a solution to this: prime the co-routine again after each use in the pipeline function. The code becomes as follows; I have marked the added next(w) line with a comment for reference.
#!/bin/python3
import math
import os
import random
import re
import sys
import logging

def consumer():
    while True:
        x = yield
        print(x)

def producer(n):
    for _ in range(n):
        x = int(input())
        yield x

def rooter():
    logging.info("Running the rooter")
    while True:
        value = (yield)
        yield math.sqrt(value)

def squarer():
    logging.info("Running the squarer")
    while True:
        value = (yield)
        print("from squarer: {}".format(value))
        yield value * value

def accumulator():
    logging.info("Running the accumulator.")
    running_total = 0
    while True:
        value = (yield)
        running_total += value
        yield running_total

def pipeline(prod, workers, cons):
    logging.info("workers: {}".format(workers))
    for num in prod:
        for i, w in enumerate(workers):
            num = w.send(num)
            next(w)  # <-- the added priming call
        cons.send(num)
    for worker in workers:
        worker.close()
    cons.close()

if __name__ == '__main__':
    order = input().strip()
    m = int(input())
    prod = producer(m)
    cons = consumer()
    next(cons)
    root = rooter()
    next(root)
    accumulate = accumulator()
    next(accumulate)
    square = squarer()
    next(square)
    pipeline(prod, eval(order), cons)
As the PEP 342 specification notes, a generator's yield expression evaluates to None when the generator is resumed by a plain next() call. Advancing each worker past its second yield with next(w) brings it back around to the first yield, so it is ready to receive the next input immediately.
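The alternation is easy to reproduce in isolation. A minimal sketch using the same two-yield shape as the squarer above:

def squarer():
    while True:
        value = (yield)        # first yield: receives the input
        yield value * value    # second yield: emits the output

sq = squarer()
next(sq)           # prime: run up to the first yield
print(sq.send(2))  # 4    (input taken at the first yield, output from the second)
print(sq.send(3))  # None (3 is swallowed by the second yield; pauses back at the first)
print(sq.send(4))  # 16   (the cycle repeats: every other value is lost)

Calling next(sq) after every send(), as the fixed pipeline does, drives the generator back to the first yield, so each send() delivers its value where it is expected.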

Python: Roll a die 12 times and calculate the probability that each number shows up exactly twice

I've drafted the code below for the captioned question, but the returned result is always 0. Could anyone please help me figure out what the problem is?
Thanks a lot!
import random

dice_sides = 6
frequency_list = []

def roll_dice(times):
    results = []
    for roll_num in range(times):
        result = random.randint(1, dice_sides)
        results.append(result)
    for i in range(dice_sides):
        if results.count(i) != 2:
            frequency = 0
            break
        else:
            frequency = 1
    return frequency

def occurrence(N, times):
    for j in range(N):
        frequency_list.append(roll_dice(times))
    prob = frequency_list.count(1)
    return prob

print(occurrence(10000, 12))
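The result is always 0 because of the loop bounds: range(dice_sides) yields 0 to 5, while randint(1, dice_sides) produces 1 to 6, so results.count(0) can never equal 2 and the loop breaks with frequency = 0 on the first pass. A minimal fix that keeps the original structure:

# Check faces 1..6 rather than 0..5; for/else runs the else branch only
# when the loop completes without a break.
for i in range(1, dice_sides + 1):
    if results.count(i) != 2:
        frequency = 0
        break
else:
    frequency = 1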
You can try something like this:
Code
import random
from collections import Counter

def roll_dice(n_sides, times):
    if times % n_sides:
        # An equal distribution is only possible when times is a
        # multiple of n_sides.
        return 0
    results = []
    for roll_num in range(times):
        result = random.randint(1, n_sides)
        results.append(result)
    # I'm using a set here and will check its length:
    # Counter(results) returns a dict of (item, count) pairs,
    # and if every item has the same count, the set of counts
    # has length 1. This is more generic (not only for 2 as in this case).
    res_dict = set(Counter(results).values())
    if len(res_dict) == 1:
        return 1
    return 0

def mean(ar):
    return sum(ar) / len(ar)

def occurrence(N, n_sides, times):
    frequency_list = []
    for j in range(N):
        frequency_list.append(roll_dice(n_sides, times))
    prob = mean(frequency_list)
    return prob

if __name__ == '__main__':
    N = 100000  # I intentionally made it 100k
    n_sides = 6
    times = 12
    res_prob = occurrence(N, n_sides, times)
    print(res_prob)
Output
0.00604
[Finished in 3.6s]
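One caveat on this approach: the set-of-counts check accepts any outcome where every value that appears does so equally often (for example, two faces six times each), so the estimate sits a little above the exact probability of all six faces appearing exactly twice, which is 12!/(2!^6 * 6^12) ≈ 0.00344. A stricter check, as a sketch (roll_dice_strict is an illustrative name):

import random
from collections import Counter

def roll_dice_strict(n_sides, times):
    rolls = [random.randint(1, n_sides) for _ in range(times)]
    # Require every face 1..n_sides to appear exactly times // n_sides times.
    expected = {face: times // n_sides for face in range(1, n_sides + 1)}
    return 1 if Counter(rolls) == expected else 0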

Python Multiprocessing module did not work properly

I'm currently writing code that counts all the prime numbers up to a given integer. The problem is that it would take far too long (about 35 days). Our solution was the multiprocessing module: count the primes in four separate processes (process 1 starts at 1 with step 8, process 2 starts at 3 with step 8, process 3 starts at 5 with step 8, process 4 starts at 7 with step 8). Together they test every odd number for primality.
But the following code doesn't work, and we can't find the problem.
Maybe one of you can help me?
from multiprocessing import Process
import time
import math

timeStart = 0
timeEnd = 0

def istTeilbar(x, y):
    if x % y == 0:
        return True
    return False

def istPrimzahl(x):
    for i in range(2, int(math.sqrt(x)) + 1):
        if istTeilbar(x, i):
            return False
    return True

def schreibePrimzahlBis1(x):
    counter1 = 0
    print("x")
    for i in range(1, x, 8):
        if istPrimzahl(i):
            counter1 = counter1 + 1
    print(f'Das waren {counter1} Primzahlen\n')

def schreibePrimzahlBis2(x):
    counter2 = 0
    for i in range(3, x, 8):
        if istPrimzahl(i):
            counter2 = counter2 + 1
    print(f'Das waren {counter2} Primzahlen\n')

def schreibePrimzahlBis3(x):
    counter3 = 0
    for i in range(5, x, 8):
        if istPrimzahl(i):
            counter3 = counter3 + 1
    print(f'Das waren {counter3} Primzahlen\n')

def schreibePrimzahlBis4(x):
    counter4 = 0
    print('counter4')
    for i in range(7, x, 8):
        if istPrimzahl(i):
            counter4 = counter4 + 1
    print(f'Das waren {counter4} Primzahlen\n')

grenze = input("Die letzte zu testende Zahl:\n")
timeStart = time.time()

p1 = Process(target=schreibePrimzahlBis1, args=[int(grenze)])
p2 = Process(target=schreibePrimzahlBis2, args=[int(grenze)])
p3 = Process(target=schreibePrimzahlBis3, args=[int(grenze)])
p4 = Process(target=schreibePrimzahlBis4, args=[int(grenze)])
p1.start()
p2.start()
p3.start()
p4.start()
p1.join()
p2.join()
p3.join()
p4.join()

timeEnd = time.time()
print(f'Das hat ca. {timeEnd-timeStart} sekunden gedauert.')
The original code, which would take way too long, in case you are interested:
import time
import math

timeStart = 0
timeEnd = 0

def istTeilbar(x, y):
    if x % y == 0:
        return True
    return False

def istPrimzahl(x):
    for i in range(2, int(math.sqrt(x)) + 1):
        if istTeilbar(x, i):
            return False
    return True

def schreibePrimzahlBis(x):
    counter = 0
    for i in range(2, x):
        if istPrimzahl(i):
            if counter == 10000:
                print(10000)
            elif counter == 100000:
                print(100000)
            elif counter == 1000000:
                print(1000000)
            elif counter == 10000000:
                print(10000000)
            elif counter == 100000000:
                print(100000000)
            counter = counter + 1
    print('Das waren')
    print(counter)
    print('Primzahlen.\n')

grenze = input("Die letzte zu testende Zahl:\n")
timeStart = time.time()
schreibePrimzahlBis(int(grenze))
timeEnd = time.time()
print('Das hat ca.')
print(timeEnd - timeStart)
print('sekunden gedauert')
One way to parallelize tasks is to use concurrent.futures. I personally like this solution. Maybe you want to try this:
import concurrent.futures

def function_A(arg1, arg2):
    ...
    return val_A

def function_B(arg1, arg2, arg3):
    ...
    return val_B

if __name__ == '__main__':
    arg1, arg2, arg3 = ...
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_A = executor.submit(function_A, arg1, arg2)
        future_B = executor.submit(function_B, arg1, arg2, arg3)
        print(future_A.result())
        print(future_B.result())
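One caution: for a CPU-bound task like primality testing, a ThreadPoolExecutor is limited by the GIL, so a ProcessPoolExecutor is usually the better fit. A sketch adapted to the question's start-1/3/5/7, step-8 scheme (zaehlePrimzahlen is an illustrative helper name; istPrimzahl is taken from the question, and note that, like the original, this never tests 2 and counts 1 as prime):

import concurrent.futures
import math

def istPrimzahl(x):
    for i in range(2, int(math.sqrt(x)) + 1):
        if x % i == 0:
            return False
    return True

def zaehlePrimzahlen(start, stop, step):
    # Count primes in range(start, stop, step).
    return sum(1 for i in range(start, stop, step) if istPrimzahl(i))

if __name__ == '__main__':
    grenze = 1_000_000
    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = [executor.submit(zaehlePrimzahlen, start, grenze, 8)
                   for start in (1, 3, 5, 7)]
        total = sum(f.result() for f in futures)
    print(total)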

Multiprocessing.apply on Python 3

I have a function that loops over values from a dictionary. I want to split my dict keys so I can break the dict into parts equal to my number of CPUs. My function is:
def find_something2(new2, threl=2.0, my_limit=150, far=365):
    """Find stocks that are worth buying."""
    global current_date, total_money, min_date, current_name, dates_dict, mylist, min_date_sell, reduced_stocks
    worthing = list()
    for stock in new2:
        frame = reduced_stocks[stock]
        temp = frame.loc[current_date:end_date]
        if not temp.empty:
            mydate = temp.head(far).Low.idxmin()
            if mydate <= min_date_sell:
                my_min = temp.head(far).Low.min()
                if total_money >= my_min > 0:  # find the min date at four months
                    ans, res, when_sell, total, income = worth_buy(stock, frame, mydate, 'Low',
                                                                   thres=threl, sell_limit=my_limit)
                    if ans:
                        if income > 3 * 10 ** 6:
                            worthing.append([mydate, stock, res, when_sell, total, income])
    if current_date > '1990-01-01':
        return sorted(worthing, key=itemgetter(0))
    elif current_date > '1985-01-01':
        return sorted(worthing, key=itemgetter(0))
    else:
        answer = sorted(worthing, key=itemgetter(5), reverse=True)
        return answer[::11]
So what I have tried is:
import multiprocessing as mp

result_list = []

def log_result(result):
    # This is called whenever find_something2 returns a result.
    # result_list is modified only by the main process, not the pool workers.
    global result_list
    result_list.append(result)

def apply_async_with_callback():
    global reduced_stocks
    temp = list(reduced_stocks.keys())
    temp1 = temp[0:1991]
    temp2 = temp[1991:]
    temp = [temp1, temp2]
    pool = mp.Pool(2)
    for i in temp:
        pool.apply_async(find_something2, args=(i, 1.1, 2200, 1,), callback=log_result)
    pool.close()
    pool.join()
    print(result_list)

if __name__ == '__main__':
    apply_async_with_callback()
Is this the right way?
I also tried threads, but the CPU maxes out at 15% although I am using 12 threads (I have a 6-core Intel CPU):
def pare():
    relist = list(reduced_stocks.keys())
    sublist = [relist[x:x + 332] for x in range(0, len(relist), 332)]
    data = [x for x in sublist]
    threads = list()
    from threading import Thread
    for i in range(12):
        process = Thread(target=find_something2, args=(1.4, 2500, 8, data[i], i, results))
        process.start()
        threads.append(process)
    for process in threads:
        process.join()
One way to do multiprocessing is to create a Pool and pass the prepared data to it, wait for the computation to finish, and process the results. (Threads most likely won't help here: the 15% CPU ceiling you saw is consistent with the GIL keeping CPU-bound Python code on a single core.) The code below suggests how to do that.
# Set up the function so it gets everything from arguments
def find_something2(new2, threl, my_limit, far, current_date, total_money, min_date_sell, reduced_stocks, end_date):
    # ....
    pass

# Prepare the data.
# Replace a1, a2, ... with the actual parameters your function takes.
data = [(a1, a2, a3, ...) for your_data in your_dict]

import multiprocessing as mp

with mp.Pool() as pool:
    results = pool.starmap(find_something2, data)
    print(results)
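To split the dict keys evenly across workers, as the temp1/temp2 slicing above attempts by hand, a sketch of a generic helper (chunk_keys is an illustrative name; this assumes find_something2 takes its key list as the first argument, as in the question, and that everything else it needs is passed in or picklable):

import multiprocessing as mp

def chunk_keys(d, n_chunks):
    # Split the dict's keys into n_chunks roughly equal lists.
    keys = list(d)
    size = -(-len(keys) // n_chunks)  # ceiling division
    return [keys[i:i + size] for i in range(0, len(keys), size)]

if __name__ == '__main__':
    chunks = chunk_keys(reduced_stocks, mp.cpu_count())
    data = [(chunk, 1.1, 2200, 1) for chunk in chunks]
    with mp.Pool() as pool:
        results = pool.starmap(find_something2, data)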

Time issue with ZMQ Recv and recv_string

The code below is a ZMQ subscriber to a publisher that is sending me data. It uses the counter to tell me when it's at 30 and 59 seconds, so I can run my write-to-CSV roughly every 30 seconds.
Problem: I am now timing all of the processing in my thread. The lines where message and message2 call socket.recv_string() are taking anywhere from half a second to 20 seconds to receive a string, causing the thread to miss the 30- and 59-second intervals I set. This was not happening yesterday. The other timers around the if statements report 0.00001 or 0.0 seconds, so that part isn't the problem.
I'm wondering what could affect this. Could it be the processing power of my computer? Or does recv_string simply block for however long it takes the publisher to actually send something?
I'm not running in a dev or production environment; it's a shared virtual server (a zero client) with something like 15 other people on it. I've never had this problem before, and in another script I have set up for another ZMQ pub/sub I receive messages in 0.01 or 0.001 seconds, occasionally up to 3 seconds, which is more manageable, but the norm was 0.01.
Any tips or help would be amazing. Thanks in advance.
import zmq
import pandas as pd
import time
import threading

df_fills = pd.DataFrame()
df_signal = pd.DataFrame()
second_v = [30, 59]
s = 0
m = 0
h = 0
d = 0

def counter():
    global h, s, m, d
    while True:
        s += 1
        #print("Second:{}".format(s))
        if s >= 60:
            m += 1
            s = 0
        if m >= 60:
            h += 1
            m = 0
        if h >= 24:
            d += 1
            h = 0
        #print(s)
        time.sleep(1)

class zmq_thread(threading.Thread):
    def __init__(self, name):
        threading.Thread.__init__(self)
        self.name = name

    def run(self):
        global df_fills, second_v, s
        print('zmq started')
        context = zmq.Context()
        socket = context.socket(zmq.SUB)
        socket.connect(SERVER)
        socket.setsockopt_string(zmq.SUBSCRIBE, 'F')
        print('socket connected')
        tickers = [a bunch of tickers]
        while True:
            try:
                start2 = time.time()
                if s == 30:
                    print('break')
                    if df_fills.empty == True:
                        print('running fill thread again')
                        z = zmq_thread('Start_ZMQ')
                        #time.sleep(.7)
                        z.run()
                    else:
                        start = time.time()
                        print('writing fills')
                        filename = "a CSV"
                        with open(filename, 'a') as f:
                            df_fills.to_csv(f, encoding='utf-8', index=False, header=False)
                            f.close()
                        print('wrote fills')
                        end = time.time()
                        print(end - start)
                        df_fills = df_fills.iloc[0:0]
                        z = zmq_thread('Start_ZMQ')
                        z.run()
                        return df_fills
                end2 = time.time()
                print(end2 - start2)
                start3 = time.time()
                message = socket.recv_string()
                message2 = socket.recv_string()
                end3 = time.time()
                print(end3 - start3, 'message timing')
                print(s)
                start1 = time.time()
                if message == 'F':
                    # message2_split = message2.split("'")
                    message2_split = message2.split(";")
                    message3_split = [e[3:] for e in message2_split]
                    message4 = pd.Series(message3_split)
                    if message4[0] in tickers:
                        df_fills = df_fills.append(message4, ignore_index=True)
                        print('fill')
                end1 = time.time()
                print(end1 - start1)
            except KeyboardInterrupt:
                break

counter = threading.Thread(target=counter)
zmq_loop = zmq_thread('Start_ZMQ')

#%%
counter.start()
zmq_loop.start()
I didn't realize that ZMQ's recv_string is blocking by default. So I did this:
try:
    message = socket.recv_string(flags=zmq.NOBLOCK)
    message2 = socket.recv_string(flags=zmq.NOBLOCK)
except zmq.ZMQError as e:
    if e.errno == zmq.EAGAIN:
        pass
else:
    if message == 'ABA_BB':
        message2_split = message2.split(";")
        message3_split = [e[3:] for e in message2_split]
        message4 = pd.Series(message3_split)
        #print(message4)
        if message4[2] == '300':
            df_signal = df_signal.append(message4, ignore_index=True)
            print('Signal Appended')
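A NOBLOCK call inside a tight loop spins the CPU, so as an alternative sketch (assuming the same SUB socket as above), a zmq.Poller with a timeout waits for data without blocking indefinitely and keeps the 30/59-second checks on schedule:

import zmq

poller = zmq.Poller()
poller.register(socket, zmq.POLLIN)

while True:
    # Wait up to 100 ms for a message instead of blocking forever.
    events = dict(poller.poll(timeout=100))
    if socket in events:
        message = socket.recv_string()
        message2 = socket.recv_string()
        # ... handle the two-part message as above ...
    # The timer/CSV checks run here at least every 100 ms.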
