Thread safe counter using python's multiprocessing starmap - python-3.x

At the moment I'm trying to handle results from a calculation which come in very fast. At first I inserted each simulation result into an sqlite database but it turned out to be the bottleneck of the entire calculation. So I ended up using cursor.executemany instead of cursor.execute which is much faster.
My problem is now that I'm somehow not able to implement a thread safe counter.
The executemany task should be run every 1000 calculations. Therefore I implemented an initializer with a multiprocessing.Value I also tried this solution (http://eli.thegreenplace.net/2012/01/04/shared-counter-with-pythons-multiprocessing) but somehow some values of the counter are duplicates which ends up in running the executemany task to often or not at all.
If anybody has an idea how to solve this issue I'd really appreciate it.
Here's a minimum sample:
import multiprocessing, sqlite3
from multiprocessing import Value, Lock
from itertools import repeat
def worker(Testvalues, TotalValues):
MP_counter.value += 1
counter.increment()
con = sqlite3.connect("Test.db", timeout=30.0)
cur = con.cursor()
# Minimum sample:
Helper = list(range(5))
Helper = [x * Testvalues for x in Helper]
GList.append(Helper)
Execute_Every = 10
print("Counter class: %d" % (counter.value()))
print("MP_counter: %d" % (MP_counter.value))
if counter.value() % Execute_Every == 0 or counter.value() == TotalValues - 1:
print("Execute query")
print("Counter class: %d" % (counter.value()))
print("MP_counter: %d" % (MP_counter.value))
Helper = [tuple(row) for row in GList[:Execute_Every]]
del GList[:Execute_Every]
cur.executemany(
"INSERT INTO Test (One, Two, Three, Four, Five) VALUES (?, ?, ?, ?, ?);", Helper)
con.commit()
con.close()
def setup(t, g, c):
global MP_counter
global GList
global counter
MP_counter = t
GList = g
counter = c
class Counter(object):
def __init__(self, initval=0):
self.val = Value('i', initval)
self.lock = Lock()
def increment(self):
with self.lock:
self.val.value += 1
def value(self):
with self.lock:
return self.val.value
if __name__ == '__main__':
m = multiprocessing.Manager()
CPUS = multiprocessing.cpu_count()
MP_counter = multiprocessing.Value('i', 0)
GList = m.list([])
thread_safe_counter = Counter(0)
l = multiprocessing.Lock()
WORKERS = multiprocessing.Pool(initializer=setup, initargs=[MP_counter, GList, thread_safe_counter],processes=CPUS)
con = sqlite3.connect("Test.db", timeout=30.0)
cur = con.cursor()
cur.execute('PRAGMA journal_mode=wal')
SQLCommand = "CREATE TABLE IF NOT EXISTS Test (One INT, Two INT, Three INT, Four INT, Five INT);"
cur.execute(SQLCommand)
con.close()
TotalValues = 100
Testvalues = list(range(TotalValues))
WORKERS.starmap(worker, zip(Testvalues, repeat(TotalValues)))
WORKERS.close()
WORKERS.join()
#Check if list is empty
print(GList)
Thank you guys :)

Your counter has an increment() and a value() method, which need to be called separately, so to make this safe you'd have to call both operations while holding the lock. Your increment() method should return the new value after incrementing it, and you should use that without further calls to value(), e.g:
class Counter(object):
def __init__(self, initval=0):
self.val = Value('i', initval)
self.lock = Lock()
def increment(self):
with self.lock:
self.val.value += 1
return self.val.value
...
def worker(Testvalues, TotalValues):
counter_value = counter.increment()
# use only counter_value from here on
...
Also, a Value is already created with a default RLock, which can be overridden in the constructor call with a different lock type if needed. So you don't really need to allocate your own lock, you could just use:
class Counter(object):
def __init__(self, initval=0):
self.val = Value('i', initval)
# or Value('i', initval, lock=Lock())
def increment(self):
with self.val.get_lock():
self.val.value += 1
return self.val.value

Related

Property method in python not returning correct value

I am trying to solve the question in which I am asked to use property method to count the number of times the circles are created . Below is the code for the same.
import os
import sys
#Add Circle class implementation below
class Circle:
counter = 0
def __init__(self,radius):
self.radius = radius
Circle.counter = Circle.counter + 1
def area(self):
return self.radius*self.radius*3.14
def counters():
print(Circle.counter)
no_of_circles = property(counter)
if __name__ == "__main__":
res_lst = list()
lst = list(map(lambda x: float(x.strip()), input().split(',')))
for radius in lst:
res_lst.append(Circle(radius).area())
print(str(res_lst), str(Circle.no_of_circles))
The above code gives correct output for the area but counter should be 3 and instead I am getting below output . Below is the output for input = 1,2,3
[3.14, 12.56, 28.26] <property object at 0x0000024AB3234D60>
I have tried everything but no luck. In the main section of the code no_of_circles is called as Circle.no_of_circles which suggests me that it will use property method of python. But the output is wrong. Please help me find where I am going wrong.
Here is a simple working example using the property function.
Note: It is always a good practice to make an instance of a class(once) and then uses the instance all over your code. Also better to use self.counter instead of Cirlcle.counter. self is Circle.
#Add Circle class implementation below
class Circle:
def __init__(self,value=0):
self._counter = value
def area(self, radius):
return radius*radius*3.14
def add_counter(self, value):
print('add counter')
self._counter += 1
def get_counter(self):
print('get counter')
return self._counter
no_of_circles = property(get_counter, add_counter)
if __name__ == "__main__":
circle = Circle()
area = []
for idx in range(5):
area.append(circle.area(idx))
circle.add_counter(1)
print("number of calls: ", circle.no_of_circles)
print('area:', area)
output(note how get and add counter are called):
add counter
add counter
add counter
add counter
add counter
get counter
number of calls: 5
area: [0.0, 3.14, 12.56, 28.26, 50.24]
str(Circle.no_of_circles) here you're calling the property of a class not an instance of that class i.e a Circle object; the following will work :-
class Circle:
counter = 0
def __init__(self,radius):
self.radius = radius
Circle.counter = Circle.counter + 1
def area(self):
return self.radius*self.radius*3.14
def counters():
print(Circle.counter)
no_of_circles = property(counter)
if __name__ == "__main__":
lst = list(map(lambda x: float(x.strip()), input("Enter radius : ").split(',')))
cir_lst = [Circle(_) for _ in lst]
res_lst = [__.area() for __ in cir_lst]
print(res_lst, cir_lst[-1].counter)

Multiprocessing.apply on Python 3

I have a function that is looping using values from a dictionary. I want to split my dict keys, so i can break my dict at parts equal to my cpus. My fucntion is:
def find_something2(new2, threl=2.0, my_limit=150, far=365):
""" Find stocks tha are worth buying"""
global current_date, total_money, min_date, current_name, dates_dict, mylist, min_date_sell, reduced_stocks
worthing = list()
for stock in new2:
frame = reduced_stocks[stock]
temp = frame.loc[current_date:end_date]
if not temp.empty:
mydate = temp.head(far).Low.idxmin()
if mydate <= min_date_sell:
my_min = temp.head(far).Low.min()
if total_money >= my_min > 0: # find the min date at four months
ans, res, when_sell, total, income = worth_buy(stock, frame, mydate, 'Low',
thres=threl, sell_limit=my_limit)
if ans:
if income > 3 * 10 ** 6:
worthing.append([mydate, stock, res, when_sell, total, income])
if current_date > '1990-01-01':
return sorted(worthing, key=itemgetter(0))
elif current_date > '1985-01-01':
return sorted(worthing, key=itemgetter(0))
else:
answer = sorted(worthing, key=itemgetter(5), reverse=True)
return answer[::11]
so what i have tried is:
import multiprocessing as mp
result_list = []
def log_result(result):
# This is called whenever foo_pool(i) returns a result.
# result_list is modified only by the main process, not the pool workers.
global result_list
result_list.append(result)
def apply_async_with_callback():
global reduced_stocks
temp = list(reduced_stocks.keys())
temp1 = temp[0:1991]
temp2 = temp[1991:]
temp = [temp1, temp2]
pool = mp.Pool(2)
for i in temp:
pool.apply_async(find_something2, args=(i, 1.1, 2200, 1,), callback=log_result)
pool.close()
pool.join()
print(result_list)
if __name__ == '__main__':
apply_async_with_callback()
is this the right way?
I also tried threads but cpu goes max at 15% althoug iam using 12 threads(i have 6 intel core)
def pare():
relist = list(reduced_stocks.keys())
sublist = [relist[x:x+332] for x in range(0, len(relist), 332)]
data = [x for x in sublist]
threads = list()
from threading import Thread
for i in range(12):
process = Thread(target=find_something2, args=(1.4,2500,8,data[i],i,results))
process.start()
threads.append(process)
for process in threads:
process.join()
One way to do multiprocessing is to create a Pool and pass the prepared data to it. Wait for computation done and process the results. The code suggests how to do that.
# setup the function so it gets everything from arguments
def find_something2(new2, threl, my_limit, far, current_date, total_money, min_date_sell, reduced_stocks, end_date):
# ....
pass
# prepare the data
# replace the a1, a2 ... with the actual parameters your function takes
data = [(a1, a2, a3, ...) for your_data in your_dict]
import multiprocessing as mp
with mp.Pool() as pool:
results = pool.starmap(find_something2, data)
print(results)

Send message when failed(Python)

Im trying to simulate two machines working, and failing at random times. When they fail they call assistance. These two machines is part of bigger system of different machines, which needs to know when its neighbor has failed to do its job.
So far, I have made the simulate of the two machines, but I cant figure out how to send messages to their neighbors without each machine needing to know the whole system?
This is what I have so far:
import simpy
import random
random_seed=42
MTTF = 3500
break_mean = 1 / MTTF
sim_time = 4 * 7*24*60 # 4 weeks 24/7
num_machines = 2
rep_time = 30
tpp = 20 #20 minutes to make each part
neighbour = 3 #How many should it send to?
#Creating a class called messaging which is an object.
class messaging(object):
#DEfing the initilizing function, and initilize self, Environment, and capacity which is set to infinity, by a simpy core-function.
def __init__(self, env, capacity=simpy.core.Infinity):
self.env = env
self.capacity = capacity
self.pipes = []
#Making a function which work on everything that is a part of the message. With name Put.
def put(self, value):
if not self.pipes: #How to get this error?
raise runtime_error('There are no output pipes.')
#Create a variable, events, store to it pipe values
events = broken_machine()
return self.env.all_of(events)
def get_output_conn(self):
#Set the capacity of pipe variable to store infinity.
pipe = simpy.Store(self.env, capacity=self.capacity)
#to each pipes, add(or append) pipe
self.pipes.append(pipe)
return pipe
def mesg_generator(number, env, out_pipe):
msg = ('Failed')
def message_reciever(name, env, in_pipe):
while True:
msg = yield in_pipe.get()
print("%s received message: %s" % (number, msg[1]))
def time_per_part():
return tpp
def ttf():
return random.expovariate(break_mean)
class Machine(object):
def __init__(self, env, number, repair):
#self.arg = arg
self.env = env
self.number = number
self.parts_made = 0
self.times_broken = 0
self.broken = False
self.process = env.process(self.working(repair))
env.process(self.broken_machine())
def working(self, repair):
while True:
work = time_per_part()
while work:
try:
begin = self.env.now
yield self.env.timeout(work)
work = 0
except simpy.Interrupt:
self.broken = True
work -= self.env.now - begin
with repair.request(priority = 1) as req:
yield req
yield self.env.timeout(rep_time)
self.times_broken +=1
yield message_reciever()
#print('Machine down')
self.broken = False #Machine fixed again
self.parts_made +=1
def broken_machine(self):
while True:
yield self.env.timeout(ttf())
if not self.broken:
self.process.interrupt()
def other_jobs(env, repair):
while True:
work = tpp
while work:
with repair.request(priority=2) as req:
yield req
try:
begin = env.now
yield env.timeout(work)
work = 0
except simpy.Interrupt:
work -= env.now - begin
print("This simulates machines 3 and 4 doing the same tasks.")
random.seed(random_seed)
env = simpy.Environment()
pipe = simpy.Store(env)
bc_pipe = messaging(env)
repair = simpy.PreemptiveResource(env, capacity = 1)
machines = [Machine(env, 'Machine %d' % i, repair)
for i in range(num_machines)]
env.process(other_jobs(env, repair))
env.run(until=sim_time)
#Show how many times each machine failed:
for machine in machines:
print("%s broke down %d times" %(machine.number, machine.times_broken))

how to get the updated size of dynamic array in python

How should I edit my code?
import ctypes
import sys
class DynamicArray(object):
def __init__(self):
self.n = 0 #counter
self.capacity = 1
self.A = self.make_array(self.capacity)
def __len__(self):
'''Returns the number of elements'''
b = sys.getsizeof(self.A)
print(b)
return self.n
def __getitem__(self,k):
'''Return elements in array with specified Index'''
if not 0<=k<self.n:
return IndexError('Index out of bounds')
return self.A[k]
def append(self,ele):
'''Add element in the array'''
if self.n==self.capacity:
self._resize(2*self.capacity)
self.A[self.n]=ele
self.n+=1
def _resize(self,new_cap):
B = self.make_array(new_cap)
for k in range(self.n):
B[k]=self.A[k]
self.A = B
self.capacity = new_cap
def make_array(self,new_cap):
'''Make a raw array using ctype module'''
return (new_cap*ctypes.py_object)()
arr = DynamicArray()
arr.append(1)
len(arr)
Output:
80
1
arr.append(1)
len(arr)
Output:
80
2
see https://docs.python.org/3/library/sys.html#sys.getsizeof
from doc:
Return the size of an object in bytes. The object can be any type of
object. All built-in objects will return correct results, but this
does not have to hold true for third-party extensions as it is
implementation specific.
Only the memory consumption directly attributed to the object is
accounted for, not the memory consumption of objects it refers to.
I hope it is clear.

How to use a variable inside a function which is defined (variable) in some other function within a class in python?

class Complex(object):
def __init__(self, real, imaginary):
self.real=real
self.imaginary=imaginary
def __add__(self, no):
self.real=self.real+no.real
self.imaginary=self.imaginary+no.imaginary
def __str__(self):
if self.imaginary == 0:
result = "%.2f+0.00i" % (self.real)
else:
result = "%.2f+%.2fi" % (self.real, self.imaginary)
I want to print the sum of two complex numbers inside add function in the format defined in str method without manipulating the str function.
Meaning how can I access the result variable defined in the str method from the add method.
If i correctly understand your question you want to print result of str method inside add. So i think this is the best decision:
class Complex(object):
def __init__(self, real, imaginary):
self.real = real
self.imaginary = imaginary
def __add__(self, no):
self.real = self.real + no.real
self.imaginary = self.imaginary + no.imaginary
print(str(self))
def __str__(self):
if self.imaginary == 0:
result = "%.2f+0.00i" % (self.real)
else:
result = "%.2f+%.2fi" % (self.real, self.imaginary)
return result
a = Complex(1, 2)
b = Complex(3, 4)
a + b

Resources