I am a beginner at multiprocessing in Python, and I can't reformulate my code for multiprocessing.
def Customer_Merchant_value_pass(minicustomer_list):
    """For each customer in *minicustomer_list*, average the preferential-
    attachment score against every merchant in each category, then collect
    the per-customer top categories via ``MaxValue_calc``.

    Returns a list with one result dict per customer.

    Fixes vs. the original:
    * ``itertools.islice(lst, len(lst))`` was a no-op — iterate directly.
    * ``rowlist`` was unbound when the input list was empty; we build and
      return ``dict_list`` instead.
    * ``session.close()`` inside the ``with`` block was redundant — the
      context manager already closes the session.
    * guards against an empty merchant list (ZeroDivisionError).
    """
    begin = timer()
    dict_list = []
    with graphdriver.session() as session:
        for customer in minicustomer_list:
            avg_scores = []
            categories = []
            for category, merchants in list_of_unique_merchants.items():
                if not merchants:  # avoid ZeroDivisionError on empty category
                    continue
                sum_val = 0
                for merchant in merchants:
                    # Build and run one Cypher query per (customer, merchant) pair.
                    query = Prefer_Attachment_query2([customer, merchant])
                    for line in session.run(query):
                        sum_val += line["score"]
                avg_scores.append(sum_val / len(merchants))
                categories.append(category)
            max_dictionary = MaxValue_calc(customer, categories, avg_scores)
            dict_list.append(max_dictionary)
    end = timer()
    print("Total time :", (end - begin))
    return dict_list
# The function requires the customer list as an argument; calling it with
# no arguments raises TypeError.
datalist = Customer_Merchant_value_pass(minicustomer_list)
def Prefer_Attachment_query2(listval):
    """Return the Cypher query text computing the preferential-attachment
    score between a customer wallet and a merchant wallet.

    listval: two-item list ``[customer_wallet_id, merchant_wallet_id]``.
    NOTE(review): the ids are spliced directly into the query text; fine
    for trusted ids, but prefer query parameters for untrusted input.
    """
    customer_wallet = listval[0]
    merchant_wallet = listval[1]
    return (
        f"MATCH (p1:CUSTOMER {{WALLETID: '{customer_wallet}'}})\n"
        f"MATCH (p2:MERCHANT {{WALLETID: '{merchant_wallet}'}})\n"
        "RETURN gds.alpha.linkprediction.preferentialAttachment(p1, p2,"
        '{relationshipQuery: "PAYMENT"}) as score'
    )
from collections import Counter
def MaxValue_calc(customer_wallet, category_code, avg_score):
    """Return the three highest-scoring categories for a customer.

    Parameters
    ----------
    customer_wallet : the customer's wallet id
    category_code   : list of category keys (BMCC codes)
    avg_score       : list of average scores, parallel to *category_code*

    Returns a dict of the 3 largest category->score pairs plus a
    'WALLET_ID' entry.  (The original's ``top2...`` names were misleading:
    it always took the top THREE.  Large amounts of commented-out code
    were removed.)
    """
    print("In MaxValue_calc")
    print(" For Customer", customer_wallet)
    # Pair each category with its average score.
    wallet_dictionary = dict(zip(category_code, avg_score))
    print("Wallet dictionary", wallet_dictionary)
    # Counter.most_common(3) yields the 3 largest values, ties kept in
    # insertion order.
    topscore = Counter(wallet_dictionary).most_common(3)
    top_values = dict(topscore)
    print("Top values", top_values)
    top_values.update({'WALLET_ID': customer_wallet})
    return top_values
This code generates a list, but it takes a huge amount of time to process larger inputs. I tried the following code for parallelism:
import concurrent.futures

# A single executor.submit() hands the pool ONE task, so only one worker
# (one core) ever runs — exactly the symptom described.  Fan the work out:
# one task per customer (chunk several customers per task for large lists).
# NOTE: results arrive in completion order, not input order.
if __name__ == "__main__":  # guard required for process pools under spawn
    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = [executor.submit(Customer_Merchant_value_pass, [customer])
                   for customer in minicustomer_list]
        datalist = []
        for future in concurrent.futures.as_completed(futures):
            datalist.extend(future.result())
This worked, but it still could not use all of my CPU's cores. How can I achieve proper parallelism?
Here i iterates over a list
key iterates over a dictionary of lists and extract value
j iterates over elements of extracted lists
So the workflow is: Customer_Merchant_value_pass processes the mini customer list, a dictionary of lists, and the elements extracted from those lists. For each element it calls Prefer_Attachment_query2([i, j]), where i is an element of the customer list and j is an element of the merchant list. That function dynamically pushes a query to Neo4j and returns a score. After getting the score, the average value is calculated. Then MaxValue_calc is called to fetch the three largest average scores: it takes the customer identifier, the category list (which is the list of keys in the dictionary) and the list of average scores, and it returns the largest key-wise average scores. Finally, the function returns a data list.
Related
I want to iterate randomly over some given items in my list
for example:
items = ['rock', 'paper', 'scissors']
for item in items:
    # Prints None every time: random.shuffle() works in place and returns None.
    # NOTE(review): mutating the list while iterating over it is also risky.
    print(random.shuffle(items))
I want each iteration to spit out a random item from my list, but every time it gives 'None'.
random.shuffle() shuffles the list in-place and it returns None.
You need to:
import random

items = ['rock', 'paper', 'scissors']

# random.sample returns a NEW list of k unique picks; the original is untouched.
for item in random.sample(items, 3):  # get list of n=3 random shuffled values no dupes
    print(item)
# or
random.shuffle(items)  # shuffle in place (might get the same order as well)
for item in items:
    print(item)
See differences between:
https://docs.python.org/3.8/library/random.html#random.shuffle (in place)
https://docs.python.org/3.8/library/random.html#random.sample (no dupes)
https://docs.python.org/3.8/library/random.html#random.choices (maybe dupes)
Any help would be appreciated! I'm scraping multiple URLs and iterating over the URLs with a for loop. I'm putting relevant data into individual lists. However, I'm trying to organize my data in a list of lists to compare with other data that I haven't scraped yet. How do I iterate through the list of lists and put data into each element of the list? This doesn't seem that hard... what am I missing?
def get_info(item_urls):
    """Build and return a table (list of lists) for the data scraped per URL.

    Fixes vs. the original:
    * the signature ``def get_info(item_urls)#, count):`` was a syntax
      error — the closing paren was commented out;
    * ``list('table[{}]'.format(i)).append(truth)`` built a throwaway list
      from the characters of the STRING "table[0]" and appended to that,
      leaving ``table`` empty — index the real list instead;
    * returns the built table rather than the "borked" placeholder,
      matching the stated intent.
    """
    table = []  # empty result for an empty URL list
    for item in item_urls:
        # placeholder for the data scraped from the current URL
        data = ["beer", "is", "awesome!", "...", "for", "helping", "with", "my", "depression"]
        count = len(data)
        table = [[] for _ in range(count)]
        for truth in data:
            for i in range(count):
                table[i].append(truth)  # append to the actual sub-list
            print(truth)
        for thing in table[0]:
            print(thing)
    return table
my fake logic:
for each element in data, append the element to table.
Once I iterate through all the URLs, I would like to return the entire built out table.
myList[i] iterates through a list. myList[i][j] iterates through elements in list of lists. j is the index for element in the inner list.
So I'm trying to help a friend out with a solution where people donate to a cause and randomly get selected. The problem is, the more you donate, the greater the chance you have of being selected.
Say I have a dictionary of people:
# Donor -> number of donations (selection weight).
people = {
    "Mike": 0,
    "Mark": 4,
    "Zach": 2,
    "Bryan": 2,
    "Eddie": 1,  # the comma was missing here — the literal wouldn't parse
    "Erin": 0,
}
Is there a way that a person can be randomly picked from this dictionary, but based on their value, giving them a greater chance of winning?
random module has a choices method that takes weights (it returns a list of k=1 by default, so take the first item):
import random

# choices() supports per-item weights and returns a list (k=1 by default),
# so [0] extracts the single winner.  Zero-weight entries can never win.
print(random.choices(list(people.keys()), weights=people.values())[0])
(in case you wonder, as I did, if keys() and values() are in matching order, they are. See Python dictionary: are keys() and values() always the same order?)
See in action here: https://repl.it/repls/AmbitiousVioletMacrolanguage
import random


def flatlist(l):
    """Flatten one level of nesting into a single list."""
    flattened = []
    for sublist in l:
        flattened.extend(sublist)
    return flattened


# Repeat each name once per donation; zero-weight donors never appear,
# so a uniform choice over the flattened list is a weighted draw.
biglist = flatlist([[d] * people[d] for d in people])
print('weighted people:', biglist)
winner = random.choice(biglist)
print('winner:', winner)
I have a list of suppliers eg.
suppliers=[] in which i have a n number of elements in somewhat following way eg [{"supplierId":"1","aCode":2},{"supplierId":"1","aCode":3}]
Now, I need to check, based on the value of a property — let's say areaCode=2 — whether that area code appears in the list of suppliers under the key aCode. How can I determine whether the area code exists with minimum time and code complexity, and without using for loops, since I will have a lot of data in the suppliers array?
In your case because it is a list of dict it is hard to not use a loop. If you only want to see if it exists you can one line it such as:
print(any(areaCode==x['aCode'] for x in suppliers))
or if you want the entries you can one line it like this:
suppliers_in_area = [x for x in suppliers if x['aCode'] == areaCode]
Both versions require a for loop and both are equally fast but the first one requires minimal memory.
- Edit -
If you just want the first occurrence (or if only one element exists), then short-circuit your for loop.
def get_supplier_by_area(area_code, supplier_list=None):
    """Return the first supplier dict whose 'aCode' equals *area_code*,
    or None when there is no match.

    *supplier_list* defaults to the module-level ``suppliers`` so existing
    callers keep working; passing it explicitly removes the global
    coupling and makes the function testable.
    """
    if supplier_list is None:
        supplier_list = suppliers
    for supplier in supplier_list:
        if supplier['aCode'] == area_code:
            return supplier
    return None  # explicit: nothing found
or you can use a generator if you want to return the next supplier every time to call the function.
def get_supplier_by_area(area_code, supplier_list=None):
    """Yield every supplier dict whose 'aCode' equals *area_code*.

    *supplier_list* defaults to the module-level ``suppliers`` so existing
    callers keep working; passing it explicitly removes the global
    coupling.  Each next() on the generator returns the next match.
    """
    if supplier_list is None:
        supplier_list = suppliers
    for supplier in supplier_list:
        if supplier['aCode'] == area_code:
            yield supplier
try:
    # Each next() pulls one more matching supplier from the generator.
    gen = get_supplier_by_area('A01')
    print(next(gen))  # will print the first result or will raise a StopIteration error
    print(next(gen))  # will print the second result or will raise a StopIteration
except StopIteration:
    print("No more results")
The inner items are dictionaries, they can be referenced by their key.
def main():
    """Print every supplier record whose 'aCode' matches the search key."""
    records = [{"supplierId": "1", "aCode": 2},
               {"supplierId": "1", "aCode": 3}]
    search_key = 2
    # Filter lazily, then print each hit.
    matches = (record for record in records if record['aCode'] == search_key)
    for match in matches:
        print(match)


if __name__ == "__main__":
    main()
How do I sort data that is stored in a global list after inserting it within a method, so that the entries pair up correctly before being stacked into another list? Or is it bad practice — and does it complicate things — to store data in one global list instead of separate ones per method, sorting them afterwards?
Below is the example of the scenario
# NOTE(review): pseudocode sketch — the "#returns title" inside the
# parentheses makes these defs invalid Python; kept only to illustrate the
# intended flow.  Also note "list" shadows the builtin; use another name
# in real code.
list= []
dictionary = {}
def MethodA(#returns title):
    #searches for corresponding data using beautifulsoup
    #adds data into dictionary
    # list.append(dictionary)
    # returns list
def MethodB(#returns description):
    #searches for corresponding data using beautifulsoup
    #adds data into dictionary
    # list.append(dictionary)
    # returns list
Example of Wanted output
MethodA():[title] #scraps(text.title) data from the web
MethodB():[description] #scraps(text.description) from the web
#print(list)
>>>list=[{title,description},{title.description},{title,description},{title.description}]
Actual output
MethodA():[title] #scraps(text.title) data from the web
MethodB():[description] #scraps(text.description) from the web
#print(list)
>>>list =[{title},{title},{description},{description}]
There are a few examples I've seen; such as using Numpy and sorting them in an Array;-
arraylist = np.array(list)
# NOTE(review): 2-D indexing only works when every row has the same length;
# ragged input (rows with None gaps) produces a 1-D object array, which is
# why this raises "too many indices for array".
arraylist[:, 0]
#but i get a 'too many indices for array'-
#because I have too much data loading in; including that some of them
#do not have data and are replaced as `None`; so there's an imbalance of indexes.
Im trying to keep it as modulated as possible. I've tried using the norm of iteration;
but it's sort of complicated because I have to indent more loops in it;
I've tried NumPy and enumerate, but I can't work out how to use them here. Because it's an unbalanced list — some values are returned as None — I get the error: all the input array dimensions except for the concatenation axis must match exactly.
Example : ({'Toy Box','Has a toy inside'},{'Phone', None }, {'Crayons','Used for colouring'})
Update; code sample of methodA
def MethodA(tableName, rowName, selectedLink):
    """Scrape topic titles from *selectedLink* (presumably a BeautifulSoup
    tree — TODO confirm) and append each one, wrapped in a
    ``{'Titles': ...}`` dict, to the module-level ``global_list``.
    """
    try:
        for table_tag in selectedLink.find_all(tableName, {'class': rowName}):
            topic_title = table_tag.find('a', href=True)
            if topic_title:
                def_dict1 = {
                    'Titles': topic_title.text.replace("\n", "")}
                global_list.append(def_dict1 )
        # NOTE(review): indentation was lost in the paste — this return may
        # originally have sat inside the loop (returning the FIRST title);
        # as reconstructed it returns the LAST one.  Confirm against caller.
        return def_dict1
    except:
        # NOTE(review): bare except silently swallows every error, and this
        # assignment is never returned — the function falls through and
        # returns None implicitly.  Consider `except Exception` + logging.
        def_dict1 = None
Assuming you have something of the form:
# Alternating one-element sets: even indices hold keys, odd indices values.
x = [{'a'}, {'a1'}, {'b'}, {'b1'}, {'c'}, {None}]
you can do:
# Pair even-index sets (keys) with odd-index sets (values); list(k)[0]
# extracts each set's single element WITHOUT mutating the sets in x.
dictionary = {list(k)[0]: list(v)[0] for k, v in zip(x[::2], x[1::2])}
or
# Destructive variant: pop() empties the sets in x as it builds the dict.
# (The original wrote ``s.pop()`` — a NameError, since the loop variable
# bound to the key set is ``k``.)
dictionary = {k.pop(): v.pop() for k, v in zip(x[::2], x[1::2])}
The second method will clear your sets in x