How do I create an IMAP checker that reads from a CSV file and loops over each line using multithreading? - python-3.x

The script only checks the first 10 lines of the CSV file. I want it to iterate over all the lines of the file, using threads to speed up the process.
Code:
import time
import csv
import imaplib
from threading import Thread

combo = []
FileToOpen = open("emails.csv", "r")
csvDictReader = csv.DictReader(FileToOpen)
successEmail = open("SuccessEmails.txt", "a")
for email in csvDictReader:
    combo.append(email)
rows_count = len(list(csvDictReader))
t1 = time.perf_counter()
combo_new = combo

def ConnectorImap(combo_new):
    for Email in combo_new:
        login = Email['login']
        password = Email['password']
        imap_serv = "imap." + login.split('@')[-1]
        mail_serv = "mail." + login.split('@')[-1]
        try:
            print(login, password, imap_serv)
            print('logging in as %s' % login)
            # create an IMAP4 class with SSL
            imap_ssl = imaplib.IMAP4_SSL(imap_serv)
            resp_code, response = imap_ssl.login(login, password)
            print(resp_code)
            if resp_code == "OK":
                successEmail.write(login + ',' + password)
                successEmail.write("\n")
            imap_ssl.logout()
        except Exception as e:
            print(e)
            pass

threads = []
for idx, line in enumerate(rows_count):
    # We start one thread per url present.
    process = Thread(target=ConnectorImap, args=(combo_new))
    process.start()
    threads.append(process)
for process in threads:
    process.join()
t2 = time.perf_counter()
print(f'finished in{t2 - t1} seconds')
I am new to Python, so any help would be appreciated! I want the script to iterate over all the lines of the file, using threads to speed up the process.
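One way to do this (a sketch, not the original poster's code) is to read every row up front and hand the rows to a pool of worker threads with concurrent.futures.ThreadPoolExecutor; the column names login and password are assumed to match the CSV above.
import csv
import imaplib
from concurrent.futures import ThreadPoolExecutor

def check_account(row):
    # Try to log one account in over IMAP; return (login, password) on success.
    login, password = row['login'], row['password']
    imap_serv = "imap." + login.split('@')[-1]
    try:
        imap_ssl = imaplib.IMAP4_SSL(imap_serv)
        resp_code, _ = imap_ssl.login(login, password)
        imap_ssl.logout()
        if resp_code == "OK":
            return login, password
    except Exception as exc:
        print(login, exc)
    return None

with open("emails.csv", newline="") as f:
    rows = list(csv.DictReader(f))  # every line of the file, not just the first few

# ten worker threads share the full list of rows
with ThreadPoolExecutor(max_workers=10) as pool, \
        open("SuccessEmails.txt", "a") as success:
    for result in pool.map(check_account, rows):
        if result is not None:
            success.write(",".join(result) + "\n")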

Related

Cannot write CSV file from Python

I've been trying to create a CSV file from this code, but it fails every time; I have tried placing the CSV writing in different spots in the code, but nothing has worked so far.
I'm new to Python and to Stack Overflow.
If somebody can explain what I'm doing wrong, that would be helpful.
Thanks in advance for any help.
from time import sleep
import os
import sys
from bleson import get_provider, Observer, UUID16
import csv

GOVEE_BT_mac_OUI_PREFIX = "A4:C1:38"
H5075_UPDATE_UUID16 = UUID16(0xEC88)
govee_devices = {}
# ###########################################################################
FORMAT_PRECISION = ".2f"

# Decode H5075 Temperature into degrees Fahrenheit
def decode_temp_in_f(encoded_data):
    return format((((encoded_data / 10000) * 1.8) + 32), FORMAT_PRECISION)

# Decode H5075 percent humidity
def decode_humidity(encoded_data):
    return format(((encoded_data % 1000) / 10), FORMAT_PRECISION)

# focus here
with open('temp.csv', 'w', newline='') as record:
    record = csv.writer(record)
    record.writerow(['Device Name', 'Device Address', 'Temp', 'Humidity'])

def print_values(mac):
    govee_device = govee_devices[mac]
    print(govee_device['name'], govee_device['address'], govee_device['tempInF'], govee_device['humidity'], govee_device['battery'])
    record.writerow(govee_device['name'])

# On BLE advertisement callback
def on_advertisement(advertisement):
    if advertisement.address.address.startswith(GOVEE_BT_mac_OUI_PREFIX):
        mac = advertisement.address.address
        if mac not in govee_devices:
            govee_devices[mac] = {}
        if H5075_UPDATE_UUID16 in advertisement.uuid16s:
            # HACK: Proper decoding is done in bleson > 0.10
            name = advertisement.name.split("'")[0]
            encoded_data = int(advertisement.mfg_data.hex()[6:12], 16)
            battery = int(advertisement.mfg_data.hex()[12:14], 16)
            govee_devices[mac]["address"] = mac
            govee_devices[mac]["name"] = name
            govee_devices[mac]["mfg_data"] = advertisement.mfg_data
            govee_devices[mac]["data"] = encoded_data
            govee_devices[mac]["tempInF"] = decode_temp_in_f(encoded_data)
            govee_devices[mac]["humidity"] = decode_humidity(encoded_data)
            govee_devices[mac]["battery"] = battery
            print_values(mac)
        if advertisement.rssi is not None and advertisement.rssi != 0:
            govee_devices[mac]["rssi"] = advertisement.rssi

# ###########################################################################
adapter = get_provider().get_adapter()
observer = Observer(adapter)
observer.on_advertising_data = on_advertisement

try:
    while True:
        observer.start()
        sleep(2)
        observer.stop()
except KeyboardInterrupt:
    try:
        observer.stop()
        sys.exit(0)
    except SystemExit:
        observer.stop()
        os._exit(0)
The error that I'm getting is:
File "/home/pi/GoveeWatcher-master/python/goveeWatcher.py", line 37, in print_values
record.writerow(govee_device['name'])
ValueError: I/O operation on closed file.
I would be tempted to put the CSV writing functionality inside the print_values function, so that it opens the file, writes the data, and then closes the file for each value the observer finds.
For example:
# focus here
def print_values(mac):
    govee_device = govee_devices[mac]
    print(govee_device['name'], govee_device['tempInF'])
    with open('temp.csv', 'a', newline='') as record:
        writer = csv.DictWriter(record, fieldnames=govee_device.keys())
        writer.writerow(govee_device)
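If a header row is also wanted, one option (a sketch, with the field names assumed from the dictionary built above) is to write the header only when the file is first created and append rows afterwards:
import csv
import os

CSV_PATH = 'temp.csv'
# assumed subset of the keys stored in govee_devices[mac]
FIELDNAMES = ['name', 'address', 'tempInF', 'humidity', 'battery']

def append_row(govee_device):
    # write the header only the first time the file is created
    new_file = not os.path.exists(CSV_PATH)
    with open(CSV_PATH, 'a', newline='') as record:
        writer = csv.DictWriter(record, fieldnames=FIELDNAMES, extrasaction='ignore')
        if new_file:
            writer.writeheader()
        writer.writerow(govee_device)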

How to find out how long a search for files will take in Python?

So I have a little app that searches for all XML files on my PC and copies the files whose name is 44 digits to the "output" folder.
The problem is that the end user needs an indication of the progress and remaining time of the task.
This is the module to copy files:
xml_search.py
import os
import re
from threading import Thread
from datetime import datetime
import time
import shutil
import winsound

os.system('cls')

def get_drives():
    response = os.popen("wmic logicaldisk get caption")
    list1 = []
    t1 = datetime.now()
    for line in response.readlines():
        line = line.strip("\n")
        line = line.strip("\r")
        line = line.strip(" ")
        if (line == "Caption" or line == ""):
            continue
        list1.append(line + '\\')
    return list1

def search1(drive):
    for root, dir, files in os.walk(drive):
        for file in files:
            if re.match("\d{44}.xml", file):
                filename = os.path.join(root, file)
                try:
                    shutil.copy(filename, os.path.join('output', file))
                except Exception as e:
                    pass

def exec_(callback):
    t1 = datetime.now()
    list2 = []  # empty list is created
    list1 = get_drives()
    for each in list1:
        process1 = Thread(target=search1, args=(each,))
        process1.start()
        list2.append(process1)
    for t in list2:
        t.join()  # Wait for the threads to finish
    t2 = datetime.now()
    total = str(t2 - t1)
    print(total, file=open('times.txt', 'a'), end="\n")
    for x in range(3):
        winsound.Beep(2000, 100)
        time.sleep(.1)
    callback()

if __name__ == "__main__":
    exec_()
The code below uses the progressbar library and shows the progress and estimated remaining time of the task:
import progressbar
from time import sleep

bar = progressbar.ProgressBar(maxval=1120,
                              widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.ETA()])
bar.start()
for i in range(1120):
    bar.update(i + 1)
    sleep(0.1)
bar.finish()
You would need to add the above, suitably modified, to your code. In your case, you would count the number of files first, pass that count as the ProgressBar constructor's maxval argument, and remove the sleep call; a sketch of this is shown below.
The suggested solution with the progress bar works with one thread. If you insist on working with multiple threads, you would need to figure out how to initiate the progress bar and where to put the updates.
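A minimal sketch of that idea, assuming a single-threaded walk over one drive and reusing the same file-name rule; the first pass only counts the files so the total can be handed to maxval:
import os
import re
import shutil
import progressbar

PATTERN = re.compile(r"\d{44}\.xml")

def matching_files(drive):
    # yield full paths of files whose name is 44 digits followed by .xml
    for root, _dirs, files in os.walk(drive):
        for name in files:
            if PATTERN.match(name):
                yield os.path.join(root, name)

def copy_with_progress(drive, out_dir="output"):
    targets = list(matching_files(drive))  # first pass: count the work
    bar = progressbar.ProgressBar(maxval=len(targets) or 1,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                           progressbar.ETA()])
    bar.start()
    for i, path in enumerate(targets, start=1):  # second pass: copy and update
        try:
            shutil.copy(path, os.path.join(out_dir, os.path.basename(path)))
        except OSError:
            pass
        bar.update(i)
    bar.finish()

copy_with_progress("C:\\")  # example call for a single drive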
Try to implement a timer decorator like the following:
import time

def mytimer(func):
    def wrapper():
        t1 = time.time()
        result = func()
        t2 = time.time()
        print(f"The function {func.__name__} was run {t2 - t1} seconds")
        return result
    return wrapper

@mytimer
def TimeConsumingFunction():
    time.sleep(3)
    print("Hello timers")

TimeConsumingFunction()
Output:
/usr/bin/python3.7 /home/user/Documents/python-workspace/timers/example.py
Hello timers
The function TimeConsumingFunction was run 3.002610206604004 seconds
Process finished with exit code 0

Python Multithreading Producer Consumer Pattern

I'm still learning how to code and these are my first attempts at multithreading.
I've read a bunch of multithreading articles. I thought these were very helpful:
Processing single file from multiple processes
Python module of the week: multiprocessing
Producer-consumer problem in Python
Multiprocessing
There's quite a lot to think about. Especially for a beginner.
Unfortunately, when I try to put this information into practice my code isn't quite working.
The idea behind this code is to read simplified.txt which contains lines of comma delimited numbers. Eg: 0.275,0.28,0.275,0.275,36078.
The producer thread reads each line and strips the newline character from the end of the line. Then each number in the line is split and assigned a variable.
Variable1 is then placed into the queue.
The consumer thread will pick up items in the queue, square it, then add an entry into the log file.
The code I am using comes from this template. This is the code I have so far:
import threading
import queue
import time
import logging
import random
import sys

read_file = 'C:/temp/temp1/simplified.txt'
log1 = open('C:/temp/temp1/simplified_log1.txt', "a+")
logging.basicConfig(level=logging.DEBUG, format='(%(threadName)-9s) %(message)s',)
BUF_SIZE = 10
q = queue.Queue(BUF_SIZE)

class ProducerThread(threading.Thread):
    def __init__(self, name, read_file):
        super(ProducerThread, self).__init__()
        self.name = name
        self.read_file = read_file

    def run(self, read_file):
        while True:
            if not q.full():
                with open(read_file, 'r') as f:
                    for line in f:
                        stripped = line.strip('\n\r')
                        value1, value2, value3, value4, value5, value6, value7 = stripped.split(',')
                        q.put(value1)
                        logging.debug('Putting ' + str(value1) + ' : ' + str(q.qsize()) + ' items in queue')
                        time.sleep(random.random())
        return

class ConsumerThread(threading.Thread):
    def __init__(self, name, value1, log1):
        super(ConsumerThread, self).__init__()
        self.name = name
        self.value1 = value1
        self.log1 = log1
        return

    def run(self):
        while True:
            if not q.empty():
                value1 = q.get()
                sqr_value1 = value1 * value1
                log1.write("The square of " + str(value1) + " is " + str(sqr_value1))
                logging.debug('Getting ' + str(value1) + ' : ' + str(q.qsize()) + ' items in queue')
                time.sleep(random.random())
        return

if __name__ == '__main__':
    p = ProducerThread(name='producer')
    c = ConsumerThread(name='consumer')
    p.start()
    time.sleep(2)
    c.start()
    time.sleep(2)
When I run the code, I get this error:
Traceback (most recent call last):
File "c:/Scripta/A_Simplified_Producer_Consumer_Queue_v0.1.py", line 60, in <module>
p = ProducerThread(name='producer')
TypeError: __init__() missing 1 required positional argument: 'read_file'
I don't know where else I need to add read_file.
Any help would be greatly appreciated. Thanks in advance.
Your ProducerThread class requires two parameters (name and read_file) as arguments to its constructor, as defined in its __init__ method, but you only provide the first one when you create an instance in your main block. You have the same problem with your second class.
You should either provide read_file to the constructors when creating the instances, or just remove it from the constructor signature, since you don't appear to use it anyway (you use the read_file passed into the run function, but I don't think that is correct). It seems like you're attempting to override that method from the Thread superclass, and I doubt it takes such a parameter.
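For illustration, a minimal sketch of the two options (not the asker's final code; the consumer class would need the same treatment):
# Option 1: pass every constructor argument when creating the instance
p = ProducerThread(name='producer', read_file=read_file)

# Option 2: drop read_file from __init__ and keep Thread's expected run(self)
# signature; run() reads the module-level read_file directly
class ProducerThread(threading.Thread):
    def __init__(self, name):
        super().__init__()
        self.name = name

    def run(self):
        with open(read_file, 'r') as f:
            for line in f:
                q.put(line.strip().split(',')[0])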
Thank you userSeventeen for setting me on the right path.
I thought that in order to use outside variables I needed to place them in the __init__ method and then again in the run method. You've clarified that I only needed to use the variables in the run methods.
This is the working code. I had to remove the while True: statement, as I did not want the code to run forever.
import threading
import queue
import time
import logging
import random
import sys
import os

read_file = 'C:/temp/temp1/simplified.txt'
log1 = open('C:/temp/temp1/simplified_log1.txt', "a+")
logging.basicConfig(level=logging.DEBUG, format='(%(threadName)-9s) %(message)s',)
BUF_SIZE = 10
q = queue.Queue(BUF_SIZE)

class ProducerThread(threading.Thread):
    def __init__(self, name):
        super(ProducerThread, self).__init__()
        self.name = name

    def run(self):
        with open(read_file, 'r') as f:
            for line in f:
                stripped = line.strip('\n\r')
                value1, value2, value3, value4, value5 = stripped.split(',')
                float_value1 = float(value1)
                if not q.full():
                    q.put(float_value1)
                    logging.debug('Putting ' + str(float_value1) + ' : ' + str(q.qsize()) + ' items in queue')
                    time.sleep(random.random())
        return

class ConsumerThread(threading.Thread):
    def __init__(self, name):
        super(ConsumerThread, self).__init__()
        self.name = name
        return

    def run(self):
        while not q.empty():
            float_value1 = q.get()
            sqr_value1 = float_value1 * float_value1
            log1.write("The square of " + str(float_value1) + " is " + str(sqr_value1))
            logging.debug('Getting ' + str(float_value1) + ' : ' + str(q.qsize()) + ' items in queue')
            time.sleep(random.random())
        return

if __name__ == '__main__':
    p = ProducerThread(name='producer')
    c = ConsumerThread(name='consumer')
    p.start()
    time.sleep(2)
    c.start()
    time.sleep(2)
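One caveat with this shutdown style (an observation, not part of the original thread): the consumer exits as soon as the queue happens to be empty, even if the producer is still reading. A common alternative, sketched below with hypothetical names, is to push a sentinel value when the producer is done:
import queue
import threading

q = queue.Queue(10)
SENTINEL = None  # hypothetical end-of-stream marker

def producer(path):
    with open(path) as f:
        for line in f:
            q.put(float(line.split(',')[0]))
    q.put(SENTINEL)  # tell the consumer nothing more is coming

def consumer(log_path):
    with open(log_path, 'a') as log:
        while True:
            value = q.get()
            if value is SENTINEL:
                break  # every earlier item has already been taken off the queue
            log.write("The square of %s is %s\n" % (value, value * value))

p = threading.Thread(target=producer, args=('simplified.txt',))
c = threading.Thread(target=consumer, args=('simplified_log1.txt',))
p.start(); c.start()
p.join(); c.join()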

Python: Facebook Graph API - batch request

I want to make a batch request getting campaigns for a specific ad account. I created a simple script based on this issue,
but I've used some global arrays, and I don't know whether time.sleep(2) is necessary for this code. My code is as below:
from facebookads import FacebookAdsApi
from facebookads.api import FacebookRequest
import pandas as pd
import time

batch_body_responses = []
list_of_artists = [1]

def success_callback(response):
    try:
        pair = [response.json()['data']]
        next = [response.json()['paging']['next']]
        batch_body_responses.append(pair)
        batch_body_responses.append(next)
    except IndexError:
        pass
    except UnicodeEncodeError:
        pass

def error_callback(response):
    pass

def generate_batches(iterable, batch_size_limit):
    # This function can be found in examples/batch_utils.py
    batch = []
    for item in iterable:
        if len(batch) == batch_size_limit:
            yield batch
            batch = []
        batch.append(item)
    if len(batch):
        yield batch

def get_id_list(art_search_list):
    batches = []
    your_app_id = '756885'
    your_app_secret = '123456789'
    your_access_token = 'EAA.....'
    api = FacebookAdsApi.init(your_app_id, your_app_secret, your_access_token)
    batch_limit = 25
    for batch in generate_batches(art_search_list, batch_limit):
        next_batch = api.new_batch()
        for artt in batch:
            requestss = [FacebookRequest(node_id='act_1234/campaigns', method="GET", endpoint="?fields=id,name")]
            for req in requestss:
                next_batch.add_request(req, success_callback, error_callback)
        batches.append(next_batch)
    for batch_request in batches:
        batch_request.execute()
        time.sleep(2)
    print(batch_body_responses)
    return batch_body_responses

df = pd.DataFrame(get_id_list(list_of_artists))
How can this code be optimized so that it does not use global arrays, and can it be executed without the sleep statement? Why is the sleep needed?
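One possible way to drop the module-level list, reusing the helpers from the script above, is to collect results in a list local to get_id_list and bind it to the callback with functools.partial (a sketch, not a tested answer). The sleep between batch executions is usually just a crude guard against API rate limits, so whether it can be removed depends on how hard the account hits those limits.
from functools import partial

def success_callback(response, results):
    # append this batch item's payload to the list bound in get_id_list()
    payload = response.json()
    results.append(payload.get('data'))
    results.append(payload.get('paging', {}).get('next'))

def get_id_list(art_search_list):
    results = []  # local, replaces the global batch_body_responses
    api = FacebookAdsApi.init('756885', '123456789', 'EAA.....')  # credentials as in the question
    batches = []
    for batch in generate_batches(art_search_list, 25):
        next_batch = api.new_batch()
        for artt in batch:
            req = FacebookRequest(node_id='act_1234/campaigns', method="GET",
                                  endpoint="?fields=id,name")
            next_batch.add_request(req,
                                   partial(success_callback, results=results),
                                   error_callback)
        batches.append(next_batch)
    for batch_request in batches:
        batch_request.execute()
    return results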

Python Multithreading missing data

I am working on a Python script to check whether a URL is working. The script will write the URL and response code to a log file.
To speed up the check, I am using threading and a queue.
The script works well if the number of URLs to check is small, but when it increases to hundreds, some URLs simply go missing from the log file.
Is there anything I need to fix?
My script is
#!/usr/bin/env python
import Queue
import threading
import urllib2, urllib, sys, cx_Oracle, os
import time
from urllib2 import HTTPError, URLError

queue = Queue.Queue()
##print_queue = Queue.Queue()

class NoRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        infourl.code = code
        return infourl
    http_error_300 = http_error_302
    http_error_301 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302

class ThreadUrl(threading.Thread):
    #Threaded Url Grab
##    def __init__(self, queue, print_queue):
    def __init__(self, queue, error_log):
        threading.Thread.__init__(self)
        self.queue = queue
##        self.print_queue = print_queue
        self.error_log = error_log

    def do_something_with_exception(self, idx, url, error_log):
        exc_type, exc_value = sys.exc_info()[:2]
##        self.print_queue.put([idx,url,exc_type.__name__])
        with open(error_log, 'a') as err_log_f:
            err_log_f.write("{0},{1},{2}\n".format(idx, url, exc_type.__name__))

    def openUrl(self, pair):
        try:
            idx = pair[1]
            url = 'http://' + pair[2]
            opener = urllib2.build_opener(NoRedirectHandler())
            urllib2.install_opener(opener)
            request = urllib2.Request(url)
            request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1')
            #open urls of hosts
            resp = urllib2.urlopen(request, timeout=10)
##            self.print_queue.put([idx,url,resp.code])
            with open(self.error_log, 'a') as err_log_f:
                err_log_f.write("{0},{1},{2}\n".format(idx, url, resp.code))
        except:
            self.do_something_with_exception(idx, url, self.error_log)

    def run(self):
        while True:
            #grabs host from queue
            pair = self.queue.get()
            self.openUrl(pair)
            #signals to queue job is done
            self.queue.task_done()

def readUrlFromDB(queue, connect_string, column_name, table_name):
    try:
        connection = cx_Oracle.Connection(connect_string)
        cursor = cx_Oracle.Cursor(connection)
        query = 'select ' + column_name + ' from ' + table_name
        cursor.execute(query)
        #Count lines in the file
        rows = cursor.fetchall()
        total = cursor.rowcount
        #Loop through returned urls
        for row in rows:
            #print row[1],row[2]
            ## url = 'http://'+row[2]
            queue.put(row)
        cursor.close()
        connection.close()
        return total
    except cx_Oracle.DatabaseError, e:
        print e[0].context
        raise

def main():
    start = time.time()
    error_log = "D:\\chkWebsite_Error_Log.txt"
    #Check if error_log file exists
    #If exists then deletes it
    if os.path.isfile(error_log):
        os.remove(error_log)
    #spawn a pool of threads, and pass them queue instance
    for i in range(10):
        t = ThreadUrl(queue, error_log)
        t.setDaemon(True)
        t.start()
    connect_string, column_name, table_name = "user/pass@db", "*", "T_URL_TEST"
    tn = readUrlFromDB(queue, connect_string, column_name, table_name)
    #wait on the queue until everything has been processed
    queue.join()
##    print_queue.join()
    print "Total retrived: {0}".format(tn)
    print "Elapsed Time: %s" % (time.time() - start)

main()
Python's threading module isn't truly parallel because of the global interpreter lock (http://wiki.python.org/moin/GlobalInterpreterLock), so you should really use multiprocessing (http://docs.python.org/library/multiprocessing.html) if you want to take advantage of multiple cores.
Also, you seem to be accessing a file from several threads simultaneously:
with open(self.error_log, 'a') as err_log_f:
    err_log_f.write("{0},{1},{2}\n".format(idx, url, resp.code))
This is really bad, AFAIK: if two threads try to write to the same file at the same time, or almost at the same time, the behavior tends to be undefined (imagine one thread writing while another has just closed the file), and keep in mind they're not truly running in parallel anyway.
Anyway, you would need a third queue to handle writing to the file.
At first glance this looks like a race condition, since many threads are trying to write to the log file at the same time. See this question for some pointers on how to lock a file for writing (so only one thread can access it at a time).
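A minimal sketch of the "extra queue" idea, written with Python 3 module names (queue, threading) rather than the Python 2 names used in the question: a single writer thread owns the log file, so the worker threads never touch it directly and writes cannot interleave.
import queue
import threading

log_queue = queue.Queue()  # only the writer thread drains this
SENTINEL = object()        # hypothetical shutdown marker

def log_writer(path):
    # single owner of the log file: no two threads ever write at once
    with open(path, 'a') as log_f:
        while True:
            item = log_queue.get()
            if item is SENTINEL:
                break
            idx, url, status = item
            log_f.write("{0},{1},{2}\n".format(idx, url, status))

# worker threads call log_queue.put((idx, url, resp.code)) instead of opening
# the file themselves; the main thread stops the writer with the sentinel
writer = threading.Thread(target=log_writer, args=("chkWebsite_Error_Log.txt",))
writer.start()
log_queue.put((1, "http://example.com", 200))  # example record
log_queue.put(SENTINEL)
writer.join()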
