Global and local scope confusion: why is UnboundLocalError not happening? - python-3.x

I am working with a WebSocket for a live stream of data and using 2 threads in total (counting MainThread). My confusion is as follows.
I start with a dict live_data which is used to store the live data:
smart_soket = soket_plugin(feed_token)
live_data = {}
thread_1 = threading.Thread(target=feed_1, name='thread_1',
                            args=(smart_soket, token_string))
thread_1.start()
Here live_data is updated inside the function feed_1, which is:
def feed_1(smart_soket, token_string):
    token = token_string
    task = "mw"

    def on_message(ws, ticks):
        # print(f"Got a Tick at {dt.datetime.now()}")
        # print(ticks)
        for stock in ticks:
            try:
                if stock['tk'] == '26009':
                    # print('In for Bank Nifty')
                    live_data[token_to_symbol_map[stock['tk']]] = {
                        "ltp": stock["ltp"],
                        "Last_update_time": stock["ltt"]}
                else:
                    live_data[token_to_symbol_map[stock['tk']]] = {
                        "ltp": stock["ltp"],
                        "Last_update_time": stock["ltt"],
                        "Vwap": stock["ap"]}
            except:
                continue

    def on_open(ws):
        print("In on_open")
        print("on open")
        smart_soket.subscribe(task, token)
Now my doubt is: I have not declared live_data as a global variable anywhere in the rest of the code (which would fill pages if I pasted it here), yet when I update live_data inside feed_1 it still gets updated and I can use it wherever needed. Why is this possible? Shouldn't it have raised an UnboundLocalError?
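For reference, the distinction that matters here is mutating versus rebinding: Python only treats a name as local (and can raise UnboundLocalError) when the function body assigns to that name; subscript assignment like live_data[...] = ... merely mutates the object the global name refers to. A minimal illustration, not from the original code:
live_data = {}

def mutate():
    # Reads the global name and mutates the dict it points to: no error.
    live_data['key'] = 1

def rebind():
    print(live_data)        # UnboundLocalError: read before the local assignment below
    live_data = {'key': 2}  # this assignment makes the name local to rebind()

mutate()    # fine, live_data is now {'key': 1}
# rebind()  # would raise UnboundLocalError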

Related

Random key lookup on LMDB/python vs BerkeleyDB/python (How to make LMDB lookup faster)

I have a program written in Python that uses BerkeleyDB to store data (event logs), which I migrated to LMDB. My problem is that before an event gets written, the program does a lookup to check whether the event already exists. I noticed that the BerkeleyDB version is much faster at doing the single-value lookup over 13k+ records (the LMDB version seems to be about 1 second slower for every lookup), even with transactions enabled in BerkeleyDB. Any idea how to speed up the LMDB version? Note that I already have 70 GB+ (about 30 million records) of data stored in my BerkeleyDB, and doing additional processing on those events takes me more than an hour, so I thought switching to LMDB would decrease the processing time.
My LMDB environment was opened this way (I even set readahead to False, but the database size is only about 35 MB so I don't think it matters):
env = lmdb.open(db_folder, map_size=100000000000, max_dbs=4, readahead=False)
database = env.open_db('events'.encode())
My berkeleydb was opened this way:
env = db.DBEnv()
env.open(db_folder, db.DB_INIT_MPOOL | db.DB_CREATE | db.DB_INIT_LOG | db.DB_INIT_TXN | db.DB_RECOVER, 0)
database = db.DB(env)
BerkeleyDB version of check:
if event['eId'].encode('utf-8') in database:
    duplicate_count += 1
else:
    try:
        txn = env.txn_begin(None)
        database[event['eId'].encode('utf-8')] = json.dumps(event).encode('utf-8')
    except:
        if txn is not None:
            txn.abort()
            txn = None
        raise
    else:
        txn.commit()
        txn = None
        event_count += 1
lmdb version:
with env.begin(buffers=True, db=database) as txn:
    if txn.get(event['eId'].encode()) is not None:
        dup_event_count += 1
    else:
        txn.put(event['eId'].encode(), json.dumps(event).encode('utf-8'))
        event_count += 1
Solution:
Place with env.begin outside the loop:
#case('rand lookup')
def test():
    with env.begin() as txn:
        for word in words:
            txn.get(word)
    return len(words)

#case('per txn rand lookup')
def test():
    for word in words:
        with env.begin() as txn:
            txn.get(word)
    return len(words)
Figured this out myself. What I was doing is a per-transaction random lookup. I just had to place with env.begin outside of the for loop (the loop is not visible in my example), as suggested in this example: https://raw.githubusercontent.com/jnwatson/py-lmdb/master/examples/dirtybench.py
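Applied to the original deduplication loop, that would look roughly like this (a sketch only: 'events' stands in for however the records are iterated, and the transaction is opened with write=True so the puts are permitted inside it):
# One read/write transaction for the whole batch instead of one per event.
with env.begin(write=True, buffers=True, db=database) as txn:
    for event in events:  # 'events' is a placeholder for your record source
        key = event['eId'].encode()
        if txn.get(key) is not None:
            dup_event_count += 1
        else:
            txn.put(key, json.dumps(event).encode('utf-8'))
            event_count += 1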

Is it possible to use SQLite in EFS reliably?

Is it possible to use SQLite in AWS EFS safely? In my reading, while trying to determine whether this is viable, there appear to be allusions that it should be doable, since AWS EFS implemented NFSv4 back in 2017. In practice I am having no luck getting consistent behavior out of it.
Quick Points:
"Just use AWS RDS": Due to issues with other AWS architecture another team has implemented we are trying to work around resource starving cause by the API (DynamoDB isn't an option)
"This goes against SQLite's primary use case (being a locally access DB): Yes, but given the circumstances it seems like the best approach.
I have verified that we are running nfsv4 on our EC2 instance
Current results are very inconsistent with 3 exceptions encountered irrespective of approach I use
"file is encrypted or is not a database"
"disk I/O error (potentially related to EFS open file limits)"
"database disk image is malformed" (The database actually isn't corrupted after this)
database code:
import os
import time
import fcntl

from peewee import *
from peewee import chunked
from playhouse.sqlite_ext import SqliteExtDatabase

SQLITE_VAR_LIMIT = 999
dgm_db_file_name = ''
db = SqliteExtDatabase(None)
lock_file = f'{os.getenv("efs_path", "tmp")}/db_lock_file.lock'


def lock_db_file():
    with open(lock_file, 'w+') as lock:
        limit = 900
        while limit:
            try:
                fcntl.flock(lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
                print(f'db locked')
                break
            except Exception as e:
                print(f'Exception: {str(e)}')
                limit -= 1
                time.sleep(1)
        if not limit:
            raise ValueError(f'Timed out after 900 seconds while waiting for database lock.')


def unlock_db_file():
    with open(lock_file, 'w+') as lock:
        fcntl.flock(lock, fcntl.LOCK_UN)
        print(f'db unlocked')


def initialize_db(db_file_path=dgm_db_file_name):
    print(f'Initializing db ')
    global db
    db.init(db_file_path, pragmas={
        'journal_mode': 'wal',
        'cache_size': -1 * 64000,  # 64MB
        'foreign_keys': 1})
    print(f'db initialized')


class Thing(Model):
    name = CharField(primary_key=True)
    etag = CharField()
    last_modified = CharField()

    class Meta:
        database = db

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @staticmethod
    def insert_many(stuff):
        data = [(k, v['ETag'], v['Last-Modified']) for k, v in stuff.items()]
        fields = [Thing.name, Thing.etag, Thing.last_modified]
        limit = 900
        while True:
            try:
                with db.atomic():
                    for key_batch in chunked(data, SQLITE_VAR_LIMIT // len(fields)):
                        s = Thing.insert_many(key_batch, fields=[Thing.name, Thing.etag, Thing.last_modified]) \
                            .on_conflict_replace().execute()
                break
            except Exception as e:
                print(f'Exception: {str(e)}')
                print(f'Will try for {limit} more seconds.')
                limit -= 1
                time.sleep(1)
            if not limit:
                raise ValueError('Failed to execute query after 900 seconds.')
Example Call:
print(f'Critical section start')
# lock_db_file() # I have tried with a secondary lock file as well
self.stuff_db = Thing()
if not Path(self.db_file_path).exists():
    initialize_db(self.db_file_path)
    print('creating tables')
    db.create_tables([Thing], safe=True)
else:
    initialize_db(self.db_file_path)
getattr(Thing, insert_many)(self.stuff_db, stuff_db)
# db.close()
# unlock_db_file()
print(f'Critical section end')
print(f'len after update: {len(stuff)}')
Additional peculiarities:
If a lambda gets stuck catching the "malformed image" exception and a new lambda execution is triggered, the error resolves in the other lambda.
After some trial and error I discovered that this is a workable solution. It appears that the design will need to use APSWDatabase(..., vfs='unix-excl') to properly enforce locking.
Database code:
from peewee import *
from playhouse.apsw_ext import APSWDatabase

SQLITE_VAR_LIMIT = 999
db = APSWDatabase(None, vfs='unix-excl')


def initialize_db(db_file_path):
    global db
    db.init(db_file_path, pragmas={
        'journal_mode': 'wal',
        'cache_size': -1 * 64000})
    db.create_tables([Thing], safe=True)
    return Thing()


class Thing(Model):
    field_1 = CharField(primary_key=True)
    field_2 = CharField()
    field_3 = CharField()

    class Meta:
        database = db
This allows for the following usage:
db_model = initialize_db(db_file_path)
with db:
    # Do database queries here with the db_model
    pass
Note: If you don't use the context-managed database connection, you will need to explicitly call db.close(), otherwise the lock will not be released from the file. Additionally, calling db.init(...) causes a lock to be placed on the database until it is closed.
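A small sketch of the non-context-managed variant the note describes (same initialize_db as above; the explicit close is what releases the file lock):
db_model = initialize_db(db_file_path)
try:
    # Do database queries here with the db_model
    pass
finally:
    db.close()  # without this, the unix-excl lock stays on the database file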

How to get the client's ip on the server for remote desktop

I am using the following function to implement a program that changes its behavior depending on the IP of the connected PC.
The problem with this function is that if something tries to log in and fails, it may pick up the IP of the failed connection.
Now that we've encountered that possibility, the program is broken.
What edits do I need to make so that this function behaves as expected?
import psutil


def get_ip(port=3389):
    ip = ""
    for x in psutil.net_connections():
        if x.status == "ESTABLISHED" and x.laddr.port == port:
            ip = x.raddr.ip
            break
    return ip
I changed the function based on Bijay Regmi's comment. Thank you. wmi was difficult for me, so I used win32evtlog to read the log out little by little. I am still working on improving readability and finding bugs bit by bit.
from datetime import datetime, timedelta, timezone
from typing import Callable, Optional
import xml.etree.ElementTree as ET

import win32evtlog


def systime(xml):
    return datetime.fromisoformat(xml.find(f'{ns}System/{ns}TimeCreated').get('SystemTime')[:-2] + "+00:00")


def last_event(handle,
               event_id,
               condition: Callable[['Event'], bool] = None) -> Optional['Event']:
    now = datetime.now(tz=timezone.utc)
    while True:
        events = win32evtlog.EvtNext(handle, 20)
        if not events:
            break
        for event in events:
            xml_content = win32evtlog.EvtRender(event, win32evtlog.EvtRenderEventXml)
            obj = Event(ET.fromstring(xml_content))
            if obj.EventID == event_id:
                if obj.SystemTime + timedelta(minutes=5) < now:
                    return None
                if condition and not condition(obj):
                    continue
                return obj


class Event:
    def __init__(self, xml: ET.Element):
        self.EventID = xml and xml.find(f'{ns}System/{ns}EventID').text
        self.SystemTime = xml and systime(xml)
        self.xml = xml
        if self.EventID == '24':
            self.IpAddress = xml.find(f'{ns}UserData/{{Event_NS}}EventXML/{{Event_NS}}Address').text
        elif self.EventID == '4624':
            self.IpAddress = xml.find(f'{ns}EventData/{ns}Data[@Name="IpAddress"]').text
        else:
            self.IpAddress = None
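For completeness, a hypothetical usage sketch (the channel name and event choice are assumptions, not from the original post): obtain a handle with win32evtlog.EvtQuery reading newest-first, then ask last_event for the most recent matching event and read its IpAddress.
# Hypothetical: query the Security log newest-first and take the source
# address from the most recent 4624 (successful logon) event.
handle = win32evtlog.EvtQuery(
    'Security',
    win32evtlog.EvtQueryChannelPath | win32evtlog.EvtQueryReverseDirection)
evt = last_event(handle, '4624')
print(evt.IpAddress if evt else 'no matching logon in the last 5 minutes')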

Python 3 MQTT client storing received payload in Sqlite - Open DB once, store many times, finally close db?

I have a Python 3.6 script that connects to MQTT and subscribes to a topic. Every time the callback function "on_message" gets triggered, it instantiates a class with a single method that does the following: opens the db file, saves the received data, closes the db file.
The script described above works almost fine. It receives about 7 MQTT messages per second, so for each message it needs to [Open_DB - Save_Data - Close_DB]. Some messages get a PUBACK but are not saved, perhaps due to so many unnecessary operations, so I want to improve this:
I spent a lot of time (I'm not an expert) trying to create a class that would open the db once, write many thousands of times to it, and only when done close the db file. The class would have three methods:
1. MyDbClass.open_db_file()
2. MyDbClass.save_data()
3. MyDbClass.close_db_file()
The problem, as you may guess, is that it is not possible to call MyDbClass.save_data() from within the "on_message" callback, even when the object has been placed in a global variable. Here is the non-working code with the proposed idea, cleaned up for easier reading:
# -----------------------------
# This code has been cleaned up for faster reading
import paho.mqtt.client as mqtt
import time
import json
import sqlite3

# Global variables
db_object = ""


class MyDbClass():
    def __init__(self):
        pass

    def open_db_file(self, db_file):
        self.db_conn = sqlite3.connect(db_file)
        return self.db_conn

    def save_data(self, json_data):
        self.time_stamp = time.strftime('%Y%m%d%H%M%S')
        self.data = json.loads(json_data)
        self.sql = '''INSERT INTO trans_reqs (received, field_a, field_b, field_c) \
                      VALUES (?, ?, ?, ?)'''
        self.fields_values = (self.time_stamp, self.data['one'], self.data['two'], self.data['three'])
        self.cur = self.db_conn.cursor()
        self.cur.execute(self.sql, self.fields_values)
        self.db_conn.commit()

    def close_db_file(self):
        self.cur.close()
        self.db_conn.close()


def on_mqtt_message(client, userdata, msg):
    global db_object
    m_decode = msg.payload.decode("utf-8", "ignore")
    db_object.save_data(m_decode)


def main():
    global db_object

    # Database to use - Trying to create an object to manage DB tasks (from MyDbClass)
    db_file = "my_filename.sqlite"
    db_object = MyDbClass.open_db_file(db_file)

    # MQTT -- Set variables
    broker_address = "..."
    port = 1883
    client_id = "..."
    sub_topic = "..."
    sub_qos = 1

    # MQTT -- Instantiate the MQTT Client class and set callbacks
    client = mqtt.Client(client_id)
    client.on_connect = on_mqtt_connect
    client.on_disconnect = on_mqtt_disconnect
    client.on_message = on_mqtt_message
    client.on_log = on_mqtt_log
    client.clean_session = True
    # client.username_pw_set(usr, password=pwd)  # set username and password
    print('Will connect to broker ', broker_address)
    client.connect(broker_address, port=port, keepalive=45)
    client.loop_start()
    client.subscribe(sub_topic, sub_qos)

    try:
        while True:
            time.sleep(.1)
    except KeyboardInterrupt:
        # Disconnects MQTT
        client.disconnect()
        client.loop_stop()
        print("....................................")
        print("........ User Interrupted ..........")
        print("....................................")

    db_object.close_db_file()
    client.loop_stop()
    client.disconnect()


if __name__ == "__main__":
    main()
Any help on how to do this will be greatly appreciated!
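Not from the original post, but a minimal sketch of the open-once / save-many / close-once pattern being asked about, assuming the same MyDbClass shape: create one instance up front, store the instance (not the raw connection) in the global, and let on_message reuse it:
db_object = None  # module-level handle shared with the callback

def on_mqtt_message(client, userdata, msg):
    # Reuses the already-open connection; no open/close per message.
    db_object.save_data(msg.payload.decode("utf-8", "ignore"))

def main():
    global db_object
    db_object = MyDbClass()                        # create the instance first
    db_object.open_db_file("my_filename.sqlite")   # then open the db once
    # ... set up the MQTT client and loop as in the question ...
    # on shutdown:
    db_object.close_db_file()
One caveat: paho's loop_start() delivers callbacks on its own network thread, so the sqlite3 connection may need check_same_thread=False (or all writes funneled through a single thread or queue) to avoid the "SQLite objects created in a thread can only be used in that same thread" error.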

How do I manage TCP Client read/write overlap issues?

I have a TCP client communicating with a LabVIEW GUI.
My program calls connect() at the start and disconnect() at the end. It will call passCommand(x) to read or write data to the LabVIEW GUI. However, in some cases, I have multiple threads which may be calling passCommand() and somehow the return data will get mixed up.
For example, in the main thread I will ask for the voltage, which should be a number between 300 and 400. In a different thread I will ask for the temperature, which should be a number from 0-100. The voltage will be returned as 25, while the temperature will get 250.
Is this a known issue with TCP communication and threading? Is there a way to solve this such as implementing a queue or unique id or something?
import socket as _socket
# get python major version as integer
from sys import version as pythonVersion
pythonVersionMajor = int(pythonVersion[0])

_serverHost = 'localhost'
_serverPort = 50007
isConnected = 0
_sockobj = None
_error_string = "error:"


def connect():
    'opens a connection to LabVIEW Server'
    global _sockobj, isConnected
    _sockobj = _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM)  # create socket
    _sockobj.connect((_serverHost, _serverPort))  # connect to LV
    isConnected = 1


def disconnect():
    'closes the connection to LabVIEW Server'
    global isConnected
    _sockobj.close()  # close socket
    isConnected = 0


def passCommand(command):
    'passes a command to LabVIEW Server'
    ## We prepend the command length (8 char long) to the message and send it to LV
    # Compute message length and pad with 0 on the left if required
    commandSize = str(len(command)).rjust(8, '0')
    # Prepend msg size to msg
    completeCommand = commandSize + command
    # python 3 requires data to be encoded
    if (pythonVersionMajor >= 3):
        completeCommand = str.encode(completeCommand)
    # Send complete command
    _sockobj.send(completeCommand)
    data = _sockobj.recv(11565536)
    # python 3 requires data to be decoded
    if (pythonVersionMajor >= 3):
        data = bytes.decode(data)
    if data.rfind(_error_string) == 0:
        error = True
        data = data[len(_error_string):]  # get data after "error:" string
    else:
        error = False
    execString = "lvdata = " + data
    exec(execString, globals())
    if error:
        raise _LabVIEWError(lvdata)
    else:
        return lvdata


class _Error(Exception):
    """Base class for exceptions in this module."""
    pass


class _LabVIEWError(_Error):
    """Exception raised for errors generated in LabVIEW.

    Attributes:
        code -- LabVIEW Error Code
        source -- location of the error
        message -- explanation of the error
    """

    def __init__(self, error):
        self.code = error[0]
        self.source = error[1]
        self.message = error[2]

    def __str__(self):
        return "%s" % (self.message,)
This is an example of one of the most common problems with threading. You are accessing a resource from multiple threads and the resource is not thread-safe (if both threads are sending/receiving at the same time, it's possible for a thread to get the wrong response, or even both responses).
Ideally you should lock access to passCommand with a mutex so it can only be used by one thread at a time, or open one socket per thread, or do all of your socket operations in a single thread.
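As a minimal sketch of the mutex option (the wrapper name is a placeholder, not from the original module), a single threading.Lock around passCommand serializes access to the shared socket so a request and its reply can no longer interleave with another thread's:
import threading

_command_lock = threading.Lock()  # one lock shared by every caller

def pass_command_safe(command):
    # Only one thread at a time may send on the socket and read the reply.
    with _command_lock:
        return passCommand(command)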
