EventHub and Receive - python-3.x

All,
I modified the sample Receive Python script for Azure Event Hub a bit, but when I run it, it goes into a loop fetching the same events over and over. I'm not sending any events to the Event Hub since I only want to read what is already there, and I don't see a while loop here, so how is this happening, and how do I stop after it reads all the events currently in the Event Hub?
Thanks
grajee
# https://learn.microsoft.com/en-us/python/api/overview/azure/eventhub-readme?view=azure-python#consume-events-from-an-event-hub
import logging
from azure.eventhub import EventHubConsumerClient

connection_str = 'Endpoint=sb://testhubns01.servicebus.windows.net/;SharedAccessKeyName=getevents;SharedAccessKey=testtestest='
consumer_group = '$Default'
eventhub_name = 'testpart'
client = EventHubConsumerClient.from_connection_string(connection_str, consumer_group, eventhub_name=eventhub_name)

logger = logging.getLogger("azure.eventhub")
logging.basicConfig(level=logging.INFO)

def on_event(partition_context, event):
    logger.info("Received event from partition: \"{}\" : \"{}\"".format(partition_context.partition_id, event.body_as_str()))
    partition_context.update_checkpoint(event)

with client:
    client.receive(
        on_event=on_event,
        starting_position="-1",  # "-1" is from the beginning of the partition.
    )
    # receive events from specified partition:
    # client.receive(on_event=on_event, partition_id='0')

client.close()

The piece of code below, from here, makes it clearer: without a persistent checkpoint store, checkpoints are only kept in memory, so every run starts again from starting_position and re-reads the same events.
import asyncio
from azure.eventhub.aio import EventHubConsumerClient
from azure.eventhub.extensions.checkpointstoreblobaio import BlobCheckpointStore

connection_str = '<< CONNECTION STRING FOR THE EVENT HUBS NAMESPACE >>'
consumer_group = '<< CONSUMER GROUP >>'
eventhub_name = '<< NAME OF THE EVENT HUB >>'
storage_connection_str = '<< CONNECTION STRING FOR THE STORAGE >>'
container_name = '<< NAME OF THE BLOB CONTAINER >>'

async def on_event(partition_context, event):
    # do something
    await partition_context.update_checkpoint(event)  # Or update_checkpoint every N events for better performance.

async def receive(client):
    await client.receive(
        on_event=on_event,
        starting_position="-1",  # "-1" is from the beginning of the partition.
    )

async def main():
    checkpoint_store = BlobCheckpointStore.from_connection_string(storage_connection_str, container_name)
    client = EventHubConsumerClient.from_connection_string(
        connection_str,
        consumer_group,
        eventhub_name=eventhub_name,
        checkpoint_store=checkpoint_store,  # For load balancing and checkpoint. Leave None for no load balancing.
    )
    async with client:
        await receive(client)

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())
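As a side note on the original question of stopping once the existing events have been read: receive() blocks until the client is closed, but it accepts a max_wait_time argument, and when no event arrives within that window the on_event callback is invoked with event=None. A rough sketch of using that to shut down after the backlog is drained, reusing the synchronous client and logger from the question (the 5-second idle window is an arbitrary choice):

import threading

done = threading.Event()

def on_event(partition_context, event):
    if event is None:
        # No event arrived within max_wait_time: assume the backlog is drained.
        done.set()
        return
    logger.info("Received event from partition %s: %s",
                partition_context.partition_id, event.body_as_str())
    partition_context.update_checkpoint(event)

# receive() blocks until the client is closed, so run it on a worker thread
# and close the client from the main thread once the "drained" flag is set.
worker = threading.Thread(
    target=client.receive,
    kwargs={"on_event": on_event, "starting_position": "-1", "max_wait_time": 5},
    daemon=True,
)
worker.start()
done.wait()
client.close()
worker.join()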

Related

How to use aioboto3 & asyncio to download file from S3 aws - Python

I have a sync script which is running and working well, but I see that some file downloads take time, so I thought of using an async approach here.
import json
import os
import io
import time
import gzip
import re
import logging
from logging.handlers import RotatingFileHandler
import boto3

AWS_KEY = "**"
AWS_SECRET = "**"
QUEUE_URL = "***"
OUTPUT_PATH = "./test"
VISIBILITY_TIMEOUT = 10
REGION_NAME = "region"

sqs = boto3.resource('sqs', region_name=REGION_NAME, aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET)
s3 = boto3.client('s3', region_name=REGION_NAME, aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET)
queue = sqs.Queue(url=QUEUE_URL)

def handle_response(msg, path):
    """Logic goes here"""
    print('message: %s' % msg)

def download_message_files(msg):
    for s3_file in msg['files']:
        s3_path = s3_file['path']
        with io.BytesIO() as f:
            s3.download_fileobj(msg['bucket'], s3_path, f)
            f.seek(0)
            for line in gzip.GzipFile(fileobj=f):
                handle_response(line.decode('UTF-8'), s3_path)

def consume():
    while True:
        for msg in queue.receive_messages(VisibilityTimeout=VISIBILITY_TIMEOUT):
            body = json.loads(msg.body)  # grab the actual message body
            download_message_files(body)
            msg.delete()
        time.sleep(sleep_time)

if __name__ == '__main__':
    # Setup our root logger
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")
    # Create our FDR logger
    logger = logging.getLogger("Consumer")
    # Rotate log file handler
    RFH = RotatingFileHandler("test.log", maxBytes=20971520, backupCount=5)
    # Log file output format
    F_FORMAT = logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s')
    # Set the log file output level to INFO
    RFH.setLevel(logging.INFO)
    # Add our log file formatter to the log file handler
    RFH.setFormatter(F_FORMAT)
    # Add our log file handler to our logger
    logger.addHandler(RFH)
    consume()
I have tried converting this using aioboto3 and got stuck in the queue approach.
session = aioboto3.Session()
sqs = session.resource('sqs', region_name=REGION_NAME, aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET)
s3 = session.client('s3', region_name=REGION_NAME, aws_access_key_id=AWS_KEY, aws_secret_access_key=AWS_SECRET)
queue = sqs.Queue(url=QUEUE_URL)  # <---- this gives error as 'ResourceCreatorContext' object has no attribute 'Queue'
As I understand from this, there is no such attribute, but could anyone guide me on how to make this work asynchronously?
You can use asyncio and aioboto3 together.
Instead of creating a resource, you can use a client. The difference between an aioboto3.client and an aioboto3.resource can be found in this answer.
This is a simple working example:
import aioboto3

async def consume():
    async with aioboto3.Session().client(service_name='sqs', region_name=REGION_NAME,
                                         aws_access_key_id=AWS_KEY,
                                         aws_secret_access_key=AWS_SECRET) as client:
        # The client API is receive_message (not receive_messages) and needs the queue URL.
        response = await client.receive_message(QueueUrl=QUEUE_URL,
                                                VisibilityTimeout=VISIBILITY_TIMEOUT)
        for message in response.get('Messages', []):
            print(message['Body'])  # Do something with the message
This should solve the error you are facing. This solution can also be extended to S3 as per your requirements.
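For the S3 part, a minimal sketch of what the extension might look like, assuming the same message layout as download_message_files in the question (aioboto3 exposes the S3 transfer helpers such as download_fileobj as coroutines on its async client):

import io
import gzip
import aioboto3

async def download_message_files(msg):
    async with aioboto3.Session().client('s3', region_name=REGION_NAME,
                                         aws_access_key_id=AWS_KEY,
                                         aws_secret_access_key=AWS_SECRET) as s3:
        for s3_file in msg['files']:
            s3_path = s3_file['path']
            with io.BytesIO() as f:
                await s3.download_fileobj(msg['bucket'], s3_path, f)
                f.seek(0)
                for line in gzip.GzipFile(fileobj=f):
                    handle_response(line.decode('UTF-8'), s3_path)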

Listener for Python Queue

I'm using a Python queue to collect data packets from MQTT listeners, but I'm not sure when the queue will be loaded with an MQTT packet. Can we put a listener on this Queue?
Note: this listener callback will be used to insert the data into a database.
import queue
import time
import threading

print('python queue')

def on_connect():
    print('connected')

# gives message from device
def on_message(client, userdata, msg):
    # print("Topic", msg.topic + "\nMessage:" + str(msg.payload))
    qMqtt.put('msg')
    # replace below line from here and move to listener
    queueToDB('msg')

def queueToDB(msg):
    qMqtt.get(msg)
    dbaseInsert(msg)

def dbaseInsert(data):
    # insert into query
    print("inserted")

def run():
    # mqttc = mqtt.Client()
    # mqttc.on_connect = on_connect
    # mqttc.on_message = on_message
    global qMqtt
    qMqtt = queue.Queue()
    on_connect()
    on_message('client', 'userdata', 'msg')

run()
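For reference, queue.Queue has no built-in listener or callback hook; the usual pattern is a dedicated consumer thread that blocks on get() and performs the database insert whenever an item arrives. A minimal sketch of that idea, reusing the qMqtt and dbaseInsert names from the question:

import queue
import threading

qMqtt = queue.Queue()

def dbaseInsert(data):
    # insert into query
    print("inserted", data)

def queue_listener():
    # get() blocks until an item is available, so nothing is polled or missed.
    while True:
        msg = qMqtt.get()
        dbaseInsert(msg)
        qMqtt.task_done()

listener_thread = threading.Thread(target=queue_listener, daemon=True)
listener_thread.start()

# The MQTT on_message callback then only needs to call qMqtt.put(msg.payload);
# the listener thread picks the item up and writes it to the database.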

How to publish and subscribe .pdf file in Google Pub/Sub (GCP)

In the code below, a big .pdf file is split into single pages that are uploaded to a bucket and enqueued to Pub/Sub at the same time:
import os
import glob
from google.cloud import pubsub_v1, storage

def publish_messages(project_id, topic_id, enqueue_file):
    publisher = pubsub_v1.PublisherClient()
    topic_path = publisher.topic_path(project_id, topic_id)
    data = enqueue_file
    # Data must be a bytestring
    data = data.encode("utf-8")
    # When you publish a message, the client returns a future.
    future = publisher.publish(topic_path, data=data)
    print(future.result())
    print(enqueue_file + " has been enqueued to Pub/Sub.")

def upload_local_directory_to_gcs(local_path, bucket, gcs_path):
    assert os.path.isdir(local_path)
    for local_file in glob.glob(local_path + '/**'):
        if not os.path.isfile(local_file):
            continue
        remote_path = os.path.join(gcs_path, local_file[1 + len(local_path):])
        storage_client = storage.Client()
        buck = storage_client.bucket(bucket)
        blob = buck.blob(remote_path)
        blob.upload_from_filename(local_file)
        print("Uploaded " + local_file + " to gs bucket " + bucket)
        publish_messages("Project1", "my-topic", local_file)
I receive messages using the below code
def receive_messages(project_id, subscription_id, timeout=None):
    from concurrent.futures import TimeoutError
    from google.cloud import pubsub_v1

    subscriber = pubsub_v1.SubscriberClient()
    subscription_path = subscriber.subscription_path(project_id, subscription_id)

    def callback(message):
        print("Received message: {}".format(message))
        message.ack()

    streaming_pull_future = subscriber.subscribe(subscription_path, callback=callback)
    print("Listening for messages on {}..\n".format(subscription_path))

    with subscriber:
        try:
            streaming_pull_future.result(timeout=timeout)
        except TimeoutError:
            streaming_pull_future.cancel()

if __name__ == "__main__":
    receive_messages("Project1", "my-sub")
But when I receive, I am getting just string data:
Received message: Message {
data: b'/tmp/doc_pages/document-page17.pdf'
ordering_key: ''
attributes: {}
}
My idea is to get that PDF file and perform some OCR operation using the Vision API. Is it possible to get the PDF file itself? If there is any other methodology, please let me know.
Thanks!
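Pub/Sub delivers only the bytes that were published; here that is the local file path string, not the PDF contents, and messages are capped at 10 MB, so publishing large files directly is usually avoided. A common pattern is to publish the GCS object name instead (remote_path in upload_local_directory_to_gcs) and have the subscriber download the blob before running OCR. A rough sketch of such a callback; the my-bucket name is a placeholder and the change to publish remote_path is assumed:

from google.cloud import storage

BUCKET_NAME = "my-bucket"  # placeholder for the bucket used in upload_local_directory_to_gcs

def callback(message):
    # Assumes the publisher was changed to publish remote_path (the GCS object name).
    object_name = message.data.decode("utf-8")
    storage_client = storage.Client()
    blob = storage_client.bucket(BUCKET_NAME).blob(object_name)
    pdf_bytes = blob.download_as_bytes()

    # pdf_bytes now holds the single-page PDF; it can be passed on to the
    # Vision API (e.g. batch_annotate_files with mime_type="application/pdf")
    # or written to disk for any other OCR tooling.
    print("Downloaded {} ({} bytes)".format(object_name, len(pdf_bytes)))

    message.ack()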

Send data from another class through ws after client connected

I want to send data through websockets as soon as a client is connected.
The data lives in a different place than the websocket handler. How can I get the data to the client?
The server holds the loop and the handler. In the connector I connect to a TCP socket to get the data out of some hardware. I expect to have no more than 6 websockets open at a time. The data comes as a stream out of the TCP socket.
server.py
import os
from tornado import web, websocket
import asyncio
import connector

class StaticFileHandler(web.RequestHandler):
    def set_default_headers(self):
        self.set_header("Access-Control-Allow-Origin", "*")

    def get(self):
        self.render('index.html')

class WSHandler(websocket.WebSocketHandler):
    def open(self):
        print('new connection')
        self.write_message("connected")

    def on_message(self, message):
        print('message received %s' % message)
        self.write_message("pong")

    def on_close(self):
        print('connection closed')

public_root = 'web_src'

handlers = [
    (r'/', StaticFileHandler),
    (r'/ws', WSHandler),
]

settings = dict(
    template_path=os.path.join(os.path.dirname(__file__), public_root),
    static_path=os.path.join(os.path.dirname(__file__), public_root),
    debug=True
)

app = web.Application(handlers, **settings)

sensorIP = "xxx.xxx.xxx.xxx"

if __name__ == "__main__":
    app.listen(8888)
    asyncio.ensure_future(connector.main_task(sensorIP))
    asyncio.get_event_loop().run_forever()
connector.py
import yaml
import asyncio

class RAMReceiver:
    def __init__(self, reader):
        self.reader = reader
        self.remote_data = None
        self.initParams = None

    async def work(self):
        i = 0
        while True:
            data = await self.reader.readuntil(b"\0")
            self.remote_data = yaml.load(data[:-1].decode("utf-8", "backslashreplace"))

            # here I want to emit some data
            # send self.remote_data to websockets

            if i == 0:
                i += 1
                self.initParams = self.remote_data
                # here I want to emit some data after the open event is triggered
                # send self.initParams as soon as a client has connected

async def main_task(host):
    tasks = []
    (ram_reader,) = await asyncio.gather(asyncio.open_connection(host, 51000))
    receiver = RAMReceiver(ram_reader[0])
    tasks.append(receiver.work())

    while True:
        await asyncio.gather(*tasks)
You can use Tornado's add_callback function to call a method on your websocket handler to send the messages.
Here's an example:
1. Create an additional method on your websocket handler which will receive a message from connector.py and send it to connected clients:
# server.py

class WSHandler(websocket.WebSocketHandler):

    # make it a classmethod so that
    # it can be accessed directly
    # from class without `self`
    @classmethod
    async def send_data(cls, data):
        # write your code for sending data to client
        ...
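Purely as an illustration of what that placeholder could contain (the clients set and the open/on_close overrides below are assumed helpers, not part of the original answer), one way to broadcast to every connected client is to register handlers in a class-level set and iterate over it in send_data:

# server.py (hypothetical sketch)
class WSHandler(websocket.WebSocketHandler):
    clients = set()  # assumed helper: tracks currently connected handlers

    def open(self):
        WSHandler.clients.add(self)
        self.write_message("connected")

    def on_close(self):
        WSHandler.clients.discard(self)

    @classmethod
    async def send_data(cls, data):
        # Broadcast the latest connector data to every connected client.
        for client in cls.clients:
            try:
                client.write_message(str(data))
            except websocket.WebSocketClosedError:
                pass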
2. Pass the currently running IOLoop and WSHandler.send_data to your connector.py:
# server.py
from tornado import ioloop

...

if __name__ == "__main__":
    ...
    io_loop = ioloop.IOLoop.current()  # current IOLoop
    callback = WSHandler.send_data

    # pass io_loop and callback to main_task
    asyncio.ensure_future(connector.main_task(sensorIP, io_loop, callback))
    ...
3. Then modify the main_task function in connector.py to receive io_loop and callback, and pass them on to RAMReceiver.
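The answer leaves this step as prose; a minimal sketch of what it might look like, keeping the structure of the connector.py shown above (the extra parameters are the only change):

# connector.py (sketch of step 3)
async def main_task(host, io_loop, callback):
    tasks = []
    (ram_reader,) = await asyncio.gather(asyncio.open_connection(host, 51000))

    # Hand the IOLoop and the WSHandler.send_data callback to the receiver
    receiver = RAMReceiver(ram_reader[0], io_loop, callback)
    tasks.append(receiver.work())

    while True:
        await asyncio.gather(*tasks)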
4. Finally, use io_loop.add_callback to call WSHandler.send_data:
class RAMReceiver:
    def __init__(self, reader, io_loop, callback):
        ...
        self.io_loop = io_loop
        self.callback = callback

    async def work(self):
        ...
        data = "Some data"
        self.io_loop.add_callback(self.callback, data)
        ...

How to store response when http client closed a connection?

I'm using the Falcon framework in Python to form JSON responses for a web API.
For instance, I have a function called logic() that runs for 30-90 minutes. I want something like this:
When the HTTP client asks for /api/somepath.json, we call somepath_handle()
somepath_handle() runs logic() in another thread/process
When logic() is finished, the thread is closed
somepath_handle() reads the response of logic() from its return value
If somepath_handle() was killed before logic() finished, the thread with logic() isn't stopped until it is finished
The code:
def somepath_handle():
    run_async_logic()
    response = wait_for_async_logic_response()  # read response of logic()
    return_response(response)
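A minimal sketch of the flow being asked about, assuming a plain ThreadPoolExecutor; run_async_logic, wait_for_async_logic_response and logic are the asker's placeholder names, mapped onto submit() and result():

from concurrent.futures import ThreadPoolExecutor

# Module-level executor: submitted jobs keep running even if the request
# handler that submitted them is killed, matching the last requirement above.
executor = ThreadPoolExecutor(max_workers=4)

def logic():
    # stands in for the 30-90 minute job
    return {"status": "done"}

def somepath_handle():
    future = executor.submit(logic)   # run_async_logic()
    response = future.result()        # wait_for_async_logic_response()
    return response                   # return_response(response)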
If your process takes such a long time, I advise you to send the result to the user by email, or maybe through a live notification system.
I am using a simple worker to create a queue where I process some commands. If you add simple response storage, it becomes possible to process any request and not lose it when the connection is lost.
Example:
This is the main function that uses falconframework.org to respond to requests.
main.py:
from flow import Flow
import falcon
import threading
import storage

__version__ = 0.1
__author__ = 'weldpua2008@gmail.com'

app = falcon.API(
    media_type='application/json')
app.add_route('/flow', Flow())

THREADS_COUNT = 1
# adding the workers to process the queue of commands
worker = storage.worker
for _ in range(THREADS_COUNT):
    thread = threading.Thread(target=worker)
    thread.daemon = True
    thread.start()
This is simple storage with the worker code.
storage.py:
from queue import Queue
import subprocess
import logging

main_queque = Queue()

def worker():
    while True:
        try:
            cmd = main_queque.get()
            # do_work(item)
            # time.sleep(5)
            handler = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
            stdout, stderr = handler.communicate()
            logging.critical("[queue_worker]: stdout:%s, stderr:%s, cmd:%s" % (stdout, stderr, cmd))
            main_queque.task_done()
        except Exception as error:
            logging.critical("[queue_worker:error] %s" % (error))
This is the class that will process any requests [POST, GET].
flow.py:
import storage
import json
import falcon
import random

class Flow(object):
    def on_get(self, req, resp):
        storage_value = storage.main_queque.qsize()
        msg = {"qsize": storage_value}
        resp.body = json.dumps(msg, sort_keys=True, indent=4)
        resp.status = falcon.HTTP_200

    # curl -H "Content-Type: application/json" -d '{}' http://10.206.102.81:8888/flow
    def on_post(self, req, resp):
        r = random.randint(1, 10000000000000)
        cmd = 'sleep 1;echo "ss %s"' % str(r)
        storage.main_queque.put(cmd)
        storage_value = cmd
        msg = {"value": storage_value}
        resp.body = json.dumps(msg, sort_keys=True, indent=4)
        resp.status = falcon.HTTP_200
