I have a problem unpickling data received from a logger. Given this udp_server:
import pickle
import logging
import logging.handlers
import socketserver
import struct

class MyUDPHandler(socketserver.BaseRequestHandler):

    def handle(self):
        data = self.request[0].strip()
        socket = self.request[1]
        print("{} wrote:".format(self.client_address[0]))
        print(self.unPickle(data))  # here is the problem
        socket.sendto(data.upper(), self.client_address)

    def unPickle(self, data):
        return pickle.loads(data)

class LogRecordSocketReceiver(socketserver.UDPServer):

    allow_reuse_address = True

    def __init__(self, host='localhost',
                 port=logging.handlers.DEFAULT_TCP_LOGGING_PORT,
                 handler=MyUDPHandler):
        socketserver.UDPServer.__init__(self, (host, port), handler)
        self.abort = 0
        self.timeout = 1
        self.logname = None

    def serve_until_stopped(self):
        import select
        abort = 0
        while not abort:
            rd, wr, ex = select.select([self.socket.fileno()],
                                       [], [],
                                       self.timeout)
            if rd:
                self.handle_request()
            abort = self.abort

if __name__ == "__main__":
    tcpserver = LogRecordSocketReceiver()
    print('About to start UDP server...')
    tcpserver.serve_until_stopped()
And udp_log_sender:
import logging, logging.handlers
rootLogger = logging.getLogger('')
rootLogger.setLevel(logging.DEBUG)
udp_handler = logging.handlers.DatagramHandler("localhost", logging.handlers.DEFAULT_TCP_LOGGING_PORT)
rootLogger.addHandler(udp_handler)
logging.info('Jackdaws love my big sphinx of quartz.')
When the server receives a logging message and tries to run unPickle, an EOFError is thrown. What could cause such behaviour?
Do not strip the binary data.
Omit the first 4 bytes (i.e. use data[4:]); they contain the length of the dumped object.
I didn't find this information in the logging module documentation - sometimes one has to go to the source (or just google harder).
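Putting both points together, a minimal sketch of a corrected handle() for the MyUDPHandler class above (all names as in the question's code):

def handle(self):
    data = self.request[0]  # no .strip(): it can remove bytes of the binary pickle
    socket = self.request[1]
    print("{} wrote:".format(self.client_address[0]))
    # DatagramHandler prefixes the pickled record with a 4-byte length header,
    # so skip it before unpickling.
    print(self.unPickle(data[4:]))
    socket.sendto(data.upper(), self.client_address)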
I want to create a SunRPC client in Python using the xdrlib library; the SunRPC server is already implemented in C. I have implemented an RPC client in Python over UDP by referencing the following link:
https://svn.python.org/projects/stackless/trunk/Demo/rpc/rpc.py
It is giving a timeout error as well as a "cannot unpack None object" error.
Can anyone guide me on how this can be done? There is no information available on this on Google. Has anyone implemented this kind of code? Please help - I have been struggling with this for about a week now.
Here is my client code:
import rpc
import rpc_new
from tq_const import *
from tq_type import *
import tq_pack
import socket
import os

class PartialTQClient:

    def __init__(self):
        pass

    def addpackers(self):
        self.packer = tq_pack.TQPacker(self)
        self.unpacker = tq_pack.TQUnpacker(self, '')

    def unpack_month_temperatures(self):
        return self.unpacker.unpack_array(self.unpacker.unpack_uint)

    def call(self, month):
        res = self.make_call(0, month, self.packer.pack_uint, self.unpack_month_temperatures)
        return res

class UDPTQClient(PartialTQClient, rpc.RawUDPClient):

    def __init__(self, host):
        rpc.RawUDPClient.__init__(self, host, TQ_PROGRAM, TQ_VERSION, TQ_PORT)
        PartialTQClient.__init__(self)

if __name__ == "__main__":
    tqcl = UDPTQClient("127.0.0.1")
    print(tqcl)
    res = tqcl.call(12)
    #print ("Got result", res)
Here is my server code:
import rpc
import rpc_new
from tq_const import *
from tq_type import *
import tq_pack
import socket
import os

class TQServer(rpc.UDPServer):
    print("Inside TQServer")

    def handle_0(self):
        print("Got request")
        m = self.unpacker.unpack_uint()
        print("Arguments was", m)
        self.turn_around()
        self.packer.pack_array([1, 2, 3], self.packer.pack_int)
        #res = PFresults(self, status=TRUE, phone="555-12345")
        #res.pack()

if __name__ == "__main__":
    s = TQServer("", TQ_PROGRAM, TQ_VERSION, TQ_PORT)
    print("Service started...", s)
    try:
        print("Trying")
        s.loop()
    finally:
        print("Service interrupted.")
When I run the client and the server on localhost I get the following error: TypeError: cannot unpack non-iterable NoneType object
I am having trouble with Flask-SocketIO. Here is the code I use:
import json
import time
import threading
import queue

from flask import Flask, render_template
from flask_socketio import SocketIO, emit
from engineio.async_drivers import gevent
from flask_cors import CORS
from gevent.pywsgi import WSGIServer
from geventwebsocket.handler import WebSocketHandler

app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, cors_allowed_origins="*")

queue_notification_thread = queue.Queue()

def callback_notification(data):
    print("callback device {}".format(data))
    notification_thread = threading.Thread(target=notification_job, args=(data,))
    notification_thread.start()
    queue_notification_thread.put(notification_thread)

def notification_job(data):
    print("callback device in notification job {}".format(data))
    socketio.emit("notification", data, broadcast=True)

@socketio.on('request')
def handle_message(data):
    Logger.instance().debug('received message: {}'.format(data))
    try:
        if data.__contains__('data'):
            response_message = dict()
            response_message['Devices'] = dict()
            response_message['Devices']['event'] = 'MY_EVENT'
            socketio.emit('notification', response_message, broadcast=True)
        else:
            Logger.instance().error('Can\'t parse data {}'.format(data))
    except OSError as err:
        print('Error: when process {} \n ValueError {}'.format(data, err))

@socketio.on_error_default  # handles all namespaces without an explicit error handler
def default_error_handler(e):
    print('An error occurred:')
    print(e)

if __name__ == '__main__':
    serialReader = SerialReader()
    serialReader.start_reading(callback_notification)
    http_server = WSGIServer(('', 5000), app, handler_class=WebSocketHandler)
    http_server.serve_forever()
And the reader, which is called asynchronously:
import threading
import time

import serial

class SerialController:

    serial_port: str
    serial_device: serial.Serial
    reading_thread: threading.Thread
    device_name: str

    def __init__(self, serial_port: str = "/dev/ttyACM0", baudrate=115200, read_timeout=0.2, device_name=''):
        self.serial_port = serial_port
        self.device_name = device_name
        self.serial_device = serial.Serial(port=self.serial_port, baudrate=baudrate, timeout=0.2)

    def start_reading(self, callback_function):
        self.reading_callback = callback_function
        # run the reading thread
        self.reading_thread = threading.Thread(target=self.read_job)
        self.reading_thread.start()

    def read_job(self):
        available_data = 0
        while True:
            if self.serial_device.in_waiting > available_data:
                available_data = self.serial_device.in_waiting
                print('available_data {}'.format(available_data))
                time.sleep(0.1)
            else:
                if available_data != 0:
                    data = self.serial_device.readall()
                    available_data = 0
                    if data != b'' and data != b'\n':
                        if self.reading_callback != None:
                            message = dict()
                            message["Reader"] = dict()
                            message["Reader"]["device"] = self.device_name
                            message["Reader"]["data"] = data
                            self.reading_callback(message)
            time.sleep(1)
When I receive a message in @socketio.on('request'), the broadcast emission works properly with no delay. When I use callback_notification, called from my serial reader, the broadcast emission has a variable delay (from 1 second to 10 seconds).
On the server the message "callback device ..." is printed instantly, but the client receives the message only after a few seconds.
I tried making the emission call in a thread, as in the code shown above, but there is no improvement.
I have written the TCP server below, and I don't know why the handler is not supporting multiple TCP client connections.
import socket
import sys
import ast

# Internal imports
import core

try:
    import fcntl
except ImportError:
    fcntl = None

import logging
import json

_LOG = logging.getLogger(__name__)

if sys.version_info.major == 2:
    import SocketServer
    TCPServer = SocketServer.TCPServer
    RequestHandler = SocketServer.BaseRequestHandler
if sys.version_info.major == 3:
    import socketserver
    TCPServer = socketserver.TCPServer
    RequestHandler = socketserver.BaseRequestHandler

class TCPServerRequestHandler(RequestHandler):

    def handle(self):
        """Receives data from client."""
        msg = self.request.recv(1024).strip()
        if self.client_address and not msg:
            _LOG.error("No data received from client: {}".format(self.client_address[0]))
            return
        # Send some data to client
        # self.wfile.write("Hello Client....Got your message".encode())
        data = ast.literal_eval(msg.decode('utf-8'))
        if not data:
            _LOG.error("No data received.")
        else:
            with core._connect(db="exampledb") as conn:
                if "device" in data and data["device"] == "mcu":
                    table_name = "roku_online_status"
                    if conn:
                        data.pop("device")
                        cols = [c for c in data.keys()]  # python 3 dict keys is not a list
                        stored_value = core.doQuery(conn, table_name, cols, "id")
                        if stored_value[0] != data["online"]:
                            core.insert_row(data, table_name, conn)
                else:
                    if conn:
                        core.insert_row(data, "particle_photon", conn)
            conn.close()

class Server(TCPServer):

    allow_reuse_address = True
    # The constant would be better initialized by a systemd module
    SYSTEMD_FIRST_SOCKET_FD = 3

    def __init__(self, server_address, handler_cls, bind_and_activate=True):
        self.handlers = set()
        # Invoke base but omit bind/listen steps (performed by systemd activation!)
        try:
            TCPServer.__init__(self, server_address, handler_cls, bind_and_activate)
        except TypeError:
            TCPServer.__init__(self, server_address, handler_cls)
        # Override socket
        self.socket = socket.fromfd(
            self.SYSTEMD_FIRST_SOCKET_FD, self.address_family, self.socket_type)
        if fcntl is not None and hasattr(fcntl, 'FD_CLOEXEC'):
            flags = fcntl.fcntl(self.fileno(), fcntl.F_GETFD)
            flags |= fcntl.FD_CLOEXEC
            fcntl.fcntl(self.fileno(), fcntl.F_SETFD, flags)

    def server_bind(self):
        self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.socket.bind(self.server_address)

    def server_close(self):
        TCPServer.server_close(self)
        print("Shutting down server.")
        for handler in self.handlers.copy():
            print(handler)
            self.shutdown_request(handler.request)

def main(server_address):
    """Starts TCPServer."""
    logging.basicConfig(level=logging.DEBUG)
    # Create a TCP Server instance
    server = Server(server_address, TCPServerRequestHandler)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        sys.exit(0)

if __name__ == '__main__':
    main(("10.10.10.2", 7111))
For a single connection it works just fine, but when multiple clients try to connect it gets stuck.
You have implemented a single-threaded server with blocking I/O. This kind of server can only handle a single client at a time since it waits until the client is done (inside TCPServerRequestHandler) before it will be able to process the connection of the next client.
To handle multiple clients at the same time you either have to use multiple threads or processes, where each handles a single client, or you have to implement an event-based server which can handle multiple clients inside a single thread. To implement the first, have a look at ThreadingTCPServer and ForkingTCPServer; for the latter, have a look at frameworks like Twisted.
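For example, a minimal sketch of the threading approach applied to the question's classes (ThreadedServer is a hypothetical name; socketserver.ThreadingMixIn is what ThreadingTCPServer adds on top of TCPServer, and it hands each accepted connection to its own thread):

import socketserver

# Threaded variant of the question's Server class: each call to handle()
# now runs in its own thread, so one slow client no longer blocks the rest.
class ThreadedServer(socketserver.ThreadingMixIn, Server):
    daemon_threads = True  # don't keep the process alive for lingering client threads

if __name__ == '__main__':
    server = ThreadedServer(("10.10.10.2", 7111), TCPServerRequestHandler)
    server.serve_forever()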
I have one topic and one subscription with multiple subscribers. My application scenario is that I want to process messages on different subscribers, with a specific number of messages being processed at a time. That means, suppose 8 messages are being processed at first; as soon as one message finishes processing and is acknowledged, the next message should be taken from the topic, while making sure that no duplicate message appears on any subscriber and that 8 messages are always being processed in the background.
For this I used the synchronous pull method with max_messages = 8, but the next pull only happens after all the pulled messages have finished processing. So we created our own scheduler, where 8 processes run in the background at the same time, each pulling 1 message at a time, but still the next message is only delivered after all 8 messages have finished processing.
Here is my code:
#!/usr/bin/env python3

import logging
import multiprocessing
import time
import sys
import random
from google.cloud import pubsub_v1

project_id = 'xyz'
subscription_name = 'abc'
NUM_MESSAGES = 4
ACK_DEADLINE = 50
SLEEP_TIME = 20

multiprocessing.log_to_stderr()
logger = multiprocessing.get_logger()
logger.setLevel(logging.INFO)

def worker(msg):
    logger.info("Received message:{}".format(msg.message.data))
    random_sleep = random.randint(200, 800)
    logger.info("Received message:{} for {} sec".format(msg.message.data, random_sleep))
    time.sleep(random_sleep)

def message_puller():
    subscriber = pubsub_v1.SubscriberClient()
    subscription_path = subscriber.subscription_path(project_id, subscription_name)
    while True:
        try:
            response = subscriber.pull(subscription_path, max_messages=1)
            message = response.received_messages[0]
            msg = message
            ack_id = message.ack_id
            process = multiprocessing.Process(target=worker, args=(message,))
            process.start()
            while process.is_alive():
                # `ack_deadline_seconds` must be between 10 and 600.
                subscriber.modify_ack_deadline(subscription_path, [ack_id], ack_deadline_seconds=ACK_DEADLINE)
                time.sleep(SLEEP_TIME)
            # Final ack.
            subscriber.acknowledge(subscription_path, [ack_id])
            logger.info("Acknowledging message: {}".format(msg.message.data))
        except Exception as e:
            print(e)
            continue

def synchronous_pull():
    p = []
    for i in range(0, NUM_MESSAGES):
        p.append(multiprocessing.Process(target=message_puller))
    for i in range(0, NUM_MESSAGES):
        p[i].start()
    for i in range(0, NUM_MESSAGES):
        p[i].join()

if __name__ == '__main__':
    synchronous_pull()
Also, sometimes subscriber.pull does not pull any messages even though the while loop is always True. It gives me the error:
list index (0) out of range
So subscriber.pull is not pulling messages even though there are messages on the topic, but after some time it starts pulling. Why is that?
I have also tried asynchronous pulling with flow control, but duplicate messages end up on multiple subscribers. If any other method would resolve my issue, let me know. Thanks in advance.
Google Cloud Pub/Sub ensures at-least-once delivery (docs), which means messages may be delivered more than once. To tackle this, you need to make your program/system idempotent.
You have multiple subscribers pulling 8 messages each.
To avoid the same message being processed by multiple subscribers, acknowledge the message as soon as a subscriber pulls it and proceeds with processing, rather than acknowledging it at the end, after the message has been fully processed.
Also, instead of running your main script continuously, use sleep for some constant time when there are no messages in the queue.
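For example, a minimal sketch of the early-ack change applied to the question's message_puller (same pull/acknowledge calls and names - worker, project_id, subscription_name, SLEEP_TIME - as in the question's code):

from google.api_core.exceptions import DeadlineExceeded

def message_puller():
    subscriber = pubsub_v1.SubscriberClient()
    subscription_path = subscriber.subscription_path(project_id, subscription_name)
    while True:
        try:
            response = subscriber.pull(subscription_path, max_messages=1)
        except DeadlineExceeded:
            # No messages available right now: back off instead of hammering pull.
            time.sleep(SLEEP_TIME)
            continue
        if not response.received_messages:
            time.sleep(SLEEP_TIME)
            continue
        message = response.received_messages[0]
        # Ack right after the pull, before processing, so the message is not
        # redelivered to (and duplicated on) another subscriber.
        subscriber.acknowledge(subscription_path, [message.ack_id])
        process = multiprocessing.Process(target=worker, args=(message,))
        process.start()
        process.join()  # keep at most one message per puller in flight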
I had similar code, where I used synchronous pull, except I did not use parallel processing.
Here's the code:
PubSubHandler - Class to handle Pubsub related operations
from google.cloud import pubsub_v1
from google.api_core.exceptions import DeadlineExceeded

class PubSubHandler:

    def __init__(self, subscriber_config):
        self.project_name = subscriber_config['PROJECT_NAME']
        self.subscriber_name = subscriber_config['SUBSCRIBER_NAME']
        self.subscriber = pubsub_v1.SubscriberClient()
        self.subscriber_path = self.subscriber.subscription_path(self.project_name, self.subscriber_name)

    def pull_messages(self, number_of_messages):
        try:
            response = self.subscriber.pull(self.subscriber_path, max_messages=number_of_messages)
            received_messages = response.received_messages
        except DeadlineExceeded as e:
            received_messages = []
            print('No messages caused error')
        return received_messages

    def ack_messages(self, message_ids):
        if len(message_ids) > 0:
            self.subscriber.acknowledge(self.subscriber_path, message_ids)
        return True
Utils - Class for util methods
import json

class Utils:

    def __init__(self):
        pass

    def decoded_data_to_json(self, decoded_data):
        try:
            decoded_data = decoded_data.replace("'", '"')
            json_data = json.loads(decoded_data)
            return json_data
        except Exception as e:
            raise Exception('error while parsing json')

    def raw_data_to_utf(self, raw_data):
        try:
            decoded_data = raw_data.decode('utf8')
            return decoded_data
        except Exception as e:
            raise Exception('error converting to UTF')
Orcestrator - Main script
import time
import json
import logging

from utils import Utils
from db_connection import DbHandler
from pub_sub_handler import PubSubHandler

class Orcestrator:

    def __init__(self):
        self.MAX_NUM_MESSAGES = 2
        self.SLEEP_TIME = 10
        self.util_methods = Utils()
        self.pub_sub_handler = PubSubHandler(subscriber_config)

    def main_handler(self):
        to_ack_ids = []
        pulled_messages = self.pub_sub_handler.pull_messages(self.MAX_NUM_MESSAGES)
        if len(pulled_messages) < 1:
            self.SLEEP_TIME = 1
            print('no messages in queue')
            return
        logging.info('messages in queue')
        self.SLEEP_TIME = 10
        for message in pulled_messages:
            raw_data = message.message.data
            try:
                decoded_data = self.util_methods.raw_data_to_utf(raw_data)
                json_data = self.util_methods.decoded_data_to_json(decoded_data)
                print(json_data)
            except Exception as e:
                logging.error(e)
            to_ack_ids.append(message.ack_id)
        if self.pub_sub_handler.ack_messages(to_ack_ids):
            print('acknowledged msg_ids')

if __name__ == "__main__":
    orecestrator = Orcestrator()
    print('Receiving data..')
    while True:
        orecestrator.main_handler()
        time.sleep(orecestrator.SLEEP_TIME)
After a lot of investigating, I found out that after serving hundreds of thousands of HTTP POST requests, there's a memory leak. The strange part is that the memory leak only occurs when using PyPy.
Here's an example code:
from twisted.internet import reactor
import tornado.ioloop

do_tornado = False
port = 8888

if do_tornado:
    from tornado.web import RequestHandler, Application
else:
    from cyclone.web import RequestHandler, Application

class MainHandler(RequestHandler):

    def get(self):
        self.write("Hello, world")

    def post(self):
        self.write("Hello, world")

if __name__ == "__main__":
    routes = [(r"/", MainHandler)]
    application = Application(routes)
    print port

    if do_tornado:
        application.listen(port)
        tornado.ioloop.IOLoop.instance().start()
    else:
        reactor.listenTCP(port, application)
        reactor.run()
Here is the test code I am using to generate requests:
from twisted.internet import reactor, defer
from twisted.internet.task import LoopingCall
from twisted.web.client import Agent, HTTPConnectionPool
from twisted.web.iweb import IBodyProducer
from zope.interface import implements

pool = HTTPConnectionPool(reactor, persistent=True)
pool.retryAutomatically = False
pool.maxPersistentPerHost = 10
agent = Agent(reactor, pool=pool)

bid_url = 'http://localhost:8888'

class StringProducer(object):
    implements(IBodyProducer)

    def __init__(self, body):
        self.body = body
        self.length = len(body)

    def startProducing(self, consumer):
        consumer.write(self.body)
        return defer.succeed(None)

    def pauseProducing(self):
        pass

    def stopProducing(self):
        pass

def callback(a):
    pass

def error_callback(error):
    pass

def loop():
    d = agent.request('POST', bid_url, None, StringProducer("Hello, world"))
    #d = agent.request('GET', bid_url)
    d.addCallback(callback).addErrback(error_callback)

def main():
    exchange = LoopingCall(loop)
    exchange.start(0.02)
    #log.startLogging(sys.stdout)
    reactor.run()

main()
Note that this code does not leak with CPython, nor with Tornado on PyPy! The code leaks only when using Twisted and PyPy together, and ONLY when using a POST request.
To see the leak, you have to send hundreds of thousands of requests.
Note that when setting PYPY_GC_MAX, the process eventually crashes.
What's going on?
It turns out that the cause of the leak is io.BytesIO.
Here's how to simulate the leak on PyPy:
from io import BytesIO
while True: a = BytesIO()
Here's the fix:
https://bitbucket.org/pypy/pypy/commits/40fa4f3a0740e3aac77862fe8a853259c07cb00b