I have written some code that runs multiple threads (50 threads in this example), where each proxy may only be used by one thread at a time (meaning that one proxy cannot be in two threads at once).
import contextlib
import random
import threading
import time
import requests
my_proxies = [
    'http://140.99.107.100:2100',
    'http://140.99.107.101:2100',
    'http://140.99.107.102:2100',
    'http://140.99.107.103:2100',
    'http://140.99.107.104:2100',
    'http://140.99.107.105:2100',
    'http://140.99.107.106:2100',
    'http://140.99.107.107:2100',
    'http://140.99.107.108:2100',
    'http://140.99.107.109:2100',
    'http://140.99.107.110:2100',
    'http://140.99.107.111:2100',
    'http://140.99.107.112:2100',
    'http://140.99.107.113:2100',
    'http://140.99.107.114:2100',
    'http://140.99.107.115:2100',
    'http://140.99.107.116:2100',
    'http://140.99.107.117:2100',
    'http://140.99.107.118:2100',
    'http://140.99.107.119:2100',
    'http://140.99.107.120:2100',
    'http://140.99.107.121:2100',
    'http://140.99.107.122:2100',
]
# --------------------------------------------------------------------------- #


class AvailableProxiesManager:
    _proxy_lock: threading.Lock = threading.Lock()

    def __init__(self):
        self._proxy_dict = dict.fromkeys(my_proxies, True)

    @property
    @contextlib.contextmanager
    def proxies(self):
        """
        Context manager that yields a random proxy from the list of available proxies.

        :return: dict[str, str] - A random proxy.
        """
        proxy = None
        with self._proxy_lock:
            while not proxy:
                if available := [att for att, value in self._proxy_dict.items() if value]:
                    proxy = random.choice(available)
                    self._proxy_dict[proxy] = False
                else:
                    print('Waiting ... no proxies available')
                    time.sleep(.2)

        yield proxy
        self._proxy_dict[proxy] = True  # Return the proxy to the list of available proxies


# --------------------------------------------------------------------------- #

available_proxies = AvailableProxiesManager()
def main():
    while True:
        with available_proxies.proxies as proxy:
            response = requests.get('https://httpbin.org/ip', proxies={'https': proxy})
            if response.status_code == 403:
                print('Lets put proxy on cooldown for 10 minutes and try with new one!')
                time.sleep(120)


if __name__ == '__main__':
    threads = []
    for i in range(50):
        t = threading.Thread(target=main)
        threads.append(t)
        t.start()
        time.sleep(1)
However, my problem is that on every iteration of the while True loop, a new random proxy is used. What I am trying to achieve instead is that the same proxy keeps being used in the same thread until the response status is 403. That means that if thread-1 initially gets the proxy http://140.99.107.100:2100, it should be used in thread-1 until it gets a 403.
My question is: how can I make the same proxy be used until it hits a 403 response?
Expected:
The proxy stays the same until a 403
Actual:
A new proxy for every GET request
What if you stop using a context manager (i.e. remove the @contextlib.contextmanager decorator, but keep @property) and do something like this:
def main():
    proxy = next(available_proxies.proxies)
    while True:
        response = requests.get('https://httpbin.org/ip', proxies={'https': proxy})
        if response.status_code == 403:
            proxy = next(available_proxies.proxies)
            time.sleep(120)
Hope that helps, good luck!
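For completeness, a minimal sketch of the same idea with explicit acquire and release methods (the method names are my own, not from the answer above), so a proxy that hits a 403 is handed back to the pool instead of being lost, and the retry sleep happens outside the lock:

class AvailableProxiesManager:
    _proxy_lock = threading.Lock()

    def __init__(self):
        self._proxy_dict = dict.fromkeys(my_proxies, True)

    def acquire(self):
        while True:
            with self._proxy_lock:
                if available := [p for p, free in self._proxy_dict.items() if free]:
                    proxy = random.choice(available)
                    self._proxy_dict[proxy] = False
                    return proxy
            print('Waiting ... no proxies available')
            time.sleep(.2)  # sleep without holding the lock

    def release(self, proxy):
        with self._proxy_lock:
            self._proxy_dict[proxy] = True


def main():
    proxy = available_proxies.acquire()
    while True:
        response = requests.get('https://httpbin.org/ip', proxies={'https': proxy})
        if response.status_code == 403:
            available_proxies.release(proxy)     # hand the proxy back
            time.sleep(120)
            proxy = available_proxies.acquire()  # continue with a fresh one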
I am discovering FastAPI for the first time, and I am trying to make two services talk to each other with it.
When one service asks "How are you?", the other service replies "Good!", and vice versa. The same requests are therefore made in both directions; they cross each other. And this is repeated after a certain delay.
Here is my code for one service (it is the same code for both services).
It works well, except when the delay is too short!
from fastapi import FastAPI
import requests
import uvicorn
import asyncio

app = FastAPI()


def get(url, headers, json_data=False):
    if json_data:
        r = requests.get(url, headers=headers, json=json_data)
    else:
        r = requests.get(url, headers=headers)
    return r


@app.get("/")
async def how_are_you():
    message = f"{service_name}: Good!"
    print(message)
    return message


async def toc_toc(message):
    await asyncio.sleep(5)
    url = f"http://{remote_host}:{remote_port}/"
    headers = {
        'accept': 'application/json',
    }
    count = 0
    while True:
        await asyncio.sleep(waiting)
        print(f"{count}\t{service_name}: {message}!")
        get(url, headers)
        count += 1


async def run_webserver():
    config = uvicorn.Config(app, host=host, port=port)
    server = uvicorn.Server(config)
    await server.serve()


async def main():
    await asyncio.gather(
        toc_toc("How are you"),
        run_webserver()
    )
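Note that get() above calls the blocking requests.get from inside coroutines running on the event loop, which stalls everything else while each request is in flight. As a sketch (assuming Python 3.9+ for asyncio.to_thread), the blocking call can be pushed onto a worker thread instead:

# non-blocking variant of get(): run the blocking requests call in a
# worker thread so the event loop keeps serving incoming requests
async def get_async(url, headers, json_data=False):
    if json_data:
        return await asyncio.to_thread(requests.get, url, headers=headers, json=json_data)
    return await asyncio.to_thread(requests.get, url, headers=headers)

# inside toc_toc, the call then becomes: await get_async(url, headers)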
Here are the input variables of my first service:
if __name__ == "__main__":
    waiting = 0.1
    service_name = "service_1"
    host = "0.0.0.0"
    port = 82
    remote_host = "0.0.0.0"
    remote_port = 83

    loop = asyncio.get_event_loop()
    tasks = main()
    loop.run_until_complete(tasks)
And those of my second service:
if __name__ == "__main__":
    waiting = 0.1
    service_name = "service_2"
    host = "0.0.0.0"
    port = 83
    remote_host = "0.0.0.0"
    remote_port = 82

    loop = asyncio.get_event_loop()
    tasks = main()
    loop.run_until_complete(tasks)
If I set the delay waiting to 1 second (between two "How are you?" requests), I have no problem: everything works perfectly, for minutes at least.
On the other hand, if I reduce the variable to 0.1 seconds (or even lower), the services send each other requests and responses about a hundred times, and then the applications seem to freeze.
I get no error message that could help me debug this.
I need to be able to lower this delay to 0.001 seconds to simulate a production load peak that my services will have to be able to support.
I have the impression that the problem comes from the fact that the requests cross between my two services, because when only one service sends requests to the other (by commenting out the get(url, headers) line in service 2), it works very well, even with a delay of 0.001 seconds.
Do you have any ideas?
Thanks in advance.
PS: I run my services directly on my Mac M1 but also on my local Kubernetes cluster, and I get the same result.
Apologies for the long post. I am trying to subscribe to a RabbitMQ queue and then create a worker queue to execute tasks. This is required since the incoming rate on the RabbitMQ queue will be high and processing each item from the queue takes 10-15 minutes, hence the need for a worker queue. Now I am trying to allow only 4 items in the worker queue, and register a callback method for processing the items in the queue. The expectation is that my code handles the part where all 4 slots in the worker queue are busy: new incoming items should be blocked until a free slot is available.
The RabbitMQ piece is working well. The problem is that I cannot figure out why the items from my worker queue are not executing the task, i.e. the callback is not working. In fact, the item from the worker queue gets executed only once, when the program execution starts. For the rest of the time, tasks keep getting added to the worker queue without being consumed. I would appreciate it if somebody could help me understand this one.
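For reference, the bounded worker-pool pattern described above is usually built from a queue.Queue(maxsize=N) drained by N worker threads, so that put() blocks once all N workers are busy. A minimal generic sketch (not the poster's code; the names are illustrative):

import queue
import threading

def make_worker_pool(n, handler):
    jobs = queue.Queue(maxsize=n)  # put() blocks once n items are pending

    def _worker():
        while True:
            item = jobs.get()
            handler(item)       # the long-running task, e.g. 10-15 minutes
            jobs.task_done()

    for _ in range(n):
        threading.Thread(target=_worker, daemon=True).start()
    return jobs  # producers call jobs.put(item)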
I am attaching the code for rabbitmqConsumer, driver, and slaveConsumer. Some information has been redacted in the code for privacy issues.
# This is the driver
#!/usr/bin/env python
import time

from rabbitmqConsumer import BasicMessageReceiver

basic_receiver_object = BasicMessageReceiver()
basic_receiver_object.declare_queue()

while True:
    basic_receiver_object.consume_message()
    time.sleep(2)
# This is the rabbitmqConsumer
#!/usr/bin/env python
import pika
import ssl
import json

from slaveConsumer import slave


class BasicMessageReceiver:
    def __init__(self):
        # SSL Context for TLS configuration of Amazon MQ for RabbitMQ
        ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
        url = <url for the queue>
        parameters = pika.URLParameters(url)
        parameters.ssl_options = pika.SSLOptions(context=ssl_context)
        self.connection = pika.BlockingConnection(parameters)
        self.channel = self.connection.channel()
        # worker-queue object
        self.slave_object = slave()
        self.slave_object.start_task()

    def declare_queue(self, queue_name="abc"):
        print(f"Trying to declare queue inside consumer({queue_name})...")
        self.channel.queue_declare(queue=queue_name, durable=True)

    def close(self):
        print("Closing Receiver")
        self.channel.close()
        self.connection.close()

    def _consume_message_setup(self, queue_name):
        def message_consume(ch, method, properties, body):
            print(f"I am inside the message_consume")
            message = json.loads(body)
            self.slave_object.execute_task(message)
            ch.basic_ack(delivery_tag=method.delivery_tag)

        self.channel.basic_qos(prefetch_count=1)
        self.channel.basic_consume(on_message_callback=message_consume,
                                   queue=queue_name)

    def consume_message(self, queue_name="abc"):
        print("I am starting the rabbitmq start_consuming")
        self._consume_message_setup(queue_name)
        self.channel.start_consuming()
# This is the slaveConsumer
#!/usr/bin/env python
import pika
import ssl
import json
import requests
import threading
import queue
import os


class slave:
    def __init__(self):
        self.job_queue = queue.Queue(maxsize=3)
        self.job_item = ""

    def start_task(self):
        def _worker():
            while True:
                json_body = self.job_queue.get()
                self._parse_object_from_queue(json_body)
                self.job_queue.task_done()

        threading.Thread(target=_worker, daemon=True).start()

    def execute_task(self, obj):
        print("Inside execute_task")
        self.job_item = obj
        self.job_queue.put(self.job_item)
        # print(self.job_queue.queue)

    def _parse_object_from_queue(self, json_body):
        if bool(json_body['entity']):
            if json_body['entity'] == 'Hello':
                print("Inside Slave: Hello")
            elif json_body['entity'] == 'World':
                print("Inside Slave: World")
        self.job_queue.join()
Intro:
I am working on a TCP server that receives events over TCP. For this task, I decided to use the asyncio Protocol classes (yeah, maybe I should have used Streams); the reception of events works fine.
Problem:
I need to be able to connect to the clients, so I create another "server" used to look up all my connected clients, and after finding the correct one, I use the Protocol class transport object to send a message and try to grab the response by reading a buffer variable that always has the last received message.
My problem is, after sending the message, I don't know how to wait for the response, so I always get the previous message from the buffer.
I will try to simplify the code to illustrate (please, keep in mind that this is an example, not my real code):
import asyncio
import time

CONN = set()


class ServerProtocol(asyncio.Protocol):
    def connection_made(self, transport):
        self.transport = transport
        CONN.add(self)

    def data_received(self, data):
        self.buffer = data
        # DO OTHER STUFF
        print(data)

    def connection_lost(self, exc=None):
        CONN.remove(self)


class ConsoleProtocol(asyncio.Protocol):
    def connection_made(self, transport):
        self.transport = transport
        # Get first value just to illustrate
        self.client = next(iter(CONN))

    def data_received(self, data):
        # Forward the message to the client
        self.client.transport.write(data)
        # wait a fraction of a second
        time.sleep(0.2)
        # forward the response of the client
        self.transport.write(self.client.buffer)


def main():
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(
        loop.create_server(protocol_factory=ServerProtocol,
                           host='0.0.0.0',
                           port=6789))
    loop.run_until_complete(
        loop.create_server(protocol_factory=ConsoleProtocol,
                           host='0.0.0.0',
                           port=9876))
    try:
        loop.run_forever()
    except Exception as e:
        print(e)
    finally:
        loop.close()


if __name__ == '__main__':
    main()
This is not only my first experience writing a TCP server, but also my first experience working with parallelism. So it took me days to realize that my sleep not only would not work, but that it was also locking up the server while it "sleeps".
Any help is welcome.
time.sleep(0.2) is blocking and should not be used in async programming; it blocks the whole execution. If your program is running with 100 clients, the last client will be delayed for 0.2*99 seconds, which is not what you want.
The right way is to let the program wait 0.2 s without blocking, so the other concurrent clients are not delayed; we can use a thread.
import asyncio
import time
import threading

CONN = set()


class ServerProtocol(asyncio.Protocol):
    def connection_made(self, transport):
        self.transport = transport
        CONN.add(self)

    def data_received(self, data):
        self.buffer = data
        # DO OTHER STUFF
        print(data)

    def connection_lost(self, exc=None):
        CONN.remove(self)


class ConsoleProtocol(asyncio.Protocol):
    def connection_made(self, transport):
        self.transport = transport
        self.loop = asyncio.get_event_loop()
        # Get first value just to illustrate
        self.client = next(iter(CONN))

    def delay_thread(self):
        # wait a fraction of a second in a worker thread, so the event
        # loop itself is never blocked
        time.sleep(0.2)
        # forward the response of the client; transports are not
        # thread-safe, so hand the write back to the event loop
        self.loop.call_soon_threadsafe(
            self.transport.write, self.client.buffer)

    def data_received(self, data):
        # Forward the message to the client
        self.client.transport.write(data)
        thread = threading.Thread(target=self.delay_thread, args=())
        thread.daemon = True
        thread.start()
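As an aside, the same delayed forward can also be done without a thread at all, by letting the event loop schedule the write itself; a sketch using loop.call_later:

class ConsoleProtocol(asyncio.Protocol):
    def connection_made(self, transport):
        self.transport = transport
        self.client = next(iter(CONN))

    def data_received(self, data):
        # Forward the message to the client
        self.client.transport.write(data)
        # schedule the response forward 0.2 s later on the loop itself,
        # without blocking and without an extra thread
        asyncio.get_event_loop().call_later(
            0.2, lambda: self.transport.write(self.client.buffer))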
The idea I have is to run a Flask web app encapsulating mitmproxy, so that when I use the proxy and perform a GET request to a specific URL, mitmproxy captures it (data + session) and replays it when triggered from Flask.
from flask import Flask
from mitmproxy import http
from mitmproxy.options import Options
from mitmproxy.proxy.config import ProxyConfig
from mitmproxy.proxy.server import ProxyServer
from mitmproxy.tools.dump import DumpMaster
import requests
import threading
import asyncio

proxies = {
    'http': 'http://127.0.0.1:8080',
    'https': 'http://127.0.0.1:8080',
}


class repeat:
    def __init__(self):
        self.response = ''

    def request(self, flow):
        if 'http://dummy.com/get_details.html' in flow.request.pretty_url:
            '''
            STEP 1
            Capture authenticated requests for replay later
            '''
        elif 'repeat_again' in flow.request.pretty_url:
            '''
            STEP 2
            Repeat earlier get_details request
            then save data
            '''

    def response(self, flow: http.HTTPFlow):
        '''
        return dynamic details
        save the get_details data into class variable self.response
        '''


addons = [repeat()]

app = Flask("proxapp")


@app.route('/get_again')
def get_again():
    '''
    cause a repeat request
    '''
    requests.get('repeat_again', proxies=proxies)
    return repeat.response


def loop_in_thread(loop, m):
    asyncio.set_event_loop(loop)
    m.run_loop(loop.run_forever)


if __name__ == "__main__":
    options = Options(listen_host='0.0.0.0', listen_port=8080, http2=True)
    m = DumpMaster(options, with_termlog=True, with_dumper=True)
    config = ProxyConfig(options)
    m.server = ProxyServer(config)
    m.addons.add(addons)

    # run mitmproxy in background, especially integrated with other server
    loop = asyncio.get_event_loop()
    t = threading.Thread(target=loop_in_thread, args=(loop, m))
    t.start()

    app.run(debug=True, use_reloader=False, host='0.0.0.0', port=int('28888'))
So here my test browser browses dummy.com as normal, but the moment it hits the get_details.html page, I would like to capture that request for later replay (this can be done with mitmdump easily). The response should be saved in a variable of the class.
Then, whenever I want to replay the earlier request, I can call http://127.0.0.1:28888/get_again to replay the same request.
Any ideas? I can do this manually with mitmdump, but I am trying to automate it.
Storing the content for replay has to happen in the response method; in the request method, the flow is then answered with replay_response.
from flask import Flask
from mitmproxy.options import Options
from mitmproxy.proxy.config import ProxyConfig
from mitmproxy.proxy.server import ProxyServer
from mitmproxy.tools.dump import DumpMaster
import requests
import threading
import asyncio

proxies = {
    'http': 'http://127.0.0.1:8080',
    'https': 'http://127.0.0.1:8080',
}

# Change replay_url -> http://dummy.com/get_details.html
replay_url = 'http://wttr.in/Innsbruck?0'


class Repeat:
    def __init__(self):
        self.replay_response = None

    def request(self, flow):
        if 'repeat_again' in flow.request.pretty_url:
            flow.response = self.replay_response

    def response(self, flow):
        if replay_url in flow.request.pretty_url:
            self.replay_response = flow.response


app = Flask("proxapp")
repeat = Repeat()


@app.route('/get_again')
def get_again():
    resp = requests.get('http://repeat_again', proxies=proxies)
    return (resp.text, resp.status_code, resp.headers.items())


def loop_in_thread(loop, m):
    asyncio.set_event_loop(loop)
    m.run_loop(loop.run_forever)


if __name__ == "__main__":
    options = Options(listen_host='0.0.0.0', listen_port=8080, http2=True)
    m = DumpMaster(options, with_termlog=True, with_dumper=True)
    m.addons.add(repeat)
    config = ProxyConfig(options)
    m.server = ProxyServer(config)

    # run mitmproxy in background, especially integrated with other server
    loop = asyncio.get_event_loop()
    t = threading.Thread(target=loop_in_thread, args=(loop, m))
    t.start()

    app.run(debug=True, use_reloader=False, host='0.0.0.0', port=int('28888'))
I have tested with http://wttr.in/Innsbruck?0
Output:
$ curl --proxy http://localhost:8080 "http://wttr.in/Innsbruck?0"
Weather report: Innsbruck

     \  /       Partly cloudy
   _ /"".-.     2..3 °C
     \_(   ).   → 7 km/h
     /(___(__)  10 km
                0.0 mm

$ curl --proxy http://localhost:8080 "http://repeat_again"
Weather report: Innsbruck

     \  /       Partly cloudy
   _ /"".-.     2..3 °C
     \_(   ).   → 7 km/h
     /(___(__)  10 km
                0.0 mm

$ curl http://localhost:28888/get_again
Weather report: Innsbruck

     \  /       Partly cloudy
   _ /"".-.     2..3 °C
     \_(   ).   → 7 km/h
     /(___(__)  10 km
                0.0 mm
I am trying to improve the performance of my application. It is a Python 3.6 asyncio.Protocol-based TCP server (SSL-wrapped) handling a lot of requests.
It works fine and the performance is acceptable when only one connection is active, but as soon as another connection is opened, the client side of the application slows down. This is really noticeable once there are 10-15 client connections.
Is there a way to properly handle requests in parallel, or should I resort to running multiple server instances?
/edit Added code
main.py
if __name__ == '__main__':
    import package.server

    server = package.server.TCPServer()
    server.join()
package.server
import multiprocessing, asyncio, uvloop

asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

from package.connection import Connection


class TCPServer(multiprocessing.Process):
    name = 'tcpserver'

    def __init__(self, discord_queue=None):
        multiprocessing.Process.__init__(self)
        self.daemon = True
        # some setup in here
        self.start()

    def run(self):
        loop = uvloop.new_event_loop()
        self.loop = loop
        # db setup, etc
        server = loop.create_server(Connection, HOST, PORT, ssl=SSL_CONTEXT)
        loop.run_until_complete(server)
        loop.run_forever()
package.connection
import asyncio, hashlib, os
from time import sleep, time as timestamp


class Connection(asyncio.Protocol):
    connections = {}

    def setup(self, peer):
        self.peer = peer
        self.ip, self.port = self.peer[0], self.peer[1]
        self.buffer = []

    @property
    def connection_id(self):
        if not hasattr(self, '_connection_id'):
            self._connection_id = hashlib.md5('{}{}{}'.format(self.ip, self.port, timestamp()).encode('utf-8')).hexdigest()
        return self._connection_id

    def connection_lost(self, exception):
        del Connection.connections[self.connection_id]

    def connection_made(self, transport):
        self.transport = transport
        self.setup(transport.get_extra_info('peername'))
        Connection.connections[self.connection_id] = self

    def data_received(self, data):
        # processing, average server side execution time is around 30ms
        sleep(0.030)
        self.transport.write(os.urandom(64))
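Worth noting: the blocking sleep(0.030) stand-in stalls the event loop, so every other connection also waits those 30 ms. A sketch of one way to keep the loop responsive, with the processing factored into a hypothetical process() method and pushed to the default thread pool:

import asyncio
import os

class Connection(asyncio.Protocol):
    def process(self, data):
        # hypothetical stand-in for the real ~30 ms of processing
        return os.urandom(64)

    def data_received(self, data):
        loop = asyncio.get_event_loop()
        # run the blocking processing in the default executor so the
        # event loop can keep servicing the other connections
        future = loop.run_in_executor(None, self.process, data)
        # the done-callback fires back on the event loop, where writing
        # to the transport is safe
        future.add_done_callback(lambda fut: self.transport.write(fut.result()))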
The application runs on Debian 9.9 and is started via systemd.
To "benchmark" it, I use this script:
import os, socket
from multiprocessing import Pool
from time import time as timestamp


def foobar(i):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect(('127.0.0.1', 60000))
    while True:
        ms = timestamp() * 1000
        s.send(os.urandom(128))
        s.recv(1024 * 2)
        print(i, timestamp() * 1000 - ms)


if __name__ == '__main__':
    instances = 4
    with Pool(instances) as p:
        print(p.map(foobar, range(0, instances)))
To answer my own question here: I went with a solution that spawns multiple instances listening on base_port + x, and I put an nginx TCP load balancer in front of them.
The individual TCPServer instances are still spawned as their own processes and communicate among themselves via a separate UDP connection, and with the main process via a multiprocessing.Queue.
While this does not "fix" the problem, it provides a somewhat scalable solution for my very specific problem.
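For illustration, a minimal sketch of that fan-out, assuming TCPServer were extended to accept a port (in the code above HOST and PORT are module-level constants, so the port keyword here is hypothetical):

if __name__ == '__main__':
    import package.server

    BASE_PORT, INSTANCES = 60001, 4
    # one process per port; an external TCP load balancer (nginx "stream"
    # in this setup) distributes clients across the port range
    servers = [package.server.TCPServer(port=BASE_PORT + x)  # port kwarg assumed
               for x in range(INSTANCES)]
    for server in servers:
        server.join()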