How to push post parameter into scrapy-redis - python-3.x

I have a post request like
def start_requests(self):
yield FormRequest(url,formdata={'id': "parameter from redis"})
Can I use redis-cli lpush to save post parameter and that my crawler run it?

By default the scrapy-redis queue working only with url as messages.
One message = one url. But you can modify this behavior.
For example you can use some object for your messages/requests:
class ScheduledRequest:
def __init__(self, url, method, body)
self.url = url
self.method = method
self.body = body
Pass it to queue as json encoded dic:
redis.lpush(
queue_key,
json.dumps(
ScheduledRequest(
url='http://google.com',
method='POST',
body='some body data ...'
).__dict__
)
)
And rewrite the make_request_from_data and schedule_next_requests methods:
class MySpiderBase(RedisCrawlSpider, scrapy.Spider):
def __init__(self, *args, **kwargs):
super(MySpiderBase, self).__init__(*args, **kwargs)
def make_request_from_data(self, data):
scheduled = ScheduledRequest(
**json.loads(
bytes_to_str(data, self.redis_encoding)
)
)
# here you can use and FormRequest
return scrapy.Request(url=scheduled.url, method=scheduled.method, body=scheduled.body)
def schedule_next_requests(self):
for request in self.next_requests():
self.crawler.engine.crawl(request, spider=self)
def parse(self, response):
pass

Related

count successful and unsuccessful post requests for asynchronous post call/request

I need help in implementing the logic to count number of successful post calls which are asynchronous in nature (status_code=200) as well as failed_calls (status_code != 200)
I am new to coroutines. Would appreciate if someone can suggest a better way of making a post asynchronous call which can be retried, polled for status, and that can emit metrics for successful post requests as well.
Following is my code:
asyncio.get_event_loop().run_in_executor(
None,
self.publish_actual,
event_name,
custom_payload,
event_message_params,
)
which calls publish_actual:
def publish_actual(
self,
event_name: str,
custom_payload={},
event_message_params=[],
):
"""Submits a post request using the request library
:param event_name: name of the event
:type event_name: str
:param key: key for a particular application
:param custom_payload: custom_payload, defaults to {}
:type custom_payload: dict, optional
:param event_message_params: event_message_params, defaults to []
:type event_message_params: list, optional
"""
json_data = {}
path = f"/some/path"
self.request(path, "POST", json=json_data)
which calls following request function
def request(self, api_path, method="GET", **kwargs):
try:
self._validate_configuration()
headers = {}
api_endpoint = self.service_uri.to_url(api_path)
logger.debug(api_endpoint)
if "headers" in kwargs and kwargs["headers"]:
headers.update(kwargs["headers"])
headers = {"Content-Type": "application/json"}
begin = datetime.now()
def build_success_metrics(response, *args, **kwargs):
tags = {
"name": "success_metrics",
"domain": api_endpoint,
"status_code": 200,
}
build_metrics(tags)
def check_for_errors(response, *args, **kwargs):
response.raise_for_status()
response = self.session.request(
method=method,
url=api_endpoint,
headers=headers,
timeout=self.timeout,
hooks={"response": [build_success_metrics, check_for_errors]},
**kwargs,
)
end = datetime.now()
logger.debug(
f"'{method}' request against endpoint '{api_endpoint}' took {round((end - begin).total_seconds() * 1000, 3)} ms"
)
logger.debug(f"response: {response}")
except RequestException as e:
tags = {
"name": "error_metrics",
"domain": api_endpoint,
"exception_class": e.__class__.__name__,
}
build_metrics(tags)
return f"Exception occured: {e}"
Let me know if anything else is required from my end to explain what exactly I have done and what I am trying to achieve.
There is not much await and async in your example so I've just addressed the counting part of your question in general terms in asyncio. asyncio.Queue is good for this because you can separate out the counting from the cause quite simply.
import asyncio
import aiohttp
class Count():
def __init__(self, queue: asyncio.Queue):
self.queue = queue
self.good = 0
self.bad = 0
async def count(self):
while True:
result = await self.queue.get()
if result == 'Exit':
return
if result == 200:
self.good += 1
else:
self.bad += 1
async def request(q: asyncio.Queue):
async with aiohttp.ClientSession() as session:
for _ in range(5): # just poll 30 times in this instance
await asyncio.sleep(0.1)
async with session.get(
'https://httpbin.org/status/200%2C500', ssl=False
) as response:
q.put_nowait(response.status)
q.put_nowait('Exit')
async def main():
q = asyncio.Queue()
cnt = Count(q)
tasks = [cnt.count(), request(q)]
await asyncio.gather(*[asyncio.create_task(t) for t in tasks])
print(cnt.good, cnt.bad)
if __name__ == "__main__":
asyncio.run(main())
Output is random given httpbin response. Should add to 5.
4 1

How do I apply Django middleware everywhere except for a single path?

I'm using Python 3.9 with Django 3. I have defined this middleware ...
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'corsheaders.middleware.CorsMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'directory.middleware.extend_token_response.ExtendTokenResponse'
]
However, I don't want the middleware to apply to a certain URL. I have hard-coded this in the middleware like so
class ExtendTokenResponse:
def __init__(self, get_response):
self.get_response = get_response
# One-time configuration and initialization.
def __call__(self, request):
response = self.get_response(request)
if request.path != '/' + LOGOUT_PATH:
# Code to be executed for each request before
# the view (and later middleware) are called.
is_expired = True
try:
token = request.auth
print("req path: %s" % request.path)
is_expired = is_token_expired(token) if token else True
except Exception as err:
print(err)
if not is_expired:
but this seems a little sloppy and I would think the middleware comes with somethign out of the box to configure that this wouldn't need to be applied to my "/logout" path. Is there a more elegant way to configure this?
Edit: In response to Bernhard Vallant's answer, I changed my middleware to the below
def token_response_exempt(view_func):
# Set an attribute on the function to mark it as exempt
def wrapped_view(*args, **kwargs):
return view_func(*args, **kwargs)
wrapped_view.token_response_exempt = True
return wraps(view_func)(wrapped_view)
class ExtendTokenResponse:
def init(self, get_response):
self.get_response = get_response
# One-time configuration and initialization.
def process_view(self, request, view_func, view_args, view_kwargs):
print("in process view method ...\n")
if getattr(view_func, "token_response_exempt", False):
print("returning none ...\n")
return None
# Code to be executed for each request before
# the view (and later middleware) are called.
is_expired = True
try:
token = request.auth
print("req path: %s" % request.path)
is_expired = is_token_expired(token) if token else True
except Exception as err:
print(err)
if not is_expired:
token.delete()
new_token = Token.objects.create(user = token.user)
# Code to be executed for each request/response after
# the view is called.
print("setting new token to %s" % new_token)
request.token = new_token
def __call__(self, request):
response = self.get_response(request)
print("---- in call method ----\n")
if getattr(request, "token", None) is not None:
print("setting refresh token header = %s" % request.token)
response['Refresh-Token'] = request.token
return response
but any call to an endpoint, e.g.,
curl --header "Content-type: application/json" --data "$req" --request POST "http://localhost:8000/login"
results in no token being retrieved from the reqeust. "request.auth" generates the error
'WSGIRequest' object has no attribute 'auth'
Django itself doesn't provide a solution for this. Probably hardcoding/defining paths in your settings/middleware is fine as long it is a middleware that primarly exists for one specific project.
However if you want to mark certain views to exclude them from being processed you could use decorators in the same way Django does with the csrf_exempt decorator.
from functools import wraps
def token_response_exempt(view_func):
# Set an attribute on the function to mark it as exempt
def wrapped_view(*args, **kwargs):
return view_func(*args, **kwargs)
wrapped_view.token_response_exempt = True
return wraps(view_func)(wrapped_view)
# your middleware
class ExtendTokenResponse:
def __init__(self, get_response):
self.get_response = get_response
def __call__(self, request):
response = self.get_response(request)
if getattr(request, "token", None) is not None:
response['Refresh-Token'] = request.token
return response
def process_view(self, request, view_func, view_args, view_kwargs):
if getattr(view_func, "token_response_exempt", False):
return None
# do your token generation here
request.token = token
And then you can use decorator like the following:
# urls.py
urlpatterns = [
path('logout/', token_response_exempt(LogOutView.as_view())),
]
About your case, I have 2 recommendations below:
Method 1: use process_view and define a list func will be excluded with structure "app.module.func" and check to skip in process_view
# In settings.py
EXCLUDE_FROM_MY_MIDDLEWARE =set({'custom_app.views.About'})
# In middlewares.py
class ExtendTokenResponse:
def __init__(self, get_response):
self.get_response = get_response
# One-time configuration and initialization.
def __call__(self, request):
# Code to be executed for each request before
# the view (and later middleware) are called.
logger.info(f'request hit request {request}')
response = self.get_response(request)
# Code to be executed for each request/response after
# the view is called.
return response
def process_view(self, request, view_func, view_args, view_kwargs):
view_function = '.'.join((view_func.__module__, view_func.__name__))
exclusion_set=getattr(settings,'EXCLUDE_FROM_MY_MIDDLEWARE',set() )
if view_function in exclusion_set:
return None
Method 2: Use decorator_from_middleware and apply middleware to each function needed it.
from django.utils.decorators import decorator_from_middleware
# with function view
#decorator_from_middleware(ExtendTokenResponse)
def view_function(request):
...
#with class view
class SimpleMiddlewareMixin:
#decorator_from_middleware(ExtendTokenResponse)
def dispatch(*args, **kwargs):
return super().dispatch(*args, **kwargs)
class MyClassBasedView(SimpleMiddlewareMixin, ListView):

Django Redirect If Authenticated Mixin

i want to create a mixin to be redirect user to a specified page if they're already authenticated. i want to be able to use this mixin in different parts of the application without having to rewrite the logic over and over again.
i get a accounts.views.view didn't return an HttpResponse object. It returned None instead. error if the user is not authenticated but it works if user is authenticated. accounts is the app_name
here's my code in mixin.py
class RedirectIfAuthenticatedMixin:
"""
RedirectIfAuthenticatedMixin: redirect authenticated user to different page via redirect_to parameter
"""
redirect_to = None
def get(self, request):
"""
Get request handler to check if user is already authenticated
then redirect user to specified url with redirect_to
"""
if request.user.is_authenticated:
return HttpResponseRedirect(self.get_redirect_url())
# return ??? <- WHAT TO WRITE HERE TO ALLOW REQUEST TO CONTINUE EXECUTION
def get_redirect_url(self):
"""
Get the specified redirect_to url
"""
if not self.redirect_to:
raise ImproperlyConfigured('no url to redirect to. please specify a redirect url')
return str(self.redirect_to)
it works when i add this to the view itself
class RegisterView(RedirectIfAuthenticatedMixin, FormView):
"""
RegisterView: form view to handle user registration
"""
template_name = 'registration/register.html'
success_url = reverse_lazy('accounts:activation-sent')
form_class = RegistrationForm
def form_valid(self, form):
"""
Method to handle form submission and validation
"""
# save user information
user = form.save(commit = False)
user.email = form.cleaned_data['email']
user.set_password(form.cleaned_data['password'])
user.is_active = False
user.save()
# email configuration/compose to send to user
current_site = get_current_site(self.request)
subject = 'Activate Your Padumba Account'
message = render_to_string('registration/account_activation_email.html', {
'user': user,
'domain': current_site.domain,
'uid': urlsafe_base64_encode(force_bytes(user.pk)),
'token': account_activation_token.make_token(user),
})
# send the account confirmation email to the user
user.email_user(subject = subject, message = message)
# send a flash message to the user
messages.success(self.request, ('Check Your Email For Account Activation Link'))
return super().form_valid(form)
def get(self, request, *args, **kwargs):
if request.user.is_authenticated:
return HttpResponseRedirect(reverse_lazy('accounts:index'))
return super(RegisterView, self).get(request, *args, **kwargs)
The logic will not be evaluated if there is an override of the get method for that view. You probably better override the dispatch method. This will also prevent making POST, PUT, PATCH, DELETE, etc. requests to the view:
class RedirectIfAuthenticatedMixin:
redirect_to = None
def get_redirect_url(self):
if not self.redirect_to:
raise ImproperlyConfigured('no url to redirect to. please specify a redirect url')
return str(self.redirect_to)
def dispatch(self, request, *args, **kwargs):
if request.user.is_authenticated:
return HttpResponseRedirect(self.get_redirect_url())
return super().dispatch(request, *args, **kwargs)
But you can actually make this a special case of the UserPassesTestMixin mixin [Django-doc]:
from django.contrib.auth.mixins import UserPassesTestMixin
class RedirectIfAuthenticatedMixin(UserPassesTestMixin):
redirect_to = None
def get_redirect_url(self):
if not self.redirect_to:
raise ImproperlyConfigured('no url to redirect to. please specify a redirect url')
return str(self.redirect_to)
def handle_no_permission(self):
return HttpResponseRedirect(self.get_redirect_url())
def test_func(self):
return not self.request.user.is_authenticated
#coder-six
I think implementation of such class will require you handle the main request
in implementing class i.e. implement also get method in the child class and
then from there call the get method of the of your mixin class. To make things easier you can change the RedirectIfAuthenticatedMixin get method to redirect_if_authenticated.
You will have to then call this method in every child class where you want this functionality.
Example:
class RedirectIfAuthenticatedMixin:
"""
RedirectIfAuthenticatedMixin: redirect authenticated user to different page via redirect_to parameter
"""
redirect_to = None
def redirect_if_authenticated(self, request):
"""
Get request handler to check if user is already authenticated
then redirect user to specified url with redirect_to
"""
if request.user.is_authenticated:
return HttpResponseRedirect(self.get_redirect_url())
#other methods of class here ....
class RegisterView(FormView, RedirectIfAuthenticatedMixin):
"""
RegisterView: form view to handle user registration
"""
def form_valid(self, form):
"""
Method to handle form submission and validation
"""
pass
# THIS WORKS,...
def get(self, request, *args, **kwargs):
self.redirect_if_authenticated(request)
return #continue your request here if not authenticated
But I also think using a decorator might make things eaisier for you.
Decorator:
def redirect_on_authenticated(func):
def wrapper(self, request, *args, **kwargs):
if request.user.is_authenticated:
return HttpResponseRedirect(self.get_redirect_url())
else:
return func(self, request, *args, **kwargs)
return wrapper
class RegisterView(FormView, RedirectIfAuthenticatedMixin):
"""
RegisterView: form view to handle user registration
"""
def form_valid(self, form):
"""
Method to handle form submission and validation
"""
pass
#redirect_on_authenticated
def get(self, request, *args, **kwargs):
self.redirect_if_authenticated(request)
return #continue your request here if not authenticated

Add variable to Django request object in Middleware (once)

class CustomMiddleware:
def __init__(self, get_response):
self.get_response = get_response
def __call__(self, request):
response = self.get_response(request)
request.variable = 1
response = self.get_response(request)
return response
This works but it it processing the request twice. I am unsure of how to set this variable after the view has been processed (for every view), only once. process_template_response is not a valid option because it will not work with every view. Is there a better way to do this?

i cant access to post data in get from django

I need read 'dataset' in get function for showing amount and name in the template but I can't access to 'dataset' in get function
class Port(View):
def post(self, request, pid):
session = requests.Session()
response = session.get("http://localhost:8001/pay/" + str(pid))
if response.status_code is 200:
try:
dataset = json.loads(request.body.decode('utf-8'))
print("###",data['amount'],' - ',data['name'],' - ',pid)
return dataset
except ConnectionError as ce:
print(ce)
return HttpResponse("Json Parse Error")
return dataset
else:
return HttpResponse("* wrong request *")
def get(self, request, pid):
context = {
'amount' : dataset['amount'],
'firstname' : dataset['name'],
'order_id' : pid,
}
return render(request, 'index.html',context)
I believe dataset is initialized as an attribute in Port class. Use self.dataset

Resources