I want to schedule a function in APScheduler to happen only once in the specified time from now. The run_date date trigger accepts datetime objects and/or strings there of:
from datetime import datetime
import os
from apscheduler.schedulers.asyncio import AsyncIOScheduler
try:
import asyncio
except ImportError:
import trollius as asyncio
def tick():
print('Tick! The time is: %s' % datetime.now())
if __name__ == '__main__':
scheduler = AsyncIOScheduler()
scheduler.add_job(tick, 'date', run_date=datetime.fromordinal(datetime.toordinal(datetime.now())+1))
scheduler.start()
print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
# Execution will block here until Ctrl+C (Ctrl+Break on Windows) is pressed.
try:
asyncio.get_event_loop().run_forever()
except (KeyboardInterrupt, SystemExit):
pass
Is there a built-in functionality in APScheduler to specify the delay time directly in seconds, e.g.something like application_start_time + specified_delay? I tried datetime.fromordinal(datetime.toordinal(datetime.now())+1) as argument to run_date for 1 second after the starting point of my application, but this only hangs for ever without calling the tick function showing the following deprecation message:
Press Ctrl+C to exit
/tmp/a.py:30: DeprecationWarning: There is no current event loop
asyncio.get_event_loop().run_forever()
What you probably wanted was:
from datetime import datetime, timedelta, timezone
scheduler.add_job(tick, "date", run_date=datetime.now(timezone.utc) + timedelta(seconds=1))
I'm trying to schedule a repeating event to run every minute in Python 3.
I've seen class sched.scheduler but I'm wondering if there's another way to do it. I've heard mentions I could use multiple threads for this, which I wouldn't mind doing.
I'm basically requesting some JSON and then parsing it; its value changes over time.
To use sched.scheduler I have to create a loop to request it to schedule the even to run for one hour:
scheduler = sched.scheduler(time.time, time.sleep)
# Schedule the event. THIS IS UGLY!
for i in range(60):
scheduler.enter(3600 * i, 1, query_rate_limit, ())
scheduler.run()
What other ways to do this are there?
You could use threading.Timer, but that also schedules a one-off event, similarly to the .enter method of scheduler objects.
The normal pattern (in any language) to transform a one-off scheduler into a periodic scheduler is to have each event re-schedule itself at the specified interval. For example, with sched, I would not use a loop like you're doing, but rather something like:
def periodic(scheduler, interval, action, actionargs=()):
scheduler.enter(interval, 1, periodic,
(scheduler, interval, action, actionargs))
action(*actionargs)
and initiate the whole "forever periodic schedule" with a call
periodic(scheduler, 3600, query_rate_limit)
Or, I could use threading.Timer instead of scheduler.enter, but the pattern's quite similar.
If you need a more refined variation (e.g., stop the periodic rescheduling at a given time or upon certain conditions), that's not too hard to accomodate with a few extra parameters.
You could use schedule. It works on Python 2.7 and 3.3 and is rather lightweight:
import schedule
import time
def job():
print("I'm working...")
schedule.every(10).minutes.do(job)
schedule.every().hour.do(job)
schedule.every().day.at("10:30").do(job)
while 1:
schedule.run_pending()
time.sleep(1)
My humble take on the subject:
from threading import Timer
class RepeatedTimer(object):
def __init__(self, interval, function, *args, **kwargs):
self._timer = None
self.function = function
self.interval = interval
self.args = args
self.kwargs = kwargs
self.is_running = False
self.start()
def _run(self):
self.is_running = False
self.start()
self.function(*self.args, **self.kwargs)
def start(self):
if not self.is_running:
self._timer = Timer(self.interval, self._run)
self._timer.start()
self.is_running = True
def stop(self):
self._timer.cancel()
self.is_running = False
Usage:
from time import sleep
def hello(name):
print "Hello %s!" % name
print "starting..."
rt = RepeatedTimer(1, hello, "World") # it auto-starts, no need of rt.start()
try:
sleep(5) # your long-running job goes here...
finally:
rt.stop() # better in a try/finally block to make sure the program ends!
Features:
Standard library only, no external dependencies
Uses the pattern suggested by Alex Martnelli
start() and stop() are safe to call multiple times even if the timer has already started/stopped
function to be called can have positional and named arguments
You can change interval anytime, it will be effective after next run. Same for args, kwargs and even function!
Based on MestreLion answer, it solve a little problem with multithreading:
from threading import Timer, Lock
class Periodic(object):
"""
A periodic task running in threading.Timers
"""
def __init__(self, interval, function, *args, **kwargs):
self._lock = Lock()
self._timer = None
self.function = function
self.interval = interval
self.args = args
self.kwargs = kwargs
self._stopped = True
if kwargs.pop('autostart', True):
self.start()
def start(self, from_run=False):
self._lock.acquire()
if from_run or self._stopped:
self._stopped = False
self._timer = Timer(self.interval, self._run)
self._timer.start()
self._lock.release()
def _run(self):
self.start(from_run=True)
self.function(*self.args, **self.kwargs)
def stop(self):
self._lock.acquire()
self._stopped = True
self._timer.cancel()
self._lock.release()
You could use the Advanced Python Scheduler. It even has a cron-like interface.
Use Celery.
from celery.task import PeriodicTask
from datetime import timedelta
class ProcessClicksTask(PeriodicTask):
run_every = timedelta(minutes=30)
def run(self, **kwargs):
#do something
Based on Alex Martelli's answer, I have implemented decorator version which is more easier to integrated.
import sched
import time
import datetime
from functools import wraps
from threading import Thread
def async(func):
#wraps(func)
def async_func(*args, **kwargs):
func_hl = Thread(target=func, args=args, kwargs=kwargs)
func_hl.start()
return func_hl
return async_func
def schedule(interval):
def decorator(func):
def periodic(scheduler, interval, action, actionargs=()):
scheduler.enter(interval, 1, periodic,
(scheduler, interval, action, actionargs))
action(*actionargs)
#wraps(func)
def wrap(*args, **kwargs):
scheduler = sched.scheduler(time.time, time.sleep)
periodic(scheduler, interval, func)
scheduler.run()
return wrap
return decorator
#async
#schedule(1)
def periodic_event():
print(datetime.datetime.now())
if __name__ == '__main__':
print('start')
periodic_event()
print('end')
Doc: Advanced Python Scheduler
#sched.cron_schedule(day='last sun')
def some_decorated_task():
print("I am printed at 00:00:00 on the last Sunday of every month!")
Available fields:
| Field | Description |
|-------------|----------------------------------------------------------------|
| year | 4-digit year number |
| month | month number (1-12) |
| day | day of the month (1-31) |
| week | ISO week number (1-53) |
| day_of_week | number or name of weekday (0-6 or mon,tue,wed,thu,fri,sat,sun) |
| hour | hour (0-23) |
| minute | minute (0-59) |
| second | second (0-59) |
Here's a quick and dirty non-blocking loop with Thread:
#!/usr/bin/env python3
import threading,time
def worker():
print(time.time())
time.sleep(5)
t = threading.Thread(target=worker)
t.start()
threads = []
t = threading.Thread(target=worker)
threads.append(t)
t.start()
time.sleep(7)
print("Hello World")
There's nothing particularly special, the worker creates a new thread of itself with a delay. Might not be most efficient, but simple enough. northtree's answer would be the way to go if you need more sophisticated solution.
And based on this, we can do the same, just with Timer:
#!/usr/bin/env python3
import threading,time
def hello():
t = threading.Timer(10.0, hello)
t.start()
print( "hello, world",time.time() )
t = threading.Timer(10.0, hello)
t.start()
time.sleep(12)
print("Oh,hai",time.time())
time.sleep(4)
print("How's it going?",time.time())
There is a new package, called ischedule. For this case, the solution could be as following:
from ischedule import schedule, run_loop
from datetime import timedelta
def query_rate_limit():
print("query_rate_limit")
schedule(query_rate_limit, interval=60)
run_loop(return_after=timedelta(hours=1))
Everything runs on the main thread and there is no busy waiting inside the run_loop. The startup time is very precise, usually within a fraction of a millisecond of the specified time.
Taking the original threading.Timer() class implementation and fixing the run() method I get something like:
class PeriodicTimer(Thread):
"""A periodic timer that runs indefinitely until cancel() is called."""
def __init__(self, interval, function, args=None, kwargs=None):
Thread.__init__(self)
self.interval = interval
self.function = function
self.args = args if args is not None else []
self.kwargs = kwargs if kwargs is not None else {}
self.finished = Event()
def cancel(self):
"""Stop the timer if it hasn't finished yet."""
self.finished.set()
def run(self):
"""Run until canceled"""
while not self.finished.wait(self.interval):
self.function(*self.args, **self.kwargs)
The wait() method called is using a condition variable, so it should be rather efficient.
See my sample
import sched, time
def myTask(m,n):
print n+' '+m
def periodic_queue(interval,func,args=(),priority=1):
s = sched.scheduler(time.time, time.sleep)
periodic_task(s,interval,func,args,priority)
s.run()
def periodic_task(scheduler,interval,func,args,priority):
func(*args)
scheduler.enter(interval,priority,periodic_task,
(scheduler,interval,func,args,priority))
periodic_queue(1,myTask,('world','hello'))
I ran into a similar issue a while back so I made a python module event-scheduler to address this. It has a very similar API to the sched library with a few differences:
It utilizes a background thread and is always able to accept and run jobs in the background until the scheduler is stopped explicitly (no need for a while loop).
It comes with an API to schedule recurring events at a user specified interval until explicitly cancelled.
It can be installed by pip install event-scheduler
from event_scheduler import EventScheduler
event_scheduler = EventScheduler()
event_scheduler.start()
# Schedule the recurring event to print "hello world" every 60 seconds with priority 1
# You can use the event_id to cancel the recurring event later
event_id = event_scheduler.enter_recurring(60, 1, print, ("hello world",))
i have a spider for crawling a site and i want to run it every 10 minutes. put it in python schedule and run it. after first run i got
ReactorNotRestartable
i try this sulotion and got
AttributeError: Can't pickle local object 'run_spider..f'
error.
edit:
try how-to-schedule-scrapy-crawl-execution-programmatically python program run without error and crawl function run every 30 seconds but spider doesn't run and i don't get data.
def run_spider():
def f(q):
try:
runner = crawler.CrawlerRunner()
deferred = runner.crawl(DivarSpider)
#deferred.addBoth(lambda _: reactor.stop())
#reactor.run()
q.put(None)
except Exception as e:
q.put(e)
runner = crawler.CrawlerRunner()
deferred = runner.crawl(DivarSpider)
q = Queue()
p = Process(target=f, args=(q,))
p.start()
result = q.get()
p.join()
if result is not None:
raise result
The multiprocessing solution is a gross hack to work-around lack of understanding of how Scrapy and reactor management work. You can get rid of it and everything is much simpler.
from twisted.internet.task import LoopingCall
from twisted.internet import reactor
from scrapy.crawler import CrawlRunner
from scrapy.utils.log import configure_logging
from yourlib import YourSpider
configure_logging()
runner = CrawlRunner()
task = LoopingCall(lambda: runner.crawl(YourSpider()))
task.start(60 * 10)
reactor.run()
Easiest way I know to do it is using a separate script to call the script containing your twisted reactor, like this:
cmd = ['python3', 'auto_crawl.py']
subprocess.Popen(cmd).wait()
To run your CrawlerRunner every 10 minutes, you could use a loop or crontab on this script.