OneAtATime

  • Thread starter Lawrence D'Oliveiro
  • Start date
L

Lawrence D'Oliveiro

#!/usr/bin/python
#+
# This script ensures there is only one instance of a particular command
# running at a time. It may be useful, for example, for potentially
# lengthy cron tasks, to ensure that a new task is not started if the
# previous one has not finished yet.
#
# Invoke this script from the command line as follows:
#
# OneAtATime [opts] cmd
#
# where cmd is the Shell command line to execute. Valid options are:
#
# --id=s
# use the string s as the unique identifier for this command
# (should not contain "/" or nul characters). Any other execution
# of OneAtATime specifying the same id will not proceed as long as
# this instance is still executing.
# If omitted, the id is computed as a hash on the cmd string.
#
# --timeout=n
# If an existing instance has been executing for at least n
# seconds, assume it's taking too long and try to kill it
# before proceeding. If omitted, the timeout is infinity.
#
# --verbose
# increases the verbosity level by 1 each time it is specified.
# The initial default verbosity level is 0.
#
# --wait
# If specified, then this invocation should sleep until any
# previous instance is finished (or until the timeout elapses,
# if specified). If not specified, then this invocation will
# simply exit if an existing instance is already running.
# Note that, if more than one invocation is waiting on a
# previous instance to complete, there is no guarantee in
# what order they'll execute.
#
# Start of development 2006 March 23 by Lawrence D'Oliveiro
# <[email protected]>.
# First working version 2006 March 24.
# Fix hang if previous process dies without freeing instance lock and
# --timeout was not specified 2006 March 24.
# Ensure lock-breaking recovery is still invoked even without --wait,
# include effective user ID in instance lock name for uniqueness
# 2006 March 25.
# Last modified 2006 March 27.
#-

import sys
import os
import time
import errno
import signal
import getopt
import sha

# SignalName maps a signal number to its symbolic name, for use in
# diagnostic messages about how a child process died.
SignalName = {}
for Name in dir(signal) :
    # Collect symbolic names for all signals. Names beginning with
    # "SIG_" (SIG_DFL, SIG_IGN and the like) are handler/mask constants
    # rather than signals; their integer values can coincide with real
    # signal numbers and, because they are visited after the "SIGxxx"
    # names, they would replace the genuine entries if let through.
    Value = getattr(signal, Name)
    if (
        Name.startswith("SIG")
        and not Name.startswith("SIG_")
        and isinstance(Value, int)
    ) :
        SignalName[int(Value)] = Name
    #end if
#end for

#+
# Mainline
#-

# Parse the command line. Results are left in these module-level names:
#   Cmd       -- the Shell command line to execute (the single argument)
#   ID        -- string identifying this command in the lock file names
#   Timeout   -- integer seconds from --timeout, or None if not given
#   Wait      -- True iff --wait was given
#   Verbosity -- number of times --verbose was given
# Raises getopt.GetoptError for an unknown option, a wrong number of
# arguments or an unusable --id; a non-integer --timeout raises the
# ValueError from int().
(Opts, Args) = getopt.getopt(
    sys.argv[1:],
    "",
    ["id=", "timeout=", "verbose", "wait"]
)
if len(Args) != 1 :
    raise getopt.GetoptError("need exactly one arg, the cmd to execute")
#end if
Cmd = Args[0]
ID = None
Timeout = None
Wait = False
Verbosity = 0
for Keyword, Value in Opts :
    if Keyword == "--id" :
        ID = Value
    elif Keyword == "--timeout" :
        Timeout = int(Value)
    elif Keyword == "--verbose" :
        Verbosity += 1
    elif Keyword == "--wait" :
        Wait = True
    #end if
#end for
if ID is None :
    # No explicit id: derive one from the command text. hashlib.sha1
    # yields the same digest as the deprecated sha.new, so ids computed
    # by earlier versions of this script stay the same.
    import hashlib
    CmdBytes = Cmd
    if not isinstance(CmdBytes, bytes) :
        # hashlib wants bytes; only a text string needs encoding
        CmdBytes = CmdBytes.encode("utf-8", "surrogateescape")
    #end if
    ID = hashlib.sha1(CmdBytes).hexdigest()[:16]
      # less than 40-char file name should be OK
elif "/" in ID or "\0" in ID :
    # The id is spliced into lock file names, so enforce the documented
    # restriction instead of failing later with an obscure OSError.
    raise getopt.GetoptError("--id must not contain \"/\" or nul characters")
#end if

def Debug(Level, Msg) :
    # Writes Msg to standard output, but only when the verbosity
    # selected on the command line is at least Level; otherwise
    # does nothing.
    if Verbosity < Level :
        return
    #end if
    print(Msg)
#end Debug

# Scratch file holding my process ID. The secondary lock is a symlink
# to it and the instance lock is a hard link to it.
PidFileName = "/tmp/OneAtATime-pid-%d" % os.getpid()
try :
    # Clear away anything already sitting under this name (for example
    # left behind by an earlier run that had the same PID) so that the
    # exclusive create below starts from a clean slate.
    os.unlink(PidFileName)
except OSError as Err :
    if Err.errno != errno.ENOENT :
        # Not bothering to recover from privilege failures
        raise
    #end if
#end try
# O_EXCL makes the open fail rather than reuse an existing name, so the
# write below cannot be redirected through a symlink planted in /tmp.
PidFile = os.fdopen(
    os.open(PidFileName, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600),
    "w"
)
try :
    PidFile.write("%s\n" % os.getpid())
finally :
    # close even if the write fails
    PidFile.close()
#end try
InstanceLockName = "/tmp/OneAtATime-lock-%d-%s" % (os.geteuid(), ID)
  # presence of this file is used to guard against more than one
  # invocation of a task with the same id at once
InstanceLock2Name = "/tmp/OneAtATime-lock2-%d-%s" % (os.geteuid(), ID)
  # presence of this file is used to serialize checking for
  # presence of InstanceLockName and of the process whose PID is
  # contained in that file. It carries the effective user ID just
  # like InstanceLockName, so that users who happen to pick the same
  # id do not contend for (or fail to break) each other's secondary lock.
GotLock = False # True once I own the instance lock
GotLock2 = False # True while I own the secondary lock
LastLastMod = None # instance lock mtime seen when OtherPid was last read
OtherPid = None # PID read from the existing instance lock, if any
def ProcessExists(Pid) :
    # Returns True if a process with the given PID exists, False if the
    # system reports there is no such process. Signal number 0 is used
    # so that nothing is actually delivered to the process.
    try :
        os.kill(Pid, 0)
    except OSError as Err :
        if Err.errno == errno.ESRCH :
            return False
        #end if
        # Not bothering to recover from privilege failures
        raise
    #end try
    return True
#end ProcessExists

def UnlinkIfPresent(FileName) :
    # Removes the named file; it is not an error if it is already gone.
    try :
        os.unlink(FileName)
    except OSError as Err :
        if Err.errno != errno.ENOENT :
            # Not bothering to recover from privilege failures
            raise
        #end if
    #end try
#end UnlinkIfPresent

# Try to obtain the instance lock. On leaving this section GotLock tells
# whether I own it. The secondary lock and my pid file are removed on
# the way out even if an exception escapes, so they are not left behind
# in /tmp.
try :
    while True :
        # try to get instance lock
        if not GotLock2 :
            while True :
                # grab the secondary lock for checking the instance lock
                try :
                    os.symlink(PidFileName, InstanceLock2Name)
                      # guaranteed atomic operation that will fail if
                      # destination name already exists. Note use of
                      # symlink rather than link because my pid file
                      # might be older than secondary lock timeout
                      # if I've been waiting a while.
                except OSError as Err :
                    if Err.errno != errno.EEXIST :
                        # Not bothering to recover from privilege failures
                        raise
                    #end if
                else :
                    GotLock2 = True
                    break
                #end try
                # Somebody else has the lock, check it looks reasonable
                try :
                    LastMod = os.lstat(InstanceLock2Name).st_mtime
                      # note use of lstat to find out how long symlink has
                      # been present, not age of pid file
                except OSError as Err :
                    if Err.errno != errno.ENOENT :
                        # Not bothering to recover from privilege failures
                        raise
                    #end if
                    LastMod = None
                #end try
                if LastMod is None :
                    pass # disappeared while I was looking at it
                elif LastMod + 10 < time.time() :
                    # held for more than 10 seconds: treat as stale
                    Debug(0, "Breaking secondary lock")
                    UnlinkIfPresent(InstanceLock2Name)
                else :
                    # lock looks valid, wait awhile and try again
                    time.sleep(1)
                #end if
            #end while
        #end if not GotLock2
        # At this point, I have the secondary lock, so nobody else will be
        # trying the following operations at the same time.
        try :
            LastMod = os.stat(InstanceLockName).st_mtime
        except OSError as Err :
            if Err.errno != errno.ENOENT :
                # Not bothering to recover from privilege failures
                raise
            #end if
            # no instance lock present, so take it
            os.link(PidFileName, InstanceLockName)
            GotLock = True
        #end try
        if GotLock :
            Debug(2, "Got the lock")
            break
        #end if
        if LastMod != LastLastMod :
            # instance lock file seems to have changed, (re)fetch OtherPid.
            Debug(2, "Getting existing process ID")
            try :
                PidFile = open(InstanceLockName, "r")
            except EnvironmentError as Err :
                # EnvironmentError rather than OSError: on Python 2 a
                # failed open raises IOError, which "except OSError"
                # does not catch. The lock owner can remove its lock
                # file at any moment, so ENOENT is an expected outcome.
                if Err.errno != errno.ENOENT :
                    # Not bothering to recover from privilege failures
                    raise
                #end if
                PidFile = None
            #end try
            if PidFile is not None :
                try :
                    OtherPid = int \
                      (
                        PidFile.readline().replace("\n", "").replace(" ", "")
                      )
                      # not bothering to guard against bad pidfile contents
                finally :
                    PidFile.close()
                #end try
                Debug(1, "Existing process ID is %d" % OtherPid)
            else :
                OtherPid = None
            #end if
            LastLastMod = LastMod
        #end if
        if OtherPid is not None and not ProcessExists(OtherPid) :
            OtherPid = None # nope, it's gone
        #end if
        BreakLock = \
            (
                OtherPid is None
            or
                Timeout is not None and time.time() > LastMod + Timeout
            )
        if BreakLock :
            Debug(0, "Breaking lock and killing existing lock owner")
            if OtherPid is not None :
                # Ask politely first, then insist.
                for Signal in (signal.SIGTERM, signal.SIGKILL) :
                    try :
                        os.kill(OtherPid, Signal)
                    except OSError as Err :
                        if Err.errno != errno.ESRCH :
                            # Not bothering to recover from privilege
                            # failures
                            raise
                        #end if
                        break # OK, it's gone
                    #end try
                    if Signal == signal.SIGKILL :
                        break # it should be gone
                    #end if
                    # SIGTERM gives it a chance to clean up; wait up to
                    # a certain amount of time for process to terminate
                    # by itself
                    StartWait = time.time()
                    while True :
                        StillThere = ProcessExists(OtherPid)
                        if not StillThere or time.time() - StartWait > 5.0 :
                            # Note this timeout must be less than
                            # secondary lock timeout, otherwise another
                            # invocation could take secondary lock away
                            # from me
                            break
                        #end if
                        time.sleep(1)
                    #end while
                    if not StillThere :
                        break # OK, it's gone
                    #end if
                    # out of patience: next time round sends SIGKILL
                #end for
                OtherPid = None
            #end if
            # Lock owner killed, now clean up lock file (if owner
            # didn't already clean it up) so I can grab it for myself
            UnlinkIfPresent(InstanceLockName)
        else :
            # Not breaking the lock implies OtherPid is a live process
            # which is entitled to keep running.
            if GotLock2 :
                # mustn't hang on to secondary lock for too long
                os.unlink(InstanceLock2Name)
                GotLock2 = False
            #end if
            if not Wait :
                Debug(1, "Can't get lock, exiting")
                break
            #end if
            time.sleep(1)
        #end if
    #end while
finally :
    if GotLock2 :
        UnlinkIfPresent(InstanceLock2Name)
    #end if
    UnlinkIfPresent(PidFileName)
#end try
# If the instance lock was obtained, run Cmd under /bin/bash, wait for
# it to finish (killing it if it runs for longer than Timeout seconds),
# report how it ended, and finally release the instance lock.
if GotLock :
    try :
        Debug(2, "Spawning \"%s\"" % Cmd)
        ChildPid = os.spawnl \
          (
            os.P_NOWAIT, "/bin/bash", "/bin/bash", "-c", Cmd
          )
        if Timeout is not None :
            # poll for completion so the timeout can be enforced
            StartWait = time.time()
            while True :
                time.sleep(5)
                (ChildPidAgain, ChildStatus) = \
                    os.waitpid(ChildPid, os.WNOHANG)
                if ChildPidAgain != 0 :
                    break # child has finished
                #end if
                if time.time() > StartWait + Timeout :
                    Debug(0, "Command taking too long, killing")
                    os.kill(ChildPid, signal.SIGKILL)
                    ChildStatus = os.waitpid(ChildPid, 0)[1]
                    break
                #end if
            #end while
        else :
            ChildStatus = os.waitpid(ChildPid, 0)[1]
        #end if
        if os.WIFEXITED(ChildStatus) :
            if os.WEXITSTATUS(ChildStatus) != 0 :
                Debug \
                  (
                    1,
                    "Child exited with status %d" % os.WEXITSTATUS(ChildStatus)
                  )
            #end if
        elif os.WIFSIGNALED(ChildStatus) :
            SigValue = os.WTERMSIG(ChildStatus)
            SigName = SignalName.get(SigValue, "?")
            Debug(0, "Child died from signal %d (%s)" % (SigValue, SigName))
        else :
            Debug(0, "Child ?terminated? status %d" % ChildStatus)
        #end if
    finally :
        # Release the instance lock even if something above raised.
        # The lock file may already be gone if another invocation
        # decided to break the lock in the meantime; that is not an
        # error.
        try :
            os.unlink(InstanceLockName)
        except OSError as Err :
            if Err.errno != errno.ENOENT :
                raise
            #end if
        #end try
    #end try
#end if
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,982
Messages
2,570,186
Members
46,740
Latest member
JudsonFrie

Latest Threads

Top