-#!/usr/bin/python2
+#!/usr/bin/python3
#
# CDDL HEADER START
#
import calendar
import signal
import argparse
+import logging
+from logging.handlers import SysLogHandler
-import glib
-import gobject
+from gi.repository import GLib as glib
+from gi.repository import GObject as gobject
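+# PyGObject introspection bindings replace the retired static glib/gobject
+# modules; aliasing them keeps any existing glib.*/gobject.* call sites working.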
import dbus
import dbus.service
import dbus.mainloop
import dbus.mainloop.glib
-import dbussvc
-import zfs
-import smf
-import timeslidersmf
-import autosnapsmf
-import plugin
+from . import dbussvc
+from . import zfs
+from . import smf
+import time_slider.linux.timeslidersmf as timeslidersmf
+import time_slider.linux.autosnapsmf as autosnapsmf
+# import plugin
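+# The plugin subsystem is disabled for now; its call sites below (PluginManager
+# instantiation, refresh() and execute_plugins()) are commented out to match.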
from time_slider.linux.rbac import RBACprofile
-import util
+from . import util
+
+import time_slider.linux.timesliderconfig as timesliderconfig
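+# timesliderconfig holds the daemon configuration; main() points it at the
+# file selected via --config (default /etc/time-slider/timesliderd.conf).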
_MINUTE = 60
_HOUR = _MINUTE * 60
self._zpools = []
self._poolstatus = {}
self._destroyedsnaps = []
+ self.logger = logging.getLogger('time-slider')
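+ # Handlers for 'time-slider' are attached once in main(); every thread that
+ # calls logging.getLogger('time-slider') shares that configuration.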
# This is also checked during the refresh() method but we need
# to know it sooner for instantiation of the PluginManager
self._smf = timeslidersmf.TimeSliderSMF()
try:
self.verbose = self._smf.get_verbose()
- except RuntimeError,message:
- sys.stderr.write("Error determing whether debugging is enabled\n")
+ except RuntimeError as message:
+ self.logger.error("Error determing whether debugging is enabled")
self.verbose = False
self._dbus = dbussvc.AutoSnap(bus,
'/org/opensolaris/TimeSlider/autosnap',
self)
- self._plugin = plugin.PluginManager(self.verbose)
+ # self._plugin = plugin.PluginManager(self.verbose)
self.exitCode = smf.SMF_EXIT_OK
self.refresh()
- # Seems we're up and running OK.
+ # Seems we're up and running OK.
# Signal our parent so we can daemonise
os.kill(os.getppid(), signal.SIGUSR1)
volume.set_auto_snap(False)
except IndexError:
pass
-
+
nexttime = None
waittime = None
while True:
nexttime = self._check_snapshots()
# Overdue snapshots are already taken automatically
# inside _check_snapshots() so nexttime should never be
- # < 0. It can be None however, which is fine since it
+ # < 0. It can be None however, which is fine since it
# will cause the scheduler thread to sleep indefinitely
# or until a SIGHUP is caught.
if nexttime:
util.debug("Waiting until " + str (nexttime), self.verbose)
waittime = None
if nexttime != None:
- waittime = nexttime - long(time.time())
+ waittime = nexttime - int(time.time())
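+ # long() is gone in Python 3; a plain int covers epoch-second
+ # arithmetic here and at the other former long(time.time()) sites below.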
if (waittime <= 0):
# We took too long and missed a snapshot, so break out
# and catch up on it the next time through the loop
self.verbose)
self._conditionLock.wait(_MINUTE * 15)
- except OSError, message:
- sys.stderr.write("Caught OSError exception in snapshot" +
- " manager thread\n")
- sys.stderr.write("Error details:\n" + \
+ except OSError as message:
+ self.logger.error("Caught OSError exception in snapshot" +
+ " manager thread")
+ self.logger.error("Error details:\n" + \
"--------BEGIN ERROR MESSAGE--------\n" + \
str(message) + \
- "\n--------END ERROR MESSAGE--------\n")
+ "\n--------END ERROR MESSAGE--------")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Exit this thread
break
- except RuntimeError,message:
- sys.stderr.write("Caught RuntimeError exception in snapshot" +
- " manager thread\n")
- sys.stderr.write("Error details:\n" + \
+ except RuntimeError as message:
+ self.logger.error("Caught RuntimeError exception in snapshot" +
+ " manager thread")
+ self.logger.error("Error details:\n" + \
"--------BEGIN ERROR MESSAGE--------\n" + \
str(message) + \
- "\n--------END ERROR MESSAGE--------\n")
+ "\n--------END ERROR MESSAGE--------")
# Exit this thread
break
self._configure_svc_props()
self._rebuild_schedules()
self._update_schedules()
- self._plugin.refresh()
+ # self._plugin.refresh()
self._stale = False
self._refreshLock.release()
def _configure_svc_props(self):
try:
self.verbose = self._smf.get_verbose()
- except RuntimeError,message:
- sys.stderr.write("Error determing whether debugging is enabled\n")
+ except RuntimeError as message:
+ self.logger.error("Error determing whether debugging is enabled")
self.verbose = False
try:
util.debug("Critical level value is: %d%%" % crit, self.verbose)
emer = self._smf.get_cleanup_level("emergency")
util.debug("Emergency level value is: %d%%" % emer, self.verbose)
- except RuntimeError,message:
- sys.stderr.write("Failed to determine cleanup threshhold levels\n")
- sys.stderr.write("Details:\n" + \
+ except RuntimeError as message:
+ self.logger.error("Failed to determine cleanup threshhold levels")
+ self.logger.error("Details:\n" + \
"--------BEGIN ERROR MESSAGE--------\n" + \
str(message) + \
"\n---------END ERROR MESSAGE---------\n")
- sys.stderr.write("Using factory defaults of 80%, 90% and 95%\n")
+ self.logger.error("Using factory defaults of 80%, 90% and 95%")
#Go with defaults
#FIXME - this would be an appropriate case to mark svc as degraded
self._remedialCleanup = True
try:
self._keepEmpties = self._smf.get_keep_empties()
- except RuntimeError,message:
+ except RuntimeError as message:
# Not fatal, just assume we delete them (default configuration)
- sys.stderr.write("Can't determine whether to keep empty snapshots\n")
- sys.stderr.write("Details:\n" + \
+ self.logger.error("Can't determine whether to keep empty snapshots")
+ self.logger.error("Details:\n" + \
"--------BEGIN ERROR MESSAGE--------\n" + \
str(message) + \
- "\n---------END ERROR MESSAGE---------\n")
- sys.stderr.write("Assuming default value: False\n")
+ "\n---------END ERROR MESSAGE---------")
+ self.logger.error("Assuming default value: False")
self._keepEmpties = False
- # Previously, snapshot labels used the ":" character was used as a
+ # Previously, snapshot labels used the ":" character as a
# separator character for datestamps. Windows filesystems such as
# CIFS and FAT choke on this character so now we use a user definable
# separator value, with a default value of "_"
else:
self._zpools.append(zpool)
util.debug(str(zpool), self.verbose)
- except RuntimeError,message:
- sys.stderr.write("Could not list Zpools\n")
+ except RuntimeError as message:
+ self.logger.error("Could not list Zpools")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propogate exception up to thread's run() method
- raise RuntimeError,message
+ raise RuntimeError(message)
def _rebuild_schedules(self):
try:
_defaultSchedules = autosnapsmf.get_default_schedules()
_customSchedules = autosnapsmf.get_custom_schedules()
- except RuntimeError,message:
+ except RuntimeError as message:
self.exitCode = smf.SMF_EXIT_ERR_FATAL
- raise RuntimeError, "Error reading SMF schedule instances\n" + \
- "Details:\n" + str(message)
+ raise RuntimeError("Error reading SMF schedule instances\n" + \
+ "Details:\n" + str(message))
else:
# Now set it in stone.
self._defaultSchedules = tuple(_defaultSchedules)
self._customSchedules = tuple(_customSchedules)
-
+
# Build the combined schedule tuple from default + custom schedules
_defaultSchedules.extend(_customSchedules)
self._allSchedules = tuple(_defaultSchedules)
last = None
for schedule,interval,period,keep in self._allSchedules:
- # Shortcut if we've already processed this schedule and it's
+ # Shortcut if we've already processed this schedule and it's
# still up to date. Don't skip the default schedules though
# because overlap affects their scheduling
if [schedule,interval,period,keep] not in \
snaps = self._datasets.list_snapshots("%s%s" % \
(self._prefix,
schedule))
- except RuntimeError,message:
+ except RuntimeError as message:
self.exitCode = smf.SMF_EXIT_ERR_FATAL
- sys.stderr.write("Failed to list snapshots during schedule update\n")
+ self.logger.error("Failed to list snapshots during schedule update")
#Propogate up to the thread's run() method
- raise RuntimeError,message
+ raise RuntimeError(message)
if len(snaps) > 0:
util.debug("Last %s snapshot was: %s" % \
totalinterval = intervals[interval] * period
except KeyError:
self.exitCode = smf.SMF_EXIT_ERR_CONFIG
- sys.stderr.write(schedule + \
+ self.logger.error(schedule + \
" schedule has invalid interval: " + \
- "'%s\'\n" % interval)
+ "'%s\'" % interval)
#Propogate up to thread's run() method
raise RuntimeError
if [schedule,interval,period,keep] in self._defaultSchedules:
snap_tm = time.gmtime(self._last[schedule])
# Increment year if period >= than 1 calender year.
year = snap_tm.tm_year
- year += period / 12
+ year += period // 12
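+ # '//' keeps the floor-division semantics that '/' had under Python 2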
period = period % 12
mon = (snap_tm.tm_mon + period) % 12
mday = snap_tm.tm_mday
if dlastmon > dnewmon and snap_tm.tm_mday > dnewmon:
mday = dnewmon
-
+
tm =(year, mon, mday, \
snap_tm.tm_hour, snap_tm.tm_min, snap_tm.tm_sec, \
0, 0, -1)
def _next_due(self):
schedule = None
earliest = None
- now = long(time.time())
-
+ now = int(time.time())
+
for s,i,p,k in self._defaultSchedules:
due = self._next[s]
if due <= now:
- #Default Schedule - so break out at the first
+ #Default Schedule - so break out at the first
#schedule that is overdue. The subordinate schedules
#will re-adjust afterwards.
earliest,schedule = due,s
self._refreshLock.acquire()
next,schedule = self._next_due()
self._refreshLock.release()
- now = long(time.time())
+ now = int(time.time())
while next != None and next <= now:
label = self._take_snapshots(schedule)
- self._plugin.execute_plugins(schedule, label)
+ # self._plugin.execute_plugins(schedule, label)
self._refreshLock.acquire()
self._update_schedules()
next,schedule = self._next_due();
(schedule, dt.isoformat()), \
self.verbose)
return next
-
+
def _take_snapshots(self, schedule):
# Set the time before taking snapshot to avoid clock skew due
# to time taken to complete snapshot.
- tm = long(time.time())
+ tm = int(time.time())
label = "%s%s%s-%s" % \
(autosnapsmf.SNAPLABELPREFIX, self._separator, schedule,
datetime.datetime.now().strftime("%Y-%m-%d-%Hh%M"))
try:
self._datasets.create_auto_snapshot_set(label, tag=schedule)
- except RuntimeError, message:
+ except RuntimeError as message:
# Write an error message, set the exit code and pass it up the
# stack so the thread can terminate
- sys.stderr.write("Failed to create snapshots for schedule: %s\n" \
+ self.logger.error("Failed to create snapshots for schedule: %s" \
% (schedule))
self.exitCode = smf.SMF_EXIT_MON_DEGRADE
- raise RuntimeError,message
+ raise RuntimeError(message)
self._last[schedule] = tm;
self._perform_purge(schedule)
return label
"""Cleans out zero sized snapshots, kind of cautiously"""
# Per schedule: We want to delete 0 sized
# snapshots but we need to keep at least one around (the most
- # recent one) for each schedule so that that overlap is
+ # recent one) for each schedule so that overlap is
# maintained from frequent -> hourly -> daily etc.
# Start off with the smallest interval schedule first and
# move up. This increases the amount of data retained where
# Clone the list because we want to remove items from it
# while iterating through it.
remainingsnaps = snaps[:]
- except RuntimeError,message:
- sys.stderr.write("Failed to list snapshots during snapshot cleanup\n")
+ except RuntimeError as message:
+ self.logger.error("Failed to list snapshots during snapshot cleanup")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
- raise RuntimeError,message
+ raise RuntimeError(message)
if (self._keepEmpties == False):
try: # remove the newest one from the list.
for snapname in snaps:
try:
snapshot = zfs.Snapshot(snapname)
- except Exception,message:
- sys.stderr.write(str(message))
+ except Exception as message:
+ self.logger.error(str(message))
# Not fatal, just skip to the next snapshot
continue
self.verbose)
try:
snapshot.destroy()
- except RuntimeError,message:
- sys.stderr.write("Failed to destroy snapshot: " +
- snapname + "\n")
+ except RuntimeError as message:
+ self.logger.error("Failed to destroy snapshot: " +
+ snapname)
self.exitCode = smf.SMF_EXIT_MON_DEGRADE
# Propogate exception so thread can exit
- raise RuntimeError,message
+ raise RuntimeError(message)
remainingsnaps.remove(snapname)
- except RuntimeError,message:
- sys.stderr.write("Can not determine used size of: " + \
- snapname + "\n")
+ except RuntimeError as message:
+ self.logger.error("Can not determine used size of: " + \
+ snapname)
self.exitCode = smf.SMF_EXIT_MON_DEGRADE
#Propogate the exception to the thead run() method
- raise RuntimeError,message
+ raise RuntimeError(message)
# Deleting individual snapshots instead of recursive sets
# breaks the recursion chain and leaves child snapshots
- # dangling so we need to take care of cleaning up the
+ # dangling so we need to take care of cleaning up the
# snapshots.
target = len(remainingsnaps) - self._keep[schedule]
counter = 0
while counter < target:
util.debug("Destroy expired snapshot: " + \
- remainingsnaps[counter],
+ remainingsnaps[counter],
self.verbose)
try:
snapshot = zfs.Snapshot(remainingsnaps[counter])
- except Exception,message:
- sys.stderr.write(str(message))
+ except Exception as message:
+ self.logger.error(str(message))
# Not fatal, just skip to the next snapshot
counter += 1
continue
try:
snapshot.destroy()
- except RuntimeError,message:
- sys.stderr.write("Failed to destroy snapshot: " +
- snapshot.name + "\n")
+ except RuntimeError as message:
+ self.logger.error("Failed to destroy snapshot: " +
+ snapshot.name)
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propogate exception so thread can exit
- raise RuntimeError,message
+ raise RuntimeError(message)
else:
counter += 1
for name in self._datasets.list_auto_snapshot_sets(schedule):
dataset = zfs.ReadWritableDataset(name)
self._prune_snapshots(dataset, schedule)
- except RuntimeError,message:
- sys.stderr.write("Error listing datasets during " + \
- "removal of expired snapshots\n")
+ except RuntimeError as message:
+ self.logger.error("Error listing datasets during " + \
+ "removal of expired snapshots")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propogate up to thread's run() method
- raise RuntimeError,message
+ raise RuntimeError(message)
def _needs_cleanup(self):
if self._remedialCleanup == False:
# Sys admin has explicitly instructed for remedial cleanups
# not to be performed.
return False
- now = long(time.time())
+ now = int(time.time())
# Don't run checks any less than 15 minutes apart.
if self._cleanupLock.acquire(False) == False:
#Indicates that a cleanup is already running.
self.verbose)
self._cleanupLock.release()
return True
- except RuntimeError, message:
- sys.stderr.write("Error checking zpool capacity of: " + \
- zpool.name + "\n")
+ except RuntimeError as message:
+ self.logger.error("Error checking zpool capacity of: " + \
+ zpool.name)
self._cleanupLock.release()
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propogate up to thread's run() mehod.
- raise RuntimeError,message
- self._lastCleanupCheck = long(time.time())
+ raise RuntimeError(message)
+ self._lastCleanupCheck = int(time.time())
self._cleanupLock.release()
return False
self._poolstatus[zpool.name] = 4
# This also catches exceptions thrown from _run_<level>_cleanup()
# and _run_cleanup() in methods called by _perform_cleanup()
- except RuntimeError,message:
- sys.stderr.write("Remedial space cleanup failed because " + \
+ except RuntimeError as message:
+ self.logger.error("Remedial space cleanup failed because " + \
"of failure to determinecapacity of: " + \
- zpool.name + "\n")
+ zpool.name)
self.exitCode = smf.SMF_EXIT_ERR_FATAL
self._cleanupLock.release()
# Propogate up to thread's run() method.
- raise RuntimeError,message
+ raise RuntimeError(message)
- # Bad - there's no more snapshots left and nothing
+ # Bad - there are no snapshots left and nothing
# left to delete. We don't disable the service since
# it will permit self recovery and snapshot
# retention when space becomes available on
# Avoid needless list iteration for non-debug mode
if self.verbose == True and len(self._destroyedsnaps) > 0:
for snap in self._destroyedsnaps:
- sys.stderr.write("\t%s\n" % snap)
+ self.logger.error("\t%s" % snap)
self._cleanupLock.release()
def _run_warning_cleanup(self, zpool):
snapshots = []
try:
clonedsnaps = self._datasets.list_cloned_snapshots()
- except RuntimeError,message:
- sys.stderr.write("Error (non-fatal) listing cloned snapshots" +
- " while recovering pool capacity\n")
- sys.stderr.write("Error details:\n" + \
+ except RuntimeError as message:
+ self.logger.error("Error (non-fatal) listing cloned snapshots" +
+ " while recovering pool capacity")
+ self.logger.error("Error details:\n" + \
"--------BEGIN ERROR MESSAGE--------\n" + \
str(message) + \
- "\n--------END ERROR MESSAGE--------\n")
+ "\n--------END ERROR MESSAGE--------")
# Build a list of snapshots in the given schedule, that are not
# cloned, and sort the result in reverse chronological order.
% (self._prefix,schedule)) \
if not s in clonedsnaps]
snapshots.reverse()
- except RuntimeError,message:
- sys.stderr.write("Error listing snapshots" +
- " while recovering pool capacity\n")
+ except RuntimeError as message:
+ self.logger.error("Error listing snapshots" +
+ " while recovering pool capacity")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propogate the error up to the thread's run() method.
- raise RuntimeError,message
-
+ raise RuntimeError(message)
+
while zpool.get_capacity() > threshold:
if len(snapshots) == 0:
- syslog.syslog(syslog.LOG_NOTICE,
+ self.logger.info( \
"No more %s snapshots left" \
% schedule)
return
- """This is not an exact science. Deleteing a zero sized
+ """This is not an exact science. Deleteing a zero sized
snapshot can have unpredictable results. For example a
pair of snapshots may share exclusive reference to a large
amount of data (eg. a large core file). The usage of both
util.debug("Destroying %s" % snapname, self.verbose)
try:
snapshot.destroy()
- except RuntimeError,message:
+ except RuntimeError as message:
# Would be nice to be able to mark service as degraded here
# but it's better to try to continue on rather than to give
# up alltogether (SMF maintenance state)
- sys.stderr.write("Warning: Cleanup failed to destroy: %s\n" % \
+ self.logger.error("Warning: Cleanup failed to destroy: %s" % \
(snapshot.name))
- sys.stderr.write("Details:\n%s\n" % (str(message)))
+ self.logger.error("Details:\n%s" % (str(message)))
else:
self._destroyedsnaps.append(snapname)
# Give zfs some time to recalculate.
time.sleep(3)
-
+
def _send_to_syslog(self):
for zpool in self._zpools:
status = self._poolstatus[zpool.name]
if status == 4:
- syslog.syslog(syslog.LOG_EMERG,
- "%s is over %d%% capacity. " \
+ self.logger.critical( \
+ "%s is over %d%% capacity. " \
"All automatic snapshots were destroyed" \
% (zpool.name, self._emergencyLevel))
elif status == 3:
- syslog.syslog(syslog.LOG_ALERT,
+ self.logger.critical( \
"%s exceeded %d%% capacity. " \
"Automatic snapshots over 1 hour old were destroyed" \
% (zpool.name, self._emergencyLevel))
elif status == 2:
- syslog.syslog(syslog.LOG_CRIT,
+ self.logger.critical( \
"%s exceeded %d%% capacity. " \
"Weekly, hourly and daily automatic snapshots were destroyed" \
- % (zpool.name, self._criticalLevel))
+ % (zpool.name, self._criticalLevel))
elif status == 1:
- syslog.syslog(syslog.LOG_WARNING,
+ self.logger.warning( \
"%s exceeded %d%% capacity. " \
"Hourly and daily automatic snapshots were destroyed" \
% (zpool.name, self._warningLevel))
if len(self._destroyedsnaps) > 0:
- syslog.syslog(syslog.LOG_NOTICE,
+ self.logger.info( \
"%d automatic snapshots were destroyed" \
% len(self._destroyedsnaps))
def monitor_threads(snapthread):
+ logger = logging.getLogger('time-slider')
if snapthread.is_alive():
return True
else:
- sys.stderr.write("Snapshot monitor thread exited.\n")
+ logger.error("Snapshot monitor thread exited.")
if snapthread.exitCode == smf.SMF_EXIT_MON_DEGRADE:
# FIXME - it would be nicer to mark the service as degraded than
# go into maintenance state for some situations such as a
sys.exit(smf.SMF_EXIT_ERR_FATAL)
return False
else:
- sys.stderr.write("Snapshot monitor thread exited abnormally\n")
- sys.stderr.write("Exit code: %d\n" % (snapthread.exitCode))
+ logger.error("Snapshot monitor thread exited abnormally")
+ logger.error("Exit code: %d" % (snapthread.exitCode))
#subprocess.call(["/usr/sbin/svcadm", "mark", "maintenance",
# os.getenv("SMF_FMRI")])
sys.exit(smf.SMF_EXIT_ERR_FATAL)
signal.signal(signal.SIGALRM, child_sig_handler)
try:
pid = os.fork()
- except OSError, e:
- raise Exception, "%s [%d]" % (e.strerror, e.errno)
+ except OSError as e:
+ raise Exception("%s [%d]" % (e.strerror, e.errno))
if (pid == 0):
#Reset signals that we set to trap in parent
parser = argparse.ArgumentParser()
parser.add_argument('--foreground', action='store_true', help='Do not daemonize', default=False)
+ parser.add_argument('--config', '-c', type=str, help='Configuration file', default='/etc/time-slider/timesliderd.conf')
+ parser.add_argument('--configdump', action='store_true', help='Dump default values in config file format', default=False)
args, _ = parser.parse_known_args()
+ logger = logging.getLogger('time-slider')
+ logger.setLevel(logging.DEBUG)
+ if args.foreground:
+ handler = logging.StreamHandler()
+ handler.setFormatter(logging.Formatter('%(message)s'))
+ else:
+ handler = SysLogHandler(address='/dev/log')
+ handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s: %(message)s', '%b %d %H:%M:%S time-sliderd:'))
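+ # /dev/log is the local syslog socket on Linux; the datefmt above doubles
+ # as a classic syslog header ("Mon DD HH:MM:SS time-sliderd:") per record.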
+ handler.setLevel(logging.DEBUG)
+ logger.addHandler(handler)
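+ # Logger and handler both pass DEBUG, so nothing is filtered at this layer;
+ # verbose chatter is still gated by util.debug(..., self.verbose) at call sites.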
+
+ if args.configdump:
+ timesliderconfig.configdump()
+ sys.exit(smf.SMF_EXIT_OK)
+
+ timesliderconfig.configfile = args.config
+
# Daemonise the service.
if not args.foreground:
create_daemon()
# The user security attributes checked are the following:
# Note that UID == 0 will match any profile search so
# no need to check it explicitly.
- syslog.openlog("time-sliderd", 0, syslog.LOG_DAEMON)
rbacp = RBACprofile()
if rbacp.has_profile("ZFS File System Management"):
mainloop.quit()
sys.exit(smf.SMF_EXIT_OK)
else:
- syslog.syslog(syslog.LOG_ERR,
+ logger.error( \
"%s has insufficient privileges to run time-sliderd!" \
% rbacp.name)
- syslog.closelog()
sys.exit(smf.SMF_EXIT_ERR_PERM)
- syslog.closelog()
sys.exit(smf.SMF_EXIT_OK)