import time_slider.linux.timeslidersmf as timeslidersmf
import time_slider.linux.autosnapsmf as autosnapsmf
# import plugin
from time_slider.linux.rbac import RBACprofile
import time_slider.linux.timeslidersmf as timeslidersmf
import time_slider.linux.autosnapsmf as autosnapsmf
# import plugin
from time_slider.linux.rbac import RBACprofile
# Signal our parent so we can daemonise
os.kill(os.getppid(), signal.SIGUSR1)
# Signal our parent so we can daemonise
os.kill(os.getppid(), signal.SIGUSR1)
nexttime = self._check_snapshots()
# Overdue snapshots are already taken automatically
# inside _check_snapshots() so nexttime should never be
nexttime = self._check_snapshots()
# Overdue snapshots are already taken automatically
# inside _check_snapshots() so nexttime should never be
# will cause the scheduler thread to sleep indefinitely
# or until a SIGHUP is caught.
if nexttime:
util.debug("Waiting until " + str (nexttime), self.verbose)
waittime = None
if nexttime != None:
# will cause the scheduler thread to sleep indefinitely
# or until a SIGHUP is caught.
if nexttime:
util.debug("Waiting until " + str (nexttime), self.verbose)
waittime = None
if nexttime != None:
if (waittime <= 0):
# We took too long and missed a snapshot, so break out
# and catch up on it the next time through the loop
if (waittime <= 0):
# We took too long and missed a snapshot, so break out
# and catch up on it the next time through the loop
self.logger.error("Caught RuntimeError exception in snapshot" +
" manager thread")
self.logger.error("Error details:\n" + \
self.logger.error("Caught RuntimeError exception in snapshot" +
" manager thread")
self.logger.error("Error details:\n" + \
util.debug("Critical level value is: %d%%" % crit, self.verbose)
emer = self._smf.get_cleanup_level("emergency")
util.debug("Emergency level value is: %d%%" % emer, self.verbose)
util.debug("Critical level value is: %d%%" % crit, self.verbose)
emer = self._smf.get_cleanup_level("emergency")
util.debug("Emergency level value is: %d%%" % emer, self.verbose)
self.logger.error("Failed to determine cleanup threshhold levels")
self.logger.error("Details:\n" + \
"--------BEGIN ERROR MESSAGE--------\n" + \
self.logger.error("Failed to determine cleanup threshhold levels")
self.logger.error("Details:\n" + \
"--------BEGIN ERROR MESSAGE--------\n" + \
# Not fatal, just assume we delete them (default configuration)
self.logger.error("Can't determine whether to keep empty snapshots")
self.logger.error("Details:\n" + \
# Not fatal, just assume we delete them (default configuration)
self.logger.error("Can't determine whether to keep empty snapshots")
self.logger.error("Details:\n" + \
# separator character for datestamps. Windows filesystems such as
# CIFS and FAT choke on this character so now we use a user definable
# separator value, with a default value of "_"
# separator character for datestamps. Windows filesystems such as
# CIFS and FAT choke on this character so now we use a user definable
# separator value, with a default value of "_"
else:
self._zpools.append(zpool)
util.debug(str(zpool), self.verbose)
else:
self._zpools.append(zpool)
util.debug(str(zpool), self.verbose)
self.logger.error("Could not list Zpools")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propagate exception up to thread's run() method
self.logger.error("Could not list Zpools")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propagate exception up to thread's run() method
try:
_defaultSchedules = autosnapsmf.get_default_schedules()
_customSchedules = autosnapsmf.get_custom_schedules()
try:
_defaultSchedules = autosnapsmf.get_default_schedules()
_customSchedules = autosnapsmf.get_custom_schedules()
- raise RuntimeError, "Error reading SMF schedule instances\n" + \
- "Details:\n" + str(message)
+ raise RuntimeError("Error reading SMF schedule instances\n" + \
+ "Details:\n" + str(message))
# Build the combined schedule tuple from default + custom schedules
_defaultSchedules.extend(_customSchedules)
self._allSchedules = tuple(_defaultSchedules)
# Build the combined schedule tuple from default + custom schedules
_defaultSchedules.extend(_customSchedules)
self._allSchedules = tuple(_defaultSchedules)
last = None
for schedule,interval,period,keep in self._allSchedules:
last = None
for schedule,interval,period,keep in self._allSchedules:
# still up to date. Don't skip the default schedules though
# because overlap affects their scheduling
if [schedule,interval,period,keep] not in \
# still up to date. Don't skip the default schedules though
# because overlap affects their scheduling
if [schedule,interval,period,keep] not in \
self.exitCode = smf.SMF_EXIT_ERR_FATAL
self.logger.error("Failed to list snapshots during schedule update")
#Propagate up to the thread's run() method
self.exitCode = smf.SMF_EXIT_ERR_FATAL
self.logger.error("Failed to list snapshots during schedule update")
#Propagate up to the thread's run() method
tm =(year, mon, mday, \
snap_tm.tm_hour, snap_tm.tm_min, snap_tm.tm_sec, \
0, 0, -1)
tm =(year, mon, mday, \
snap_tm.tm_hour, snap_tm.tm_min, snap_tm.tm_sec, \
0, 0, -1)
for s,i,p,k in self._defaultSchedules:
due = self._next[s]
if due <= now:
for s,i,p,k in self._defaultSchedules:
due = self._next[s]
if due <= now:
#schedule that is overdue. The subordinate schedules
#will re-adjust afterwards.
earliest,schedule = due,s
#schedule that is overdue. The subordinate schedules
#will re-adjust afterwards.
earliest,schedule = due,s
self._refreshLock.acquire()
next,schedule = self._next_due()
self._refreshLock.release()
self._refreshLock.acquire()
next,schedule = self._next_due()
self._refreshLock.release()
while next != None and next <= now:
label = self._take_snapshots(schedule)
# self._plugin.execute_plugins(schedule, label)
while next != None and next <= now:
label = self._take_snapshots(schedule)
# self._plugin.execute_plugins(schedule, label)
def _take_snapshots(self, schedule):
# Set the time before taking snapshot to avoid clock skew due
# to time taken to complete snapshot.
def _take_snapshots(self, schedule):
# Set the time before taking snapshot to avoid clock skew due
# to time taken to complete snapshot.
label = "%s%s%s-%s" % \
(autosnapsmf.SNAPLABELPREFIX, self._separator, schedule,
datetime.datetime.now().strftime("%Y-%m-%d-%Hh%M"))
try:
self._datasets.create_auto_snapshot_set(label, tag=schedule)
label = "%s%s%s-%s" % \
(autosnapsmf.SNAPLABELPREFIX, self._separator, schedule,
datetime.datetime.now().strftime("%Y-%m-%d-%Hh%M"))
try:
self._datasets.create_auto_snapshot_set(label, tag=schedule)
# Write an error message, set the exit code and pass it up the
# stack so the thread can terminate
self.logger.error("Failed to create snapshots for schedule: %s" \
% (schedule))
self.exitCode = smf.SMF_EXIT_MON_DEGRADE
# Write an error message, set the exit code and pass it up the
# stack so the thread can terminate
self.logger.error("Failed to create snapshots for schedule: %s" \
% (schedule))
self.exitCode = smf.SMF_EXIT_MON_DEGRADE
"""Cleans out zero sized snapshots, kind of cautiously"""
# Per schedule: We want to delete 0 sized
# snapshots but we need to keep at least one around (the most
"""Cleans out zero sized snapshots, kind of cautiously"""
# Per schedule: We want to delete 0 sized
# snapshots but we need to keep at least one around (the most
# maintained from frequent -> hourly -> daily etc.
# Start off with the smallest interval schedule first and
# move up. This increases the amount of data retained where
# maintained from frequent -> hourly -> daily etc.
# Start off with the smallest interval schedule first and
# move up. This increases the amount of data retained where
# Clone the list because we want to remove items from it
# while iterating through it.
remainingsnaps = snaps[:]
# Clone the list because we want to remove items from it
# while iterating through it.
remainingsnaps = snaps[:]
self.logger.error("Failed to list snapshots during snapshot cleanup")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
self.logger.error("Failed to list snapshots during snapshot cleanup")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
self.logger.error(str(message))
# Not fatal, just skip to the next snapshot
continue
self.logger.error(str(message))
# Not fatal, just skip to the next snapshot
continue
self.logger.error("Failed to destroy snapshot: " +
snapname)
self.exitCode = smf.SMF_EXIT_MON_DEGRADE
# Propagate exception so thread can exit
self.logger.error("Failed to destroy snapshot: " +
snapname)
self.exitCode = smf.SMF_EXIT_MON_DEGRADE
# Propagate exception so thread can exit
self.logger.error("Can not determine used size of: " + \
snapname)
self.exitCode = smf.SMF_EXIT_MON_DEGRADE
#Propagate the exception to the thread's run() method
self.logger.error("Can not determine used size of: " + \
snapname)
self.exitCode = smf.SMF_EXIT_MON_DEGRADE
#Propagate the exception to the thread's run() method
# Deleting individual snapshots instead of recursive sets
# breaks the recursion chain and leaves child snapshots
# Deleting individual snapshots instead of recursive sets
# breaks the recursion chain and leaves child snapshots
self.logger.error(str(message))
# Not fatal, just skip to the next snapshot
counter += 1
continue
try:
snapshot.destroy()
self.logger.error(str(message))
# Not fatal, just skip to the next snapshot
counter += 1
continue
try:
snapshot.destroy()
self.logger.error("Failed to destroy snapshot: " +
snapshot.name)
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propagate exception so thread can exit
self.logger.error("Failed to destroy snapshot: " +
snapshot.name)
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propagate exception so thread can exit
for name in self._datasets.list_auto_snapshot_sets(schedule):
dataset = zfs.ReadWritableDataset(name)
self._prune_snapshots(dataset, schedule)
for name in self._datasets.list_auto_snapshot_sets(schedule):
dataset = zfs.ReadWritableDataset(name)
self._prune_snapshots(dataset, schedule)
self.logger.error("Error listing datasets during " + \
"removal of expired snapshots")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propagate up to thread's run() method
self.logger.error("Error listing datasets during " + \
"removal of expired snapshots")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propagate up to thread's run() method
def _needs_cleanup(self):
if self._remedialCleanup == False:
# Sys admin has explicitly instructed for remedial cleanups
# not to be performed.
return False
def _needs_cleanup(self):
if self._remedialCleanup == False:
# Sys admin has explicitly instructed for remedial cleanups
# not to be performed.
return False
# Don't run checks any less than 15 minutes apart.
if self._cleanupLock.acquire(False) == False:
#Indicates that a cleanup is already running.
# Don't run checks any less than 15 minutes apart.
if self._cleanupLock.acquire(False) == False:
#Indicates that a cleanup is already running.
self.logger.error("Error checking zpool capacity of: " + \
zpool.name)
self._cleanupLock.release()
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propagate up to thread's run() method.
self.logger.error("Error checking zpool capacity of: " + \
zpool.name)
self._cleanupLock.release()
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propagate up to thread's run() method.
- raise RuntimeError,message
- self._lastCleanupCheck = long(time.time())
+ raise RuntimeError(message)
+ self._lastCleanupCheck = int(time.time())
self._poolstatus[zpool.name] = 4
# This also catches exceptions thrown from _run_<level>_cleanup()
# and _run_cleanup() in methods called by _perform_cleanup()
self._poolstatus[zpool.name] = 4
# This also catches exceptions thrown from _run_<level>_cleanup()
# and _run_cleanup() in methods called by _perform_cleanup()
self.logger.error("Remedial space cleanup failed because " + \
"of failure to determinecapacity of: " + \
zpool.name)
self.exitCode = smf.SMF_EXIT_ERR_FATAL
self._cleanupLock.release()
# Propagate up to thread's run() method.
self.logger.error("Remedial space cleanup failed because " + \
"of failure to determinecapacity of: " + \
zpool.name)
self.exitCode = smf.SMF_EXIT_ERR_FATAL
self._cleanupLock.release()
# Propagate up to thread's run() method.
# left to delete. We don't disable the service since
# it will permit self recovery and snapshot
# retention when space becomes available on
# left to delete. We don't disable the service since
# it will permit self recovery and snapshot
# retention when space becomes available on
self.logger.error("Error (non-fatal) listing cloned snapshots" +
" while recovering pool capacity")
self.logger.error("Error details:\n" + \
"--------BEGIN ERROR MESSAGE--------\n" + \
str(message) + \
self.logger.error("Error (non-fatal) listing cloned snapshots" +
" while recovering pool capacity")
self.logger.error("Error details:\n" + \
"--------BEGIN ERROR MESSAGE--------\n" + \
str(message) + \
# Build a list of snapshots in the given schedule, that are not
# cloned, and sort the result in reverse chronological order.
# Build a list of snapshots in the given schedule, that are not
# cloned, and sort the result in reverse chronological order.
self.logger.error("Error listing snapshots" +
" while recovering pool capacity")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propagate the error up to the thread's run() method.
self.logger.error("Error listing snapshots" +
" while recovering pool capacity")
self.exitCode = smf.SMF_EXIT_ERR_FATAL
# Propagate the error up to the thread's run() method.
snapshot can have unpredictable results. For example a
pair of snapshots may share exclusive reference to a large
amount of data (eg. a large core file). The usage of both
snapshot can have unpredictable results. For example a
pair of snapshots may share exclusive reference to a large
amount of data (eg. a large core file). The usage of both
# Would be nice to be able to mark service as degraded here
# but it's better to try to continue on rather than to give
# up altogether (SMF maintenance state)
# Would be nice to be able to mark service as degraded here
# but it's better to try to continue on rather than to give
# up altogether (SMF maintenance state)
def _send_to_syslog(self):
for zpool in self._zpools:
status = self._poolstatus[zpool.name]
def _send_to_syslog(self):
for zpool in self._zpools:
status = self._poolstatus[zpool.name]
self.logger.critical( \
"%s exceeded %d%% capacity. " \
"Weekly, hourly and daily automatic snapshots were destroyed" \
self.logger.critical( \
"%s exceeded %d%% capacity. " \
"Weekly, hourly and daily automatic snapshots were destroyed" \