#!/bin/sh
#=========================================================================
# name:         rkbackup
# description:  perform system backups
# author:       G.R.Keech
# date:         2001-09-18
# $Id: rkbackup,v 1.12 2002/04/20 07:47:52 rkeech Exp $
#=========================================================================
# Notes:  
# 1.     This script is designed to be run from cron and does not send
#        to standard output.  All output goes to log file.
#--------------------------------------------------------------------------
# Install the signal handlers.

# cleanup writes to the log, so the log names are given provisional
# values here, before any trap can fire.
LOGF="backup-default.$(date +%Y%m%d)"
LOG="/var/tmp/${LOGF}"

# Signals 1, 2, 3 and 15 all mark the run as aborted and hand their own
# number to cleanup.
for signo in 1 2 3 15
do
  trap "aborting=true;cleanup ${signo}" "${signo}"
done
#--------------------------------------------------------------------------
# Set variables.
OS=Linux
DUMP=/sbin/dump
OPSNAME=ops  # Operations account permitted to do backups (site-specific).
BASEDIR="/usr/"  # The base directory for location of scripts and configs.
BINDIR="${BASEDIR}sbin/"
MAINLOG="/var/log/backup/${LOGF}"  # Provisional; set again once CONFIG is known.
CONFIG_FILE="/etc/rkbackup/backup.conf"   # should be owned by root, mode 644
PRE_ACTIONS="${BINDIR}backup.pre"   # should be owned by root, mode 744
POST_ACTIONS="${BINDIR}backup.post" # should be owned by root, mode 744
# $0 is quoted so that a script path containing white space is not split.
PROGNAME=$(basename "$0")
accompanying_actions=active  # This is the default.
NO_OFFLINE=false  # This is the default.
BLOCKING=64  # Tape blocking factor; b flag in dump
total_size=0
total_files=0
aborting=false
#--------------------------------------------------------------------------
# Set paths to programs.
TAPEFILE="${BINDIR}tapefile"
CHECK_RUN="${BINDIR}checkbu-run"

#--------------------------------------------------------------------------
# Report a fatal error and terminate the script.
# Appends a "Summary:" line and an "Error: <message>" line to $MAINLOG,
# removes the PID file if one has been named, and exits with status 1.
# Arguments: $* - the message; the words are joined with single spaces.
# Note: the message goes straight to $MAINLOG rather than to the working
# log $LOG, so this should only be called before the main logging starts.
error () {
  # The message is quoted so that characters such as "*" are logged
  # literally instead of being expanded against the current directory.
  {
    printf '%s\n' 'Summary:'
    printf 'Error: %s\n' "$*"
  } >> "$MAINLOG"
  # pid_file is not set until the configuration name is known.
  if [ -n "$pid_file" ]
  then
    rm -f "$pid_file"
  fi
  exit 1
}
#--------------------------------------------------------------------------
# Check that a file exists and has appropriate ownership and permissions.
# Arguments: $1 - path of the file to check.
# Returns:   1, after printing a message, if $1 is not a regular file.
#            Otherwise returns normally when the checks pass, and calls
#            error (which terminates the script) when the permission bits
#            for "other" are anything but "r--" or "---", or when the
#            owner or group is not one of the accepted names.
checkfile () {

  if [ ! -f "${1}" ]
  then
    echo  "File \"${1}\" does not exist or is not in the expected location."
    return 1
  fi

  # In "ls -l" output, field 1 is the mode string (characters 8-10 are the
  # permissions for "other"), field 3 the owner and field 4 the group.
  lsout="$(ls -l "$1" 2> /dev/null)"
  othermodes=$(printf '%s\n' "$lsout" | awk '{print $1}' | cut -c8-10)
  owner=$(printf '%s\n' "$lsout" | awk '{print $3}')
  group=$(printf '%s\n' "$lsout" | awk '{print $4}')

  if [ "$othermodes" != "r--" ] && [ "$othermodes" != "---" ]
  then
    error "For security, file $1 must only be writable and executable (where appropriate) by owner and group."
  fi

  if [ "$owner" != root ] && [ "$owner" != "$OPSNAME" ] && [ "$owner" != bin ]
  then
    error "For security, file $1 must be owned by only root or ${OPSNAME} or bin."
  fi

  # The accepted group names are fixed; the group "ops" is not taken from
  # OPSNAME.
  if [ "$group" != root ] && [ "$group" != ops ] && [ "$group" != bin ]
  then
    error "For security, file $1 must be associated only with group ops or root or bin."
  fi
}  # checkfile ()
#---------------------------------------------------------------------------
# Append a message to the working log file $LOG.
# Arguments: $* - the message; the words are joined with single spaces.
#                 Backslash escapes such as \n are expanded, which callers
#                 rely on for multi-line messages.
# The text is passed to printf as data (%b), not as the format string, so
# a "%" in a message (for example in a path or host name) is logged as is.
logmsg () {
  printf '%b\n' "$*" >> "$LOG" 2>&1
}
#---------------------------------------------------------------------------
# Show program usage on standard output and exit with status 1.
# Globals: PROGNAME (read).
usage () {
  # One argument per output line; the values are data, not printf format.
  printf '%s\n' \
    "usage:" \
    "    $PROGNAME [-f configuration_file | -c configuration_name] [-s]" \
    "    eg:  $PROGNAME -f /etc/rkbackup/backup-2.conf" \
    "    -f specifies a backup configuration file." \
    "    -c specifies a configuration name." \
    "    -s suppresses the accompanying pre- and post-backup actions."
  exit 1
}
#--------------------------------------------------------------------------
# Print a table of the file systems in $FILESYSTEMS with the size of each,
# to determine in advance the size of the dump.
# Globals:  FILESYSTEMS, DUMP (read);
#           n, f, bytes, size (scratch);
#           total_size - cumulative size in MB (written);
#           total_files - number of file systems listed (written).
# Outputs:  the table and the total, on standard output.
tabulate () {

  n=1
  total_size=0
  echo 'count  file_system         size (MB)  cumulative (MB)'
  echo '-----------------------------------------------------'
  for f in ${FILESYSTEMS}
  do
    # The S flag of dump is used to size the file system.
    bytes=$(${DUMP} -0S "${f}" 2>/dev/null)

    size=""
    case "${bytes}" in
      ''|*[!0-9]*)
        # dump failed or printed something other than a plain number.
        ;;
      *)
        # Note: bc used because expr can overflow.
        size=$(echo "${bytes}/1048576" | bc)
        ;;
    esac

    # Without a usable figure the sum below would be a syntax error, so
    # the file system is counted as 0 MB and flagged in the table.
    case "${size}" in
      ''|*[!0-9]*)
        echo "WARNING: could not determine the size of ${f}; counted as 0 MB."
        size=0
        ;;
    esac

    total_size=$(($total_size + $size))  # cumulative size
    printf "%2s   %-25s %6s %12s\n" "$n" "$f" "$size" "$total_size"
    n=$(($n + 1))
  done
  echo '-----------------------------------------------------'
  echo
  total_files=$(($n-1))
  if [ "$total_size" -lt 4096 ]
  then
    echo "total in backup configuration = $total_size MB"
  else
    echo "total in backup configuration = $(($total_size / 1024)) GB"
  fi

} # end of function tabulate
#----------------------------------------------------------------------------
# Write the summary to the log, move the log to its permanent location,
# remove the PID file and exit.  Never returns.
# Arguments: $1 - number of the signal that triggered the cleanup, or 0
#                 when called at the end of the script.
# Globals:   aborting, failed, whichfailed, total_size,
#            accompanying_actions, PRE_ACTIONS, POST_ACTIONS, r_code1,
#            r_code2, start_secs, LOG, MAINLOG, pid_file (all read).
# Exit code: 1 if aborting is true, otherwise 0.
cleanup () {
  # Do the wrap up.
  logmsg " ------------------------------------------------------\n\
Summary:"
  if [ "$aborting" = true ]
  then
    # The traps are installed before the PID file is named, so a signal
    # can arrive while pid_file is still unset.
    if [ -n "$pid_file" ]
    then
      echo aborting: >> "$pid_file"
    fi
    logmsg "Backup aborted."
    if [ "$1" != 0 ]
    then
      logmsg "Received signal ${1}."
    fi
  else
    # Check if there were any failures.
    if [ "$failed" = "true" ]
    then
      logmsg "WARNING: backup failures detected on tapefile(s) ${whichfailed}."
    else
      logmsg "Backup was successful."
      if [ "${total_size:-0}" -lt 4096 ]
      then
        logmsg "Size of backups was $total_size MB."
      else
        logmsg "Size of backups was $(( $total_size / 1024)) GB."
      fi
    fi
    if [ "$accompanying_actions" != suppressed ]
    then
      # r_code1 / r_code2 are only set when the action file was actually
      # run; an existing but unusable file leaves them unset.
      if [ -f "$PRE_ACTIONS" ]
      then
        logmsg "Pre-backup actions returned with code ${r_code1:-n/a (not run)}."
      fi
      if [ -f "$POST_ACTIONS" ]
      then
        logmsg "Post-backup actions returned with code ${r_code2:-n/a (not run)}."
      fi
    else
      logmsg "No pre- or post-backup actions were performed."
    fi
  fi

  # start_secs is unset if a signal arrives before the start time has been
  # recorded; the duration is then reported as zero.
  now_secs=$(date +%s)
  duration_secs=$(( $now_secs - ${start_secs:-$now_secs} ))
  duration_mins=$(($duration_secs / 60))
  duration_hours=$(($duration_mins / 60))
  duration_fraction=$(( $duration_mins % 60))

  logmsg "Backup duration  $duration_hours hour(s), \
$duration_fraction minute(s)."
  logmsg "Log file closed at $(date +%H:%M)."

  #----------------------------------------------------------------------
  # Move the log file into its permanent location.
  mv "$LOG" "$MAINLOG" >/dev/null 2>&1

  if [ -n "$pid_file" ]
  then
    rm -f "$pid_file"
  fi

  if [ "$aborting" = true ]
  then
    exit 1
  else
    exit 0
  fi
} # end of cleanup ()
#============================================================================
#                 Start of main block of script.
#----------------------------------------------------------------------------
# Check that this is being run by root or the operations account.

idstr=$(whoami)
case "${idstr}" in
  root|"$OPSNAME")
    ;;
  *)
    # Reported on standard error, like the other errors raised before the
    # log file is in use.
    printf 'Error: must be run as root or %s. Being run as "%s"\n' \
      "${OPSNAME}" "${idstr}" >&2
    exit 1
    ;;
esac
#----------------------------------------------------------------------------
# Process command-line arguments.
#   -f FILE   use FILE as the backup configuration file
#   -c NAME   use the backup configuration called NAME
#   -s        suppress the pre- and post-backup actions
# Anything else, or -f / -c with nothing following, shows the usage text
# (usage exits the script).
config_file_set=false
config_set=false

until [ $# -eq 0 ]
do
  case $1 in
    -f)
      [ $# -ge 2 ] || usage
      CONFIG_FILE=$2
      config_file_set=true
      shift 2
      ;;
    -c)
      [ $# -ge 2 ] || usage
      CONFIG=$2
      config_set=true
      shift 2
      ;;
    -s)
      accompanying_actions=suppressed
      shift
      ;;
    *)
      usage
      ;;
  esac
done # processing command line arguments


# -f and -c are mutually exclusive.
if [ "$config_file_set" = true ] && [ "$config_set" = true ]
then
  usage
fi
#----------------------------------------------------------------------------
# Get a time stamp so that duration of backup can be determined later.
start_secs=$(date +%s)
#----------------------------------------------------------------------------
# Manage the configuration name.

# Print the configuration name implied by a configuration file name.
# Arguments: $1 - path of the configuration file.
# Outputs:   the derived name on standard output.
# Returns:   0 on success, 1 if no name could be derived.
config_name_from_file () {
  # First guess:  pull x from /etc/rkbackup/backup-x.conf
  # (cut prints the whole name when it contains no "-").
  guess=$(basename "$1" .conf | cut -d- -f2)
  if [ -z "$guess" ]
  then
    # Second guess:  pull x from /blah-blah/x.conf
    guess=$(basename "$1" .conf | cut -d. -f1)
  fi
  [ -n "$guess" ] || return 1
  printf '%s\n' "$guess"
}

if [ "$config_set" = true ]
then
  # In this case we need to derive a config file name from the config name.
  if [ "$CONFIG" = default ]
  then
    CONFIG_FILE=/etc/rkbackup/backup.conf
  else
    CONFIG_FILE=/etc/rkbackup/backup-${CONFIG}.conf
  fi
elif [ "$config_file_set" != true ]
then
  # Default configuration applies.
  CONFIG=default
else
  # A configuration file was specified, so derive a config name from the
  # config file name.
  CONFIG=$(config_name_from_file "${CONFIG_FILE}") ||
    error There is a problem with the name of the config file: ${CONFIG_FILE}
fi

# Now that the configuration name is known, the provisional log names can
# be replaced and the PID file named.
LOGF="backup-${CONFIG}.$(date +%Y%m%d)"
LOG="/var/tmp/${LOGF}"
MAINLOG="/var/log/backup/${LOGF}"
pid_file=/var/tmp/backup-${CONFIG}.pid
#---------------------------------------------------------------------------
# Create the PID file.  Test for contention with other running backups.
# $CHECK_RUN succeeding is taken to mean that a backup with this
# configuration is already running; its output is reported as the PID of
# that process.
if pid=$($CHECK_RUN -c $CONFIG 2> /dev/null)
then
  {
    echo Error: Tried to run two instances of backup using configuration $CONFIG
    echo Aborting this instance. Leaving latent process $pid running.
  } 1>&2
  exit 1
fi

# The PID file does not exist or can be overwritten, so create it.
{
  echo pid: $$
  echo start_secs: $start_secs
} > $pid_file
#----------------------------------------------------------------------------
# Deal with a log file of the same name left by an earlier run.
if [ -f "$MAINLOG" ] && [ -s "$MAINLOG" ]
then
  # The file has non-zero size, so save it.
  mv $MAINLOG ${MAINLOG}.save 2>/dev/null
elif [ -f "$MAINLOG" ]
then
  # The file is empty, so just remove it.
  rm -f $MAINLOG 2>/dev/null
fi
#----------------------------------------------------------------------------
# Honour the per-configuration disable file.
# Note: DISABLE_FILE cannot be assigned at the start of the
# script because CONFIG is not known at that point.
DISABLE_FILE="/etc/rkbackup/backup-${CONFIG}.disable"

# Work out whether crond appears in the output of ancestry.
called_from_cron=false
if ancestry | grep crond > /dev/null
then
  called_from_cron=true
fi

# Only a run started from cron can be disabled; a backup invoked
# interactively goes ahead regardless.
if [ "$called_from_cron" = true ] && [ -f "$DISABLE_FILE" ]
then
  error "Backups are currently disabled.  Backups invoked interactively will \
still be performed. To enable, remove file ${DISABLE_FILE}"
fi
#--------------------------------------------------------------------------
# Find the host-specific configuration file.
if [ ! -f "$CONFIG_FILE" ] || [ ! -r "$CONFIG_FILE" ]
then
  error  Problem reading configuration file ${CONFIG_FILE}.
fi
#----------------------------------------------------------------------------
# Check the accompanying files for correct file ownership and modes.
if [ "$accompanying_actions" != suppressed ]
then
  if [ -f "$PRE_ACTIONS" ]
  then
    checkfile "$PRE_ACTIONS"
  fi
  
  if [ -f "$POST_ACTIONS" ]
  then
    checkfile "$POST_ACTIONS"
  fi
fi

checkfile "$CONFIG_FILE"
#----------------------------------------------------------------------------
# Read  and check the FILESYSTEMS and TAPE etc from the backup config file.
# The "." command looks a name without a slash up in PATH rather than in
# the current directory, so such a name (e.g. from "-f backup-2.conf") is
# given an explicit "./" to make sure the file that was checked above is
# the one that is read.
case "$CONFIG_FILE" in
  */*) . "$CONFIG_FILE" ;;
  *)   . "./$CONFIG_FILE" ;;
esac

if [ -z "${FILESYSTEMS}" ] || [ -z "${TAPE}" ]
then
  error "FILESYSTEMS and or TAPE not properly specified in ${CONFIG_FILE}."
fi

# NOTE(review): TAPE is presumably exported so that mt, which is run below
# without a device argument, takes the device from the environment -
# confirm against the local mt.
export TAPE
#----------------------------------------------------------------------------
# Rewind the tape, recording the stage in the PID file first.
echo rewind: >> $pid_file
if ! /bin/mt rewind >/dev/null 2>&1
then
  error "Failed to rewind the tape.  Tape $TAPE may not be loaded. Aborting backup."
fi
#--------------------------------------------------------------------------
# Put title in log file.
# The first write truncates $LOG, so the working log starts afresh here;
# every later line is appended through logmsg.
echo "${ORGANISATION} System backup log." > $LOG 2>&1
logmsg "Backup for system \"$(hostname)\" on $(date +'%a  %Y-%m-%d   %H:%M')."
logmsg "Using backup device \"${TAPE}\"."
logmsg "Host type \"${OS}\"."
logmsg "Using backup configuration file \"${CONFIG_FILE}\"."
logmsg "Backups made with ${DUMP}, blocking factor of ${BLOCKING}."
logmsg "Called from cron: ${called_from_cron}."
logmsg "Run by user: ${idstr}."
logmsg "========================================================================"
#----------------------------------------------------------------------------
# Run the pre-backup actions if required.
# r_code1 is set to the exit status of the pre-backup file only when that
# file is actually run.
echo pre_actions: >> $pid_file
if [ "$accompanying_actions" = suppressed ]
then
  logmsg "NOTE:  Pre-backup actions suppressed."
elif [ ! -f "$PRE_ACTIONS" ]
then
  logmsg "NOTE:  There is no pre-backup action to perform."
elif [ -r "$PRE_ACTIONS" ] && [ -x "$PRE_ACTIONS" ]
then
  logmsg "Running the sytstem-specific, \
Pre-backup actions at $(date +%H:%M).\n\
Pre-backup actions file: ${PRE_ACTIONS}."

  # Redirections are applied left to right: standard output must be
  # pointed at the log first so that 2>&1 sends standard error there too.
  "$PRE_ACTIONS" >> "$LOG" 2>&1
  r_code1=$?

  if [ "$r_code1" != 0 ]
  then
    logmsg "WARNING: Pre-backup gave code \"${r_code1}\" at $(date +%H:%M)."
  else
    logmsg "Pre-backup actions completed OK at $(date +%H:%M)."
  fi
else
  logmsg "WARNING: Pre-backup file is not readable or is not executable.\n\
WARNING: Ignoring file <$PRE_ACTIONS>." 
fi
logmsg ========================================================
#--------------------------------------------------------------------------
# Create logfile header info: the list of tape files, then the size table.
echo header: >> $pid_file
logmsg "Tapefile   Containing"
logmsg "----------------------------------------"
logmsg "0           header containing contents"

# Tape file 0 is the header; the file systems are numbered from 1.
n=0
for filesystem in $FILESYSTEMS
do
  n=$(($n + 1))
  logmsg "$n           $filesystem" 
done

logmsg " ---------------------------------------------------"
# tabulate is called directly (not in a subshell), so the total_size and
# total_files it sets are available below.
tabulate >> $LOG 2>&1
logmsg ===================================================
# Note: the value recorded under total_gb is total_size, which tabulate
# accumulates in MB.
{
  echo total_gb: $total_size
  echo total_files: $total_files
} >> $pid_file
#--------------------------------------------------------------------------
# Write the tape header containing what is in the log file so far.
logmsg "0      tape header  at $(date +%H:%M)" 

# A failure here is only logged as a warning and the backup continues; the
# alternative of aborting (aborting=true; cleanup 0) has been disabled.
if ! /bin/dd conv=sync if=$LOG bs=1024 of=$TAPE >> $LOG 2>&1
then
  logmsg "Warning: creation of tape header indicates an error.  Continuing on."
fi
#--------------------------------------------------------------------------
# Test that the tape is advancing.
# With the header written, the tapefile is expected to be 1.  If it is 0
# then a rewinding tape device is set, which is a problem since the drive
# will rewind after writing every tape file.
# Note: not all OS types support this type of checking.
tf=$($TAPEFILE)
if [ "$tf" = 1 ]
then
  # This is what we want to see; proceed.
  advance_test=true
else
  advance_test=false
  logmsg "NOTE: Unable to determine if tape advances. Check manually."
fi
# At this point tape is at tape file 1.
#--------------------------------------------------------------------------
# Backup one file system at a time using the dump program
logmsg " ---------------------------------------------------" 
failed=false
whichfailed=""
n=1
#--------------------------------------------------------------------------
# Iterate through the filesystems and perform dump on each in turn.
# n is the number of the tape file being written; progress is recorded in
# the PID file after every dump.
echo started_first_file: >> $pid_file

for filesystem in $FILESYSTEMS
do
  logmsg "$n           $filesystem  at $(date +%H:%M)"

  # Perform the dump; a failure is recorded against this tape file.
  if ! ${DUMP} 0uabqf $BLOCKING $TAPE $filesystem >> $LOG 2>&1
  then
    failed=true
    whichfailed="$whichfailed $n"
  fi
  echo completed_file: $n >> $pid_file

  n=$(($n + 1))

  # Normally, if one tape file fails, then the remaining will still
  # be attempted (so long as the tape hasn't re-wound).  If
  # the backup is required to abort if a dump fails, then
  # ABORT_ON_FAILURE should be set to true in the backup configuration.
  if [ "$ABORT_ON_FAILURE" = true ] && [ "$failed" = true ]
  then
    aborting=true
    break
  fi

  if [ "$advance_test" = true ]
  then
    # Tapefile number should advance in step with n.
    tf=$($TAPEFILE)
    case "$tf" in
      ''|*[!0-9]*)
        # No usable number came back, so the comparison cannot be made.
        # Carry on with the next file system, but say so in the log.
        logmsg "NOTE: Unable to read the tape file number after tape file $(($n - 1)). Check manually."
        ;;
      *)
        if [ "$n" -ne "$tf" ]
        then
          logmsg "ERROR: Tape file has not advanced as expected.\n\
ERROR: Perhaps the end of the tape is reached.\n\
ERROR: Expected to be at <$n>; tape is at <$tf>\n\
ERROR: Skipping the rest of the the file systems."
          failed=true
          # The dump of this tape file may already have been recorded as
          # failed above; do not list it twice.
          case " $whichfailed " in
            *" $(($n - 1)) "*) ;;
            *) whichfailed="$whichfailed $(($n - 1))" ;;
          esac
          break
        fi
        ;;
    esac
  fi
  logmsg " -------------------------------------------------------" 
done
#----------------------------------------------------------------------
if [ "$aborting" = false ]
then
  # Rewind and eject the tape.
  echo rewinding_offline: >> $pid_file
  logmsg "Tape now being rewound."
  # Normally offline the tape, but give the option not to.
  # The outcome of mt is checked so that the log does not claim success
  # when the command failed; mt's own output is in the log either way.
  if [ "$NO_OFFLINE" = false ]
  then
    # This is the normal case.
    if /bin/mt rewoffl >> $LOG 2>&1
    then
      logmsg "Tape has been taken offline."
    else
      logmsg "WARNING: mt rewoffl failed; the tape may not have been taken offline."
    fi
  else
    # This is the exception.
    if /bin/mt rewind >> $LOG 2>&1
    then
      logmsg "Tape not taken offline because NO_OFFLINE has been set."
    else
      logmsg "WARNING: mt rewind failed; the tape may not have been rewound."
    fi
  fi

  logmsg "Finished backup at $(date +%H:%M)."
fi
#----------------------------------------------------------------------
# Run the post-backup actions if required.
# Nothing is run or logged here when the backup is being aborted.
# r_code2 is set to the exit status of the post-backup file only when that
# file is actually run.
if [ "$aborting" = false ]
then
  if [ "$accompanying_actions" = suppressed ]
  then
    logmsg "NOTE:  Post-backup actions suppressed."
  elif [ ! -f "$POST_ACTIONS" ]
  then
    logmsg "NOTE:  There are no post-backup actions to perform."
  elif [ -r "$POST_ACTIONS" ] && [ -x "$POST_ACTIONS" ]
  then
    echo post_actions: >> $pid_file
    logmsg "Running the sytstem-specific, post-backup \
actions at $(date +%H:%M).\n\
Post-backup actions file: ${POST_ACTIONS}.\n"

    # Redirections are applied left to right: standard output must be
    # pointed at the log first so that 2>&1 sends standard error there too.
    "$POST_ACTIONS" >> "$LOG" 2>&1
    r_code2=$?

    if [ "$r_code2" != 0 ]
    then
      logmsg "WARNING: Post-backup gave code \"${r_code2}\" at $(date +%H:%M)."
    else
      logmsg "Post-backup actions completed OK at $(date +%H:%M)."
    fi
  else
    logmsg "WARNING: Post-backup file is not readable or is not executable.\n\
WARNING: Ignoring file <$POST_ACTIONS>." 
  fi
fi

# Write the summary, file the log and exit; 0 means "not called by a signal".
cleanup 0
