Compare commits

...

2 Commits

Author SHA1 Message Date
9807075149 Committed runbook 2025-12-31 14:30:50 +11:00
f85636f48f removed pods from exclusions
added timezones to backup and restore
2025-12-31 14:29:26 +11:00
4 changed files with 129 additions and 46 deletions

83
RESTORE.md Normal file
View File

@@ -0,0 +1,83 @@
# Restore Procedure Runbook
## Set Variables
```bash
# Namespace where the Velero Restore object is created.
VELERO_NS=openshift-adp
# Namespace included from the backup and mapped to the test namespace.
SRC_NS=n8n
# Backup to restore from; used as spec.backupName in the Restore manifest.
# List candidates with: oc -n $VELERO_NS get backups.velero.io
BACKUP_NAME=REPLACE_WITH_BACKUP_NAME
# Timestamp suffix so every test run gets unique namespace/restore/host names.
TS=$(date +%Y%m%d-%H%M%S)
DST_NS=n8n-restore-test-$TS
RESTORE_NAME=n8n-restore-test-$TS
TEST_HOST=n8n-restore-$TS.apilab.us
```
## Create Namespace
```bash
# Create the target namespace that $SRC_NS is mapped to by the restore.
oc create ns $DST_NS
```
## Apply Restore
```bash
# Create a Velero Restore that restores $SRC_NS from $BACKUP_NAME into $DST_NS.
# Cluster-scoped resources are not restored, and Routes are excluded from the
# restore (a test Route with its own host is created in a later step).
cat <<EOF | oc apply -f -
apiVersion: velero.io/v1
kind: Restore
metadata:
  name: $RESTORE_NAME
  namespace: $VELERO_NS
spec:
  backupName: $BACKUP_NAME
  includeClusterResources: false
  includedNamespaces:
    - $SRC_NS
  namespaceMapping:
    $SRC_NS: $DST_NS
  restorePVs: true
  excludedResources:
    - routes.route.openshift.io
EOF
```
## Monitor Restore
```bash
# Re-print the restore's status.phase every 5 seconds; press Ctrl-C to stop.
watch -n 5 "oc -n $VELERO_NS get restore $RESTORE_NAME -o jsonpath='{.status.phase}{\"\n\"}'"
```
## Check Restore When Complete
```bash
# Show the full description of the Restore object to review its final state.
oc -n $VELERO_NS describe restore $RESTORE_NAME
```
## Monitor Deployments
```bash
# Wait (up to 10 minutes each) for the restored deployments to finish rolling
# out in the test namespace: first postgres, then n8n.
oc -n $DST_NS rollout status deploy/postgres --timeout=10m
oc -n $DST_NS rollout status deploy/n8n --timeout=10m
```
## Create Route and Test
```bash
# Expose the restored n8n Service on a per-run test hostname using an
# edge-terminated Route that redirects plain HTTP to HTTPS.
cat <<EOF | oc -n $DST_NS apply -f -
apiVersion: route.openshift.io/v1
kind: Route
metadata:
  name: n8n-restore-test
spec:
  host: $TEST_HOST
  path: /
  to:
    kind: Service
    name: n8n
  port:
    targetPort: 5678
  tls:
    termination: edge
    insecureEdgeTerminationPolicy: Redirect
EOF
# Smoke test: -k skips certificate verification, -f makes HTTP errors fail the
# command, -sS hides progress but still prints errors.
curl -kfsS https://$TEST_HOST/ >/dev/null && echo "PASS: UI reachable"
```
## Cleanup
```bash
# Remove the Restore object, then delete the test namespace.
oc -n $VELERO_NS delete restore $RESTORE_NAME
oc delete ns $DST_NS
```

View File

@@ -6,7 +6,8 @@ metadata:
name: monthly-restore-test
namespace: openshift-adp
spec:
schedule: "0 06 15 * *" # 15th of month, 6 AM
timeZone: "Australia/Sydney"
schedule: "0 06 15 * *" # 15th of month, 6 AM
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 3
@@ -20,44 +21,45 @@ spec:
serviceAccountName: velero
restartPolicy: OnFailure
containers:
- name: restore-test
image: quay.io/konveyor/velero:latest
env:
- name: VELERO_NAMESPACE
value: openshift-adp
command:
- /bin/bash
- -c
- |
set -e
- name: restore-test
image: quay.io/konveyor/velero:latest
env:
- name: VELERO_NAMESPACE
value: openshift-adp
command:
- /bin/bash
- -c
- |
set -e
echo "=== Velero Restore Test ==="
echo "Date: $(date)"
echo "=== Velero Restore Test ==="
echo "Date: $(date)"
# Get latest daily-config backup
CONFIG_BACKUP=$(velero backup get --selector="backup-type=config" \
-o json | jq -r '.items[0].metadata.name')
# Get latest daily-config backup
CONFIG_BACKUP=$(velero backup get --selector="backup-type=config" \
-o json | jq -r '.items[0].metadata.name')
# Get latest daily-stateful backup
STATEFUL_BACKUP=$(velero backup get --selector="backup-type=stateful" \
-o json | jq -r '.items[0].metadata.name')
# Get latest daily-stateful backup
STATEFUL_BACKUP=$(velero backup get --selector="backup-type=stateful" \
-o json | jq -r '.items[0].metadata.name')
echo "Latest config backup: $CONFIG_BACKUP"
echo "Latest stateful backup: $STATEFUL_BACKUP"
echo "Latest config backup: $CONFIG_BACKUP"
echo "Latest stateful backup: $STATEFUL_BACKUP"
# Verify backups are successful
CONFIG_STATUS=$(velero backup get $CONFIG_BACKUP -o json | \
jq -r '.status.phase')
STATEFUL_STATUS=$(velero backup get $STATEFUL_BACKUP -o json | \
jq -r '.status.phase')
# Verify backups are successful
CONFIG_STATUS=$(velero backup get $CONFIG_BACKUP -o json | \
jq -r '.status.phase')
STATEFUL_STATUS=$(velero backup get $STATEFUL_BACKUP -o json | \
jq -r '.status.phase')
echo "Config backup status: $CONFIG_STATUS"
echo "Stateful backup status: $STATEFUL_STATUS"
echo "Config backup status: $CONFIG_STATUS"
echo "Stateful backup status: $STATEFUL_STATUS"
if [ "$CONFIG_STATUS" != "Completed" ] || [ "$STATEFUL_STATUS" != "Completed" ]; then
echo "ERROR: Backups not in Completed state"
exit 1
fi
if [ "$CONFIG_STATUS" != "Completed" ] || [ "$STATEFUL_STATUS" != "Completed" ]; then
echo "ERROR: Backups not in Completed state"
exit 1
fi
echo "=== Test Passed ==="
echo "All backups verified successfully"
echo "=== Test Passed ==="
echo "All backups verified successfully"

View File

@@ -6,7 +6,7 @@ metadata:
name: daily-config
namespace: openshift-adp
spec:
schedule: "0 02 * * *" # 2 AM daily
schedule: "CRON_TZ=Australia/Sydney 0 02 * * *" # 2 AM daily
# Make backups readable, sortable, unique
#nameTemplate: "{{ .ScheduleName }}-{{ .Timestamp }}"

View File

@@ -6,7 +6,7 @@ metadata:
name: daily-stateful
namespace: openshift-adp
spec:
schedule: "0 03 * * *" # 3 AM daily (after config backup)
schedule: "CRON_TZ=Australia/Sydney 0 03 * * *" # 3 AM daily (after config backup)
#nameTemplate: "{{ .ScheduleName }}-{{ .Timestamp }}"
@@ -32,8 +32,6 @@ spec:
- events.events.k8s.io
- pipelineruns.tekton.dev
- taskruns.tekton.dev
- replicasets.apps
- pods
# Use Kopia for volume backups
snapshotVolumes: false