completely reworked

2025-12-30 17:38:06 +11:00
parent 4e17c1634a
commit c1e8b9bf35
5 changed files with 201 additions and 13 deletions

argo-git.yaml (new file)

@@ -0,0 +1,21 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: oadp-backups
  namespace: openshift-gitops
spec:
  project: default
  source:
    repoURL: <YOUR_REPO_URL>
    targetRevision: main
    path: gitops/oadp/base
  destination:
    server: https://kubernetes.default.svc
    namespace: openshift-adp
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=false
      - ServerSideApply=true
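
Once the Application is applied, a quick way to confirm Argo CD picked it up (a sketch; assumes an oc session logged in to the cluster):

# Apply the Application and check its sync and health status
oc apply -f argo-git.yaml
oc get application oadp-backups -n openshift-gitops \
  -o jsonpath='{.status.sync.status} {.status.health.status}{"\n"}'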

backup-test-cronjob.yaml (new file)

@@ -0,0 +1,63 @@
---
# Optional: monthly restore-test automation (currently verifies the latest backups completed)
apiVersion: batch/v1
kind: CronJob
metadata:
  name: monthly-restore-test
  namespace: openshift-adp
spec:
  schedule: "0 06 15 * *" # 15th of month, 6 AM
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: restore-test
        spec:
          serviceAccountName: velero
          restartPolicy: OnFailure
          containers:
            - name: restore-test
              # NB: assumes this image provides bash and jq alongside the velero CLI
              image: quay.io/konveyor/velero:latest
              env:
                - name: VELERO_NAMESPACE
                  value: openshift-adp
              command:
                - /bin/bash
                - -c
                - |
                  set -e
                  echo "=== Velero Restore Test ==="
                  echo "Date: $(date)"
                  # Latest daily-config backup; sort by creation time, since
                  # .items[0] is not guaranteed to be the newest. The (.items // [.])
                  # guard handles velero printing a bare object when only one matches.
                  CONFIG_BACKUP=$(velero backup get --selector="backup-type=config" -o json \
                    | jq -r '(.items // [.]) | sort_by(.metadata.creationTimestamp) | last | .metadata.name')
                  # Latest daily-stateful backup
                  STATEFUL_BACKUP=$(velero backup get --selector="backup-type=stateful" -o json \
                    | jq -r '(.items // [.]) | sort_by(.metadata.creationTimestamp) | last | .metadata.name')
                  echo "Latest config backup: $CONFIG_BACKUP"
                  echo "Latest stateful backup: $STATEFUL_BACKUP"
                  # Verify both backups completed
                  CONFIG_STATUS=$(velero backup get "$CONFIG_BACKUP" -o json | jq -r '.status.phase')
                  STATEFUL_STATUS=$(velero backup get "$STATEFUL_BACKUP" -o json | jq -r '.status.phase')
                  echo "Config backup status: $CONFIG_STATUS"
                  echo "Stateful backup status: $STATEFUL_STATUS"
                  if [ "$CONFIG_STATUS" != "Completed" ] || [ "$STATEFUL_STATUS" != "Completed" ]; then
                    echo "ERROR: Backups not in Completed state"
                    exit 1
                  fi
                  echo "=== Test Passed ==="
                  echo "All backups verified successfully"

daily-config Schedule (file name not shown)

@@ -1,20 +1,41 @@
---
# Schedule 1: Daily config-only backup (fast, all namespaces)
apiVersion: velero.io/v1
kind: Schedule
metadata:
  name: daily-config
  namespace: openshift-adp
spec:
  schedule: "0 02 * * *" # 2 AM daily
  # Make backup names readable, sortable, unique
  nameTemplate: "{{ .ScheduleName }}-{{ .Timestamp }}"
  template:
    # Back up all namespaces
    includedNamespaces:
      - "*"
    # Labels for filtering and reporting
    labels:
      backup-type: config
      schedule: daily
      retention: short
    # Exclude ephemeral/generated resources
    excludedResources:
      - events
      - events.events.k8s.io
      - pipelineruns.tekton.dev # completed pipeline runs
      - taskruns.tekton.dev # completed task runs
      - replicasets.apps # managed by deployments
      - pods # recreated by controllers
      - endpoints # auto-generated
      - endpointslices.discovery.k8s.io
    # No volume data - manifests only
    snapshotVolumes: false
    defaultVolumesToFsBackup: false
    # 14 days retention
    ttl: 336h
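
After the first run, the schedule and the backups it labelled can be checked from the CLI (a sketch; assumes the velero client can reach the cluster):

# List schedules and the backups this one produced
velero schedule get -n openshift-adp
velero backup get -n openshift-adp --selector backup-type=config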

DataProtectionApplication manifest (file name not shown)

@@ -1,3 +1,5 @@
---
# DPA Configuration - optimized for SNO with Kopia
apiVersion: oadp.openshift.io/v1alpha1
kind: DataProtectionApplication
metadata:
@@ -28,6 +30,14 @@ spec:
    nodeAgent:
      enable: true
      uploaderType: kopia
      podConfig:
        resourceAllocations:
          limits:
            cpu: "1" # increased for database compression
            memory: "1Gi" # increased for larger chunks
          requests:
            cpu: "200m"
            memory: "512Mi"
    velero:
      defaultPlugins:
        - openshift
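
The resource bump only takes effect once the node agent restarts with it; one way to verify (assumes OADP's usual node-agent DaemonSet name and the dpa short name):

# Check that the DPA reconciled and the node agent carries the new limits
oc get dpa -n openshift-adp \
  -o jsonpath='{.items[0].status.conditions[?(@.type=="Reconciled")].status}{"\n"}'
oc get daemonset node-agent -n openshift-adp \
  -o jsonpath='{.spec.template.spec.containers[0].resources}{"\n"}'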

daily-stateful Schedule (file name not shown)

@@ -1,14 +1,17 @@
---
# Schedule 2: Daily stateful backup (with volume data)
apiVersion: velero.io/v1
kind: Schedule
metadata:
  name: daily-stateful
  namespace: openshift-adp
spec:
  schedule: "0 03 * * *" # 3 AM daily (after the config backup)
  nameTemplate: "{{ .ScheduleName }}-{{ .Timestamp }}"
  template:
    # Only namespaces with persistent data
    includedNamespaces:
      - gitea
      - authentik
@@ -18,9 +21,79 @@ spec:
      - apim
      - gitea-ci
    labels:
      backup-type: stateful
      schedule: daily
      retention: short
    # Exclude ephemeral resources
    excludedResources:
      - events
      - events.events.k8s.io
      - pipelineruns.tekton.dev
      - taskruns.tekton.dev
      - replicasets.apps
      - pods
    # No CSI snapshots; use node-agent/Kopia file-system backup for volumes
    snapshotVolumes: false
    defaultVolumesToFsBackup: true
    # 14 days retention
    ttl: 336h
    # Pre-backup hooks for data consistency
    hooks:
      resources:
        # Cassandra: flush memtables to disk before backup
        - name: cassandra-flush
          includedNamespaces:
            - cassandra
          labelSelector:
            matchLabels:
              app.kubernetes.io/name: cassandra
          pre:
            - exec:
                container: cassandra
                command:
                  - /bin/bash
                  - -c
                  - nodetool flush
                timeout: 5m
                onError: Continue
        # Gitea PostgreSQL: checkpoint before backup
        - name: gitea-postgres-checkpoint
          includedNamespaces:
            - gitea
          labelSelector:
            matchLabels:
              app.kubernetes.io/name: postgresql
              app.kubernetes.io/instance: gitea
          pre:
            - exec:
                container: postgresql
                command:
                  - /bin/bash
                  - -c
                  - psql -U postgres -c 'CHECKPOINT;'
                timeout: 2m
                onError: Continue
        # Authentik PostgreSQL: checkpoint before backup
        - name: authentik-postgres-checkpoint
          includedNamespaces:
            - authentik
          labelSelector:
            matchLabels:
              app.kubernetes.io/name: postgresql
              app.kubernetes.io/instance: authentik
          pre:
            - exec:
                container: postgresql
                command:
                  - /bin/bash
                  - -c
                  - psql -U postgres -c 'CHECKPOINT;'
                timeout: 2m
                onError: Continue
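
Whether the hooks actually fired is only visible in the backup log; a sketch for spot-checking the most recent stateful backup (assumes jq on the workstation, same list-vs-object guard as the CronJob script):

# Find the newest stateful backup and grep its log for hook execution
NAME=$(velero backup get -n openshift-adp --selector backup-type=stateful -o json \
  | jq -r '(.items // [.]) | sort_by(.metadata.creationTimestamp) | last | .metadata.name')
velero backup logs "$NAME" -n openshift-adp | grep -i hook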