completely reworked
argo-git.yaml (new file, 21 lines)
@@ -0,0 +1,21 @@
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: oadp-backups
  namespace: openshift-gitops
spec:
  project: default
  source:
    repoURL: <YOUR_REPO_URL>
    targetRevision: main
    path: gitops/oadp/base
  destination:
    server: https://kubernetes.default.svc
    namespace: openshift-adp
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=false
      - ServerSideApply=true
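A quick smoke test after committing this file, as a sketch (assumes the openshift-gitops operator is installed, oc is logged in to the cluster, and the repo URL placeholder has been filled in):

$ oc apply -f argo-git.yaml
$ oc get application oadp-backups -n openshift-gitops
# SYNC STATUS should reach Synced once Argo CD reconciles gitops/oadp/base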
backup-test-cronjob.yaml (new file, 63 lines)
@@ -0,0 +1,63 @@
---
# Optional: Monthly restore test automation
apiVersion: batch/v1
kind: CronJob
metadata:
  name: monthly-restore-test
  namespace: openshift-adp
spec:
  schedule: "0 06 15 * *"  # 15th of month, 6 AM
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: restore-test
        spec:
          serviceAccountName: velero
          restartPolicy: OnFailure
          containers:
            - name: restore-test
              image: quay.io/konveyor/velero:latest
              env:
                - name: VELERO_NAMESPACE
                  value: openshift-adp
              command:
                - /bin/bash
                - -c
                - |
                  set -e

                  echo "=== Velero Restore Test ==="
                  echo "Date: $(date)"

                  # Get latest daily-config backup
                  CONFIG_BACKUP=$(velero backup get --selector="backup-type=config" \
                    -o json | jq -r '.items[0].metadata.name')

                  # Get latest daily-stateful backup
                  STATEFUL_BACKUP=$(velero backup get --selector="backup-type=stateful" \
                    -o json | jq -r '.items[0].metadata.name')

                  echo "Latest config backup: $CONFIG_BACKUP"
                  echo "Latest stateful backup: $STATEFUL_BACKUP"

                  # Verify backups are successful
                  CONFIG_STATUS=$(velero backup get $CONFIG_BACKUP -o json | \
                    jq -r '.status.phase')
                  STATEFUL_STATUS=$(velero backup get $STATEFUL_BACKUP -o json | \
                    jq -r '.status.phase')

                  echo "Config backup status: $CONFIG_STATUS"
                  echo "Stateful backup status: $STATEFUL_STATUS"

                  if [ "$CONFIG_STATUS" != "Completed" ] || [ "$STATEFUL_STATUS" != "Completed" ]; then
                    echo "ERROR: Backups not in Completed state"
                    exit 1
                  fi

                  echo "=== Test Passed ==="
                  echo "All backups verified successfully"
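To exercise the restore test without waiting for the 15th, a one-off Job can be spawned from the CronJob (standard oc/kubectl behavior; the job name here is arbitrary):

$ oc create job --from=cronjob/monthly-restore-test restore-test-manual -n openshift-adp
$ oc logs -f job/restore-test-manual -n openshift-adp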
@@ -1,20 +1,41 @@
---
# Schedule 1: Daily config-only backup (fast, all namespaces)
apiVersion: velero.io/v1
kind: Schedule
metadata:
  name: daily-config
  namespace: openshift-adp
spec:
-  schedule: "0 02 * * *"
+  schedule: "0 02 * * *"  # 2 AM daily

  # Make backups readable, sortable, unique
  nameTemplate: "{{ .ScheduleName }}-{{ .Timestamp }}"

  template:
    # Backup all namespaces
    includedNamespaces:
      - "*"

    # No PV data - just manifests

    # Labels for filtering and reporting
    labels:
      backup-type: config
      schedule: daily
      retention: short

    # Exclude ephemeral/generated resources
    excludedResources:
      - events
      - events.events.k8s.io
      - pipelineruns.tekton.dev          # Completed pipeline runs
      - taskruns.tekton.dev              # Completed task runs
      - replicasets.apps                 # Managed by deployments
      - pods                             # Recreated by controllers
      - endpoints                        # Auto-generated
      - endpointslices.discovery.k8s.io

    # No volume data - manifests only
    snapshotVolumes: false
    defaultVolumesToFsBackup: false

-    ttl: 336h
+    # 14 days retention
+    ttl: 336h
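After the first 2 AM run, the backup-type label added above makes the config backups easy to list and spot-check, e.g. (assumes the velero CLI is installed locally):

$ velero schedule get -n openshift-adp
$ velero backup get --selector backup-type=config -n openshift-adp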
dpa.yaml (12 lines changed)
@@ -1,3 +1,5 @@
---
# DPA Configuration - optimized for SNO with Kopia
apiVersion: oadp.openshift.io/v1alpha1
kind: DataProtectionApplication
metadata:
@@ -28,6 +30,14 @@ spec:
    nodeAgent:
      enable: true
      uploaderType: kopia
+     podConfig:
+       resourceAllocations:
+         limits:
+           cpu: "1"       # Increased for database compression
+           memory: "1Gi"  # Increased for larger chunks
+         requests:
+           cpu: "200m"
+           memory: "512Mi"
    velero:
      defaultPlugins:
        - openshift
@@ -45,4 +55,4 @@ spec:
          cpu: "100m"
          memory: "256Mi"

    logFormat: text
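Once the DPA is updated, the node agent pods should restart with the new limits. A rough check, assuming OADP's default resource names (the dpa short name and node-agent daemonset; adjust if your operator version labels these differently):

$ oc get dpa -n openshift-adp -o jsonpath='{.items[0].status.conditions}'
$ oc get daemonset node-agent -n openshift-adp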
@@ -1,14 +1,17 @@
---
# Schedule 2: Daily stateful backup (with volume data)
apiVersion: velero.io/v1
kind: Schedule
metadata:
  name: daily-stateful
  namespace: openshift-adp
spec:
-  schedule: "0 03 * * *"
+  schedule: "0 03 * * *"  # 3 AM daily (after config backup)

  nameTemplate: "{{ .ScheduleName }}-{{ .Timestamp }}"

  template:
    # Only namespaces with persistent data
    includedNamespaces:
      - gitea
      - authentik
@@ -17,10 +20,80 @@ spec:
      - n8n
      - apim
      - gitea-ci

    # No CSI snapshots; use nodeAgent/kopia only

    labels:
      backup-type: stateful
      schedule: daily
      retention: short

    # Exclude ephemeral resources
    excludedResources:
      - events
      - events.events.k8s.io
      - pipelineruns.tekton.dev
      - taskruns.tekton.dev
      - replicasets.apps
      - pods

    # Use Kopia for volume backups
    snapshotVolumes: false
    defaultVolumesToFsBackup: true

    # 14 days retention
    ttl: 336h

    # Pre-backup hooks for data consistency
    hooks:
      resources:
        # Cassandra: flush memtables to disk before backup
        - name: cassandra-flush
          includedNamespaces:
            - cassandra
          labelSelector:
            matchLabels:
              app.kubernetes.io/name: cassandra
          pre:
            - exec:
                container: cassandra
                command:
                  - /bin/bash
                  - -c
                  - nodetool flush
                timeout: 5m
                onError: Continue

        # Gitea PostgreSQL: checkpoint before backup
        - name: gitea-postgres-checkpoint
          includedNamespaces:
            - gitea
          labelSelector:
            matchLabels:
              app.kubernetes.io/name: postgresql
              app.kubernetes.io/instance: gitea
          pre:
            - exec:
                container: postgresql
                command:
                  - /bin/bash
                  - -c
                  - psql -U postgres -c 'CHECKPOINT;'
                timeout: 2m
                onError: Continue

        # Authentik PostgreSQL: checkpoint before backup
        - name: authentik-postgres-checkpoint
          includedNamespaces:
            - authentik
          labelSelector:
            matchLabels:
              app.kubernetes.io/name: postgresql
              app.kubernetes.io/instance: authentik
          pre:
            - exec:
                container: postgresql
                command:
                  - /bin/bash
                  - -c
                  - psql -U postgres -c 'CHECKPOINT;'
                timeout: 2m
                onError: Continue
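To verify the hooks and Kopia uploads on the next daily-stateful run, list the labeled backups and describe the newest one with details (pod volume backups are listed in the details output; hook failures surface in the backup's status). <BACKUP_NAME> is a placeholder:

$ velero backup get --selector backup-type=stateful -n openshift-adp
$ velero backup describe <BACKUP_NAME> --details -n openshift-adp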