Enhance LLM sandbox with persistent caching, helper scripts, and improved UX

Added comprehensive improvements to ClawdBox for better LLM agent experience:

- Tools: Added tree, tmux, htop, strace, file, less for enhanced debugging
- Python packages: httpie, pyyaml, requests, black, ipython pre-installed
- Persistent caching: pip/npm caches now survive container restarts
- Git config persistence: .gitconfig auto-links from /data volume
- Shell improvements: colored prompt, aliases (ll, k, dc), 10k line history
- Helper scripts: ConfigMap with disk-usage, health-check, clean-workspace, install-tools
- Environment variables: TERM, TZ, DEBIAN_FRONTEND for better compatibility
- Makefile: Common operations (build, deploy, logs, shell, health-check)
- Documentation: Comprehensive README with troubleshooting and workflows

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 23:51:26 +11:00
parent 8fe712cda7
commit e039f77f0e
5 changed files with 562 additions and 8 deletions

View File

@@ -29,6 +29,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
ripgrep \ ripgrep \
ncdu \ ncdu \
sudo \ sudo \
tree \
tmux \
htop \
strace \
file \
less \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ && curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \ && apt-get install -y nodejs \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
@@ -44,6 +50,14 @@ RUN OC_VERSION="stable" && \
tar -xvf /tmp/oc.tar.gz -C /usr/bin/ oc && \ tar -xvf /tmp/oc.tar.gz -C /usr/bin/ oc && \
rm /tmp/oc.tar.gz && chmod +x /usr/bin/oc rm /tmp/oc.tar.gz && chmod +x /usr/bin/oc
# Python tooling pre-installed for the agent (formatter, REPL, HTTP and YAML helpers).
#   --no-cache-dir           do not keep pip's download cache in the image layer
#   --break-system-packages  allow pip to install into the system interpreter's environment
RUN pip3 install --no-cache-dir --break-system-packages black httpie ipython pyyaml requests
# Setup SSH directory & Config for OpenShift (Random UID support) # Setup SSH directory & Config for OpenShift (Random UID support)
RUN mkdir -p /var/run/sshd && \ RUN mkdir -p /var/run/sshd && \
chmod 775 /var/run/sshd chmod 775 /var/run/sshd
@@ -73,9 +87,9 @@ RUN useradd -m -s /bin/bash -u 1000 claw && \
# Prepare volume mount point # Prepare volume mount point
# Mount persistent storage here # Mount persistent storage here
VOLUME /data VOLUME /data
RUN mkdir -p /data && \ RUN mkdir -p /data /data/.cache/pip /data/.cache/npm /data/.local && \
chown claw:claw /data && \ chown -R claw:claw /data && \
chmod 775 /data chmod -R 775 /data
# Set working directory to the persistent volume # Set working directory to the persistent volume
WORKDIR /data WORKDIR /data
@@ -86,6 +100,20 @@ EXPOSE 2222
# Switch to user 'claw' (UID 1000) # Switch to user 'claw' (UID 1000)
USER claw USER claw
# Point package-manager caches and per-user Python installs at the persistent
# /data volume so they survive container restarts.
#   PIP_CACHE_DIR / npm_config_cache / XDG_CACHE_HOME  download caches
#   PYTHONUSERBASE  makes `pip install --user` land in /data/.local instead of ~/.local
#   PATH            exposes console scripts installed under /data/.local/bin
# NOTE(review): these apply to processes started by the container runtime;
# sessions spawned by sshd normally build their own environment - confirm that
# the variables are also exported for SSH logins.
ENV PIP_CACHE_DIR=/data/.cache/pip \
    npm_config_cache=/data/.cache/npm \
    XDG_CACHE_HOME=/data/.cache \
    PYTHONUSERBASE=/data/.local \
    PATH=/data/.local/bin:$PATH
# Shell defaults for the claw user, appended to ~/.bashrc:
#   - coloured prompt and a few aliases
#   - 10k-line history stored on the persistent volume
#   - cache / user-base variables pointing at /data, exported here as well
#     because sessions started by sshd normally do not inherit the image ENV
# printf '%s\n' writes each argument verbatim, so the backslash escapes in PS1
# do not depend on how the build shell's `echo` treats them.
# NOTE(review): a distro skeleton .bashrc may return early for non-interactive
# shells, in which case these lines only take effect for interactive logins.
RUN printf '%s\n' \
      'export PS1="\[\e[32m\]\u@clawdbox\[\e[m\]:\[\e[34m\]\w\[\e[m\]\$ "' \
      'alias ll="ls -lah"' \
      'alias k="kubectl"' \
      'alias dc="docker"' \
      'export HISTFILE=/data/.bash_history' \
      'export HISTSIZE=10000' \
      'export HISTFILESIZE=10000' \
      'export PIP_CACHE_DIR=/data/.cache/pip' \
      'export npm_config_cache=/data/.cache/npm' \
      'export XDG_CACHE_HOME=/data/.cache' \
      'export PYTHONUSERBASE=/data/.local' \
      'export PATH="/data/.local/bin:$PATH"' \
      >> /home/claw/.bashrc
# Start SSH daemon # Start SSH daemon
# The keys are generated if they don't exist on the persistent volume # The keys are generated if they don't exist on the persistent volume
CMD ["/bin/bash", "-c", "\ CMD ["/bin/bash", "-c", "\
@@ -97,4 +125,5 @@ for keytype in rsa ecdsa ed25519; do \
fi; \ fi; \
done && \ done && \
chmod 600 /data/ssh/ssh_host_*_key && \ chmod 600 /data/ssh/ssh_host_*_key && \
if [ -f /data/.gitconfig ]; then ln -sf /data/.gitconfig ~/.gitconfig; fi && \
/usr/sbin/sshd -D -e -f /etc/ssh/sshd_config"] /usr/sbin/sshd -D -e -f /etc/ssh/sshd_config"]

80
Makefile Normal file
View File

@@ -0,0 +1,80 @@
# Every target in this file is a command rather than a file, so all of them are
# declared phony; otherwise a stray file named e.g. `status` or `restart` would
# make the target look up to date and silently skip its recipe.
.PHONY: build push deploy redeploy restart logs shell disk-usage clean-cache \
        health-check describe events status help

# Deployment settings. Simple (:=) assignment: the values are plain literals.
# Override any of them on the command line, e.g. `make shell SSH_HOST=other.host`.
IMAGE_NAME := default-route-openshift-image-registry.apps.lab.apilab.us/clawdbox/clawdbox:latest
NAMESPACE := clawdbox
SSH_HOST := clawdbox.apps.lab.apilab.us
SSH_PORT := 2222
SSH_USER := claw
# Print an overview of the available targets. As the first rule in the file it
# is also what a bare `make` runs.
help:
	@echo "ClawdBox Makefile - Common Operations"
	@echo ""
	@echo "Build & Deploy:"
	@echo " make build - Build Docker image"
	@echo " make push - Build and push image to registry"
	@echo " make deploy - Deploy all manifests to cluster"
	@echo " make redeploy - Push image and restart deployment"
	@echo ""
	@echo "Operations:"
	@echo " make logs - Stream pod logs"
	@echo " make shell - SSH into container"
	@echo " make disk-usage - Check /data disk usage"
	@echo " make clean-cache - Clear pip/npm caches"
	@echo " make health-check - Run comprehensive health diagnostics"
	@echo ""
	@echo "Debugging:"
	@echo " make describe - Describe deployment"
	@echo " make events - Show namespace events"
	@echo " make restart - Restart deployment (without push)"
	@echo " make status - Show deployment, pod, service and route status"
# Build the container image from the Dockerfile in the current directory.
build:
	docker build --tag $(IMAGE_NAME) .

# Publish the image; depends on `build`, so the image is always rebuilt first.
push: build
	docker push $(IMAGE_NAME)

# Apply every manifest under manifests/ to the cluster kubectl currently points at.
deploy:
	kubectl apply --filename manifests/
# Build + push the image, then roll the deployment and block until the rollout
# has finished.
redeploy: push
	kubectl rollout restart deployment/clawdbox --namespace $(NAMESPACE)
	@echo "Waiting for rollout to complete..."
	kubectl rollout status deployment/clawdbox --namespace $(NAMESPACE)

# Roll the deployment without rebuilding or pushing an image.
restart:
	kubectl rollout restart deployment/clawdbox --namespace $(NAMESPACE)
	kubectl rollout status deployment/clawdbox --namespace $(NAMESPACE)
# Follow the logs of the clawdbox deployment until interrupted.
logs:
	kubectl logs --follow --namespace $(NAMESPACE) deployment/clawdbox

# Open an interactive SSH session in the container.
shell:
	ssh -p $(SSH_PORT) $(SSH_USER)@$(SSH_HOST)
# Common prefix for running a single command inside the container over SSH.
# Recursive (=) on purpose so SSH_* overrides are honoured wherever they are set.
ssh_run = ssh -p $(SSH_PORT) $(SSH_USER)@$(SSH_HOST)

# Show filesystem usage of /data plus its ten largest directories.
disk-usage:
	@$(ssh_run) "df -h /data && echo '' && echo 'Top 10 Largest Directories:' && du -h /data 2>/dev/null | sort -rh | head -10"

# Empty the persistent pip and npm caches (the directories themselves are kept).
clean-cache:
	@$(ssh_run) "rm -rf /data/.cache/pip/* /data/.cache/npm/* && echo 'Package caches cleared'"

# Run the health-check helper script mounted under /data/scripts.
health-check:
	@$(ssh_run) "/data/scripts/health-check.sh"
# Show the full description of the clawdbox deployment.
describe:
	kubectl describe deployment/clawdbox --namespace $(NAMESPACE)

# List namespace events ordered by their last-seen timestamp.
events:
	kubectl get events --namespace $(NAMESPACE) --sort-by='.lastTimestamp'
# One-screen overview of the namespace: deployments, pods, services and routes.
# `route` is not a resource on every cluster, so a failure of that last query
# prints a hint instead of failing the target.
status:
	@echo "=== Deployment Status ==="
	@kubectl get deployment --namespace $(NAMESPACE)
	@echo ""
	@echo "=== Pod Status ==="
	@kubectl get pods --namespace $(NAMESPACE)
	@echo ""
	@echo "=== Service Status ==="
	@kubectl get svc --namespace $(NAMESPACE)
	@echo ""
	@echo "=== Route Status ==="
	@kubectl get route --namespace $(NAMESPACE) 2>/dev/null || echo "No routes found (not on OpenShift?)"

253
README.md
View File

@@ -10,11 +10,14 @@ This container provides a stable, tool-rich environment for the AI agent to:
- Use tools that aren't available in the minimal agent environment. - Use tools that aren't available in the minimal agent environment.
## Tools Included ## Tools Included
- **Core:** curl, wget, git, jq, yq, unzip, tar, vim/nano - **Core:** curl, wget, git, jq, yq, unzip, tar, vim/nano, tree, less
- **Dev:** python3 (pip/venv), build-essential, nodejs, npm - **Dev:** python3 (pip/venv), build-essential, nodejs (v20), npm
- **Network:** ping, dnsutils, net-tools - **Python Libraries:** httpie, pyyaml, requests, black, ipython
- **Network:** ping, dnsutils, net-tools, openssh-server/client, sshpass
- **Media:** ffmpeg - **Media:** ffmpeg
- **Access:** openssh-server - **Monitoring:** htop, tmux, ncdu, strace
- **Kubernetes:** OpenShift CLI (oc)
- **Search:** ripgrep (fast grep alternative)
## Deployment (OpenShift / K8s) ## Deployment (OpenShift / K8s)
@@ -47,4 +50,244 @@ This container provides a stable, tool-rich environment for the AI agent to:
``` ```
## Access ## Access
Connect via SSH using the `claw` user (passwordless sudo enabled). Connect via SSH using the `claw` user (passwordless sudo enabled):
```bash
ssh -p 2222 claw@clawdbox.apps.lab.apilab.us
# or
make shell
```
## Persistent Storage Structure
The `/data` volume preserves data across container restarts:
```
/data/
├── ssh/ # SSH host keys (auto-generated on first run)
├── scripts/ # Helper scripts (from ConfigMap, read-only)
│ ├── disk-usage.sh
│ ├── health-check.sh
│ ├── clean-workspace.sh
│ └── install-tools.sh
├── .cache/
│ ├── pip/ # Python package cache (persisted)
│ └── npm/ # Node package cache (persisted)
├── .local/ # User-installed Python packages (pip install --user)
├── .gitconfig # Git configuration (create to persist)
├── .bash_history # Command history (persistent)
└── [your workspace] # Your work files
```
**Storage:** 10Gi PersistentVolumeClaim (ReadWriteOnce)
## Common Tasks
### Quick Operations (Using Makefile)
```bash
make help # Show all available commands
make logs # Stream container logs
make shell # SSH into container
make disk-usage # Check storage usage
make clean-cache # Clear package caches
make redeploy # Rebuild, push, and restart
```
### Install Python Packages
Packages are cached in `/data/.cache/pip` and survive restarts:
```bash
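pip3 install --user --break-system-packages pandas numpy scikit-learn
# Installs under $PYTHONUSERBASE (default: ~/.local, which is NOT on the volume).
# With PYTHONUSERBASE=/data/.local the packages land in
# /data/.local/lib/python3.*/site-packages/ and persist across restarts.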
```
### Install Node Packages Globally
```bash
npm install -g typescript ts-node
# Cached in /data/.cache/npm
```
### Persist Git Configuration
```bash
# Inside the container:
git config --global user.name "Your Name"
git config --global user.email "you@example.com"
# Save to persistent storage:
cp ~/.gitconfig /data/.gitconfig
# (Will auto-link on next restart)
```
### Check Disk Usage
```bash
# Quick overview:
df -h /data
# Use helper script for detailed report:
/data/scripts/disk-usage.sh
# Interactive explorer:
ncdu /data
# Top 10 largest directories:
du -h /data | sort -rh | head -10
```
### Helper Scripts
Pre-loaded scripts available in `/data/scripts/`:
```bash
# Comprehensive health check:
/data/scripts/health-check.sh
# Disk usage report:
/data/scripts/disk-usage.sh
# Interactive workspace cleanup:
/data/scripts/clean-workspace.sh
# Install common tools:
/data/scripts/install-tools.sh
```
### Shell Features
The shell includes several quality-of-life improvements:
- **Colored prompt:** `claw@clawdbox:/data$` (green user, blue path)
- **Persistent history:** Command history saved to `/data/.bash_history`
- **Useful aliases:**
- `ll` - detailed file listing (`ls -lah`)
- `k` - kubectl shortcut
- `dc` - docker shortcut
- **10,000 line history:** Never lose your commands
## Troubleshooting
### SSH Connection Refused
**Problem:** Cannot connect via SSH
**Diagnosis:**
```bash
# Check if pod is running:
kubectl get pods -n clawdbox
# Check pod logs:
make logs
# or
kubectl logs -n clawdbox deployment/clawdbox
```
**Common causes:**
- Pod still starting (wait for startup probe to pass)
- SSH keys not mounted correctly (check secret exists)
- Route not configured (check `kubectl get route -n clawdbox`)
### Out of Disk Space
**Problem:** `/data` volume is full
**Diagnosis:**
```bash
make disk-usage
# or
ssh -p 2222 claw@clawdbox.apps.lab.apilab.us "df -h /data"
```
**Solutions:**
```bash
# Clear package caches:
make clean-cache
# Find large directories:
ncdu /data
# Clear specific caches manually:
rm -rf /data/.cache/pip/*
rm -rf /data/.cache/npm/*
```
### Slow Package Installs
**Problem:** `pip install` or `npm install` is slow
**Diagnosis:**
Check if cache directories are properly configured:
```bash
ssh -p 2222 claw@clawdbox.apps.lab.apilab.us
echo $PIP_CACHE_DIR # Should show: /data/.cache/pip
echo $npm_config_cache # Should show: /data/.cache/npm
ls -la /data/.cache/
```
**Solution:**
If environment variables are missing, rebuild the container:
```bash
make redeploy
```
### Pod Stuck in CrashLoopBackOff
**Problem:** Container won't start
**Diagnosis:**
```bash
kubectl describe pod -n clawdbox -l app=clawdbox
kubectl logs -n clawdbox -l app=clawdbox --previous
```
**Common causes:**
- PVC not bound (check `kubectl get pvc -n clawdbox`)
- SSH host key generation failed (check logs for errors)
- Resource limits too low (increase in deployment.yaml)
### Deployment Status
**Quick health check:**
```bash
make status
# Shows: deployment, pods, services, routes
```
## Development Workflow
### Local Development
```bash
# 1. Make changes to Dockerfile or manifests
vim Dockerfile
# 2. Build and test locally (optional)
docker build -t clawdbox:test .
# 3. Deploy to cluster
make redeploy
# 4. Watch for successful rollout
make logs
```
### Adding New Tools
Edit the Dockerfile and add to the `apt-get install` section:
```dockerfile
RUN apt-get update && apt-get install -y --no-install-recommends \
# ... existing tools ...
your-new-tool \
&& rm -rf /var/lib/apt/lists/*
```
Then:
```bash
make redeploy
```
## Security Notes
- **Non-root:** Container runs as UID 1000 (`claw` user)
- **SSH:** Public key authentication only (no passwords)
- **Sudo:** Passwordless sudo available for `claw` user
- **Capabilities:** All capabilities dropped except `NET_BIND_SERVICE`
- **Network:** Ingress restricted to SSH port (2222)
## Resource Limits
**Requests:**
- CPU: 500m
- Memory: 256Mi
**Limits:**
- CPU: 2000m (2 cores)
- Memory: 2Gi
Adjust in `manifests/deployment.yaml` if needed for heavy workloads.

189
manifests/configmap.yaml Normal file
View File

@@ -0,0 +1,189 @@
# ConfigMap carrying the ClawdBox helper scripts.
# Each key under `data` is one script file: disk-usage.sh, health-check.sh,
# clean-workspace.sh and install-tools.sh.
apiVersion: v1
kind: ConfigMap
metadata:
name: clawdbox-scripts
namespace: clawdbox
labels:
app: clawdbox
data:
disk-usage.sh: |
  #!/bin/bash
  # Report how the /data volume is being used: overall filesystem usage,
  # the ten largest directories, and the size of each package cache.

  echo "=== Data Volume Usage ==="
  df -h /data

  echo -e "\n=== Top 10 Largest Directories ==="
  # Unreadable paths are skipped silently.
  du -h /data 2>/dev/null | sort -rh | head -10

  echo -e "\n=== Cache Sizes ==="
  # Same report for every cache that exists; a missing cache prints nothing.
  for tool in pip npm; do
    cache_dir="/data/.cache/${tool}"
    if [ -d "$cache_dir" ]; then
      cache_size=$(du -sh "$cache_dir" 2>/dev/null | cut -f1)
      echo "${tool} cache: ${cache_size}"
    fi
  done
health-check.sh: |
  #!/bin/bash
  # Comprehensive health diagnostics for troubleshooting.
  # Prints a read-only report (sshd, disk, memory, network, environment,
  # cache directories, current user); nothing is modified.

  # Print a blank line followed by a section banner.
  section() {
    echo ""
    echo "=== $1 ==="
  }

  echo "=== ClawdBox Health Check ==="
  echo "Timestamp: $(date)"

  section "SSH Daemon Status"
  # Match on the exact process name instead of grepping `ps aux`, so an
  # unrelated command line that merely contains "sshd" cannot satisfy the check.
  if pgrep -x sshd > /dev/null; then
    echo "✓ SSH daemon is running"
    ps -f -C sshd
  else
    echo "✗ SSH daemon is NOT running"
  fi

  section "Disk Space"
  df -h /data

  section "Memory Usage"
  free -h

  section "Network Connectivity"
  # -W 3 bounds the wait for a reply so the script cannot stall here.
  # NOTE(review): this only proves ICMP to 8.8.8.8 works; it also fails when
  # the container is not permitted to send ICMP - confirm for this cluster.
  if ping -c 1 -W 3 8.8.8.8 > /dev/null 2>&1; then
    echo "✓ Internet connectivity OK"
  else
    echo "✗ No internet connectivity"
  fi

  section "Environment Variables"
  # Show an explicit marker for unset variables rather than an empty value.
  echo "PIP_CACHE_DIR: ${PIP_CACHE_DIR:-<unset>}"
  echo "npm_config_cache: ${npm_config_cache:-<unset>}"
  echo "HISTFILE: ${HISTFILE:-<unset>}"

  section "Persistent Directories"
  ls -lah /data/.cache/ 2>/dev/null || echo "No .cache directory"

  section "Current Working Directory"
  pwd

  section "User Information"
  whoami
  id
clean-workspace.sh: |
  #!/bin/bash
  # Interactive workspace cleanup helper for the /data volume.
  # Shows current usage, asks what to remove, performs the chosen cleanup and
  # shows usage again. Exits 0 on "Cancel" and 1 on an invalid choice.

  # Remove everything inside the pip and npm cache directories
  # (the directories themselves are kept).
  clean_caches() {
    rm -rf /data/.cache/pip/* /data/.cache/npm/*
  }

  # Delete *.tmp files, and *.log files matched by -mtime +7, anywhere under
  # /data. Errors from paths that cannot be read or modified are ignored.
  clean_temp_files() {
    find /data -type f -name "*.tmp" -delete 2>/dev/null
    find /data -type f -name "*.log" -mtime +7 -delete 2>/dev/null
  }

  echo "=== ClawdBox Workspace Cleanup ==="
  echo ""

  # Show current usage
  echo "Current disk usage:"
  df -h /data
  echo ""

  # Offer cleanup options
  echo "What would you like to clean?"
  echo "1) Package caches (pip + npm)"
  echo "2) Temporary files"
  echo "3) Both caches and temp files"
  echo "4) Show what's using space (no deletion)"
  echo "5) Cancel"
  echo ""

  # -r keeps backslashes in the answer literal; on EOF the answer is empty and
  # falls through to the "Invalid choice" branch.
  read -r -p "Enter choice [1-5]: " choice

  case "$choice" in
    1)
      echo "Cleaning package caches..."
      clean_caches
      echo "✓ Package caches cleared"
      ;;
    2)
      echo "Cleaning temporary files..."
      clean_temp_files
      echo "✓ Temporary files cleaned"
      ;;
    3)
      echo "Cleaning caches and temporary files..."
      clean_caches
      clean_temp_files
      echo "✓ All cleaned"
      ;;
    4)
      echo "Analyzing disk usage..."
      # Silence errors from unreadable paths.
      du -h /data 2>/dev/null | sort -rh | head -20
      ;;
    5)
      echo "Cleanup cancelled"
      exit 0
      ;;
    *)
      echo "Invalid choice"
      exit 1
      ;;
  esac

  echo ""
  echo "New disk usage:"
  df -h /data
install-tools.sh: |
  #!/bin/bash
  # Helper script to install common additional tools.
  # Interactive menu; exits 0 on success or "Exit", 1 on an invalid choice,
  # an empty custom package list, or a failed installation.

  # Install Python packages for the current user.
  # --break-system-packages is passed because the image's own pip install
  # needs it for the same interpreter; without it pip refuses to install
  # into an externally managed environment, even with --user.
  pip_install() {
    pip3 install --user --break-system-packages "$@"
  }

  # Report a failed installation and stop, so the success message below is
  # only printed when the install command actually succeeded.
  fail() {
    echo "✗ Installation failed" >&2
    exit 1
  }

  echo "=== ClawdBox Additional Tools Installer ==="
  echo ""
  echo "This script helps install commonly requested tools."
  echo "All installations are temporary and will be lost on container restart."
  echo ""
  echo "Available tool categories:"
  echo "1) Data science (pandas, numpy, matplotlib)"
  echo "2) Web scraping (beautifulsoup4, selenium)"
  echo "3) CLI tools (fzf, bat, fd-find)"
  echo "4) Custom (specify packages)"
  echo "5) Exit"
  echo ""

  read -r -p "Enter choice [1-5]: " choice

  case "$choice" in
    1)
      echo "Installing data science packages..."
      pip_install pandas numpy matplotlib scipy || fail
      ;;
    2)
      echo "Installing web scraping packages..."
      pip_install beautifulsoup4 selenium requests-html || fail
      ;;
    3)
      echo "Installing CLI tools..."
      echo "Note: These require sudo and apt-get"
      sudo apt-get update || fail
      sudo apt-get install -y fzf bat fd-find || fail
      ;;
    4)
      # Read into an array: names are split on whitespace, but never
      # glob-expanded against the current directory.
      read -r -a packages -p "Enter package names (space-separated): "
      if [ "${#packages[@]}" -eq 0 ]; then
        echo "No packages specified"
        exit 1
      fi
      echo "Installing: ${packages[*]}"
      pip_install "${packages[@]}" || fail
      ;;
    5)
      echo "Exiting"
      exit 0
      ;;
    *)
      echo "Invalid choice"
      exit 1
      ;;
  esac

  echo ""
  echo "✓ Installation complete"

View File

@@ -51,6 +51,13 @@ spec:
ports: ports:
- containerPort: 2222 - containerPort: 2222
name: ssh name: ssh
env:
- name: TERM
value: xterm-256color
- name: DEBIAN_FRONTEND
value: noninteractive
- name: TZ
value: UTC
startupProbe: startupProbe:
tcpSocket: tcpSocket:
port: ssh port: ssh
@@ -77,6 +84,8 @@ spec:
name: ssh-working name: ssh-working
- mountPath: /home/claw/.kube - mountPath: /home/claw/.kube
name: kubeconfig-secret name: kubeconfig-secret
- mountPath: /data/scripts
name: helper-scripts
resources: resources:
limits: limits:
memory: "2Gi" memory: "2Gi"
@@ -103,3 +112,7 @@ spec:
secretName: kube secretName: kube
- name: ssh-working - name: ssh-working
emptyDir: {} emptyDir: {}
- name: helper-scripts
configMap:
name: clawdbox-scripts
defaultMode: 0755