Files
k3s_auto_deploy/scripts/verify-deployment.sh

277 lines
9.0 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
#
# Verifies a K3s cluster deployment by running a series of checks and
# tallying how many passed, failed, or produced a warning.
set -euo pipefail

# Locate this script, the project root (the script directory's parent) and
# the cluster configuration file below it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
CONFIG_FILE="$PROJECT_DIR/config/cluster-vars.yml"

# Use the shared helper library when it exists next to this script
# (presumably it provides log, log_error and log_warn - confirm against
# lib/common.sh). Otherwise define minimal fallbacks so the script can run
# on its own.
if [ ! -f "$SCRIPT_DIR/lib/common.sh" ]; then
  log() { printf '%s\n' "[INFO] $1"; }
  log_error() { printf '%s\n' "[ERROR] $1" >&2; }
  log_warn() { printf '%s\n' "[WARN] $1"; }
else
  # shellcheck source=/dev/null
  . "$SCRIPT_DIR/lib/common.sh"
fi

log "=== 验证K3s集群部署 ==="
echo

# Result counters, updated by every check and reported in the final summary.
TOTAL_CHECKS=0
PASSED_CHECKS=0
FAILED_CHECKS=0
WARNING_CHECKS=0
#######################################
# Run one verification command silently and record its outcome.
# Globals:
#   TOTAL_CHECKS    (incremented on every call)
#   PASSED_CHECKS   (incremented when the command succeeds)
#   FAILED_CHECKS   (incremented when a critical command fails)
#   WARNING_CHECKS  (incremented when a non-critical command fails)
# Arguments:
#   $1 - human-readable name of the check
#   $2 - shell command line to evaluate (may contain pipes and quotes)
#   $3 - "true" (default) if a failure is critical, anything else to
#        downgrade a failure to a warning
# Outputs:
#   Writes one "检查: <name> ... <result>" line to stdout.
# Returns:
#   Always 0. The outcome is reported through the counters. A non-zero
#   return would abort the script at the first failed check, because the
#   script runs under `set -e` and calls this function as a bare command;
#   the summary would then never be printed.
#######################################
check() {
  local name="$1"
  local command="$2"
  local is_critical="${3:-true}"

  TOTAL_CHECKS=$((TOTAL_CHECKS + 1))
  echo -n "检查: $name ... "

  # eval is needed because callers pass whole command lines (pipelines,
  # quoted jsonpath expressions) as one string. Only trusted literals
  # defined in this script should ever be passed here.
  if eval "$command" &>/dev/null; then
    echo "✓ 通过"
    PASSED_CHECKS=$((PASSED_CHECKS + 1))
  elif [ "$is_critical" = "true" ]; then
    echo "✗ 失败"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
  else
    echo "⚠ 警告"
    WARNING_CHECKS=$((WARNING_CHECKS + 1))
  fi
  return 0
}
#######################################
# Print a titled section followed by the live output of a command.
# This is display-only: it does not touch the check counters.
# Arguments:
#   $1 - section title
#   $2 - shell command line to evaluate; its output is shown as-is
# Outputs:
#   Banner, the command's own stdout/stderr, and a trailing blank line.
#   If the command fails, a warning is emitted through log_warn.
# Returns:
#   Always 0. Under `set -e` an unguarded failure of the display command
#   would terminate the whole verification run, so the failure is reported
#   and execution continues.
#######################################
check_detailed() {
  local name="$1"
  local command="$2"

  echo ""
  echo "=========================================="
  echo " $name"
  echo "=========================================="
  # eval: callers pass complete command lines as a single trusted string.
  if ! eval "$command"; then
    log_warn "命令执行失败: $command"
  fi
  echo ""
  return 0
}
# ---- Section 1: basic environment -------------------------------------
echo "=========================================="
echo " 1. 基础环境检查"
echo "=========================================="
echo ""
check "kubectl命令可用" "command -v kubectl"
check "kubectl连接集群" "kubectl cluster-info"
# Single quotes are deliberate: the variable is expanded (and stays quoted)
# only when check evals the string, so a project path containing spaces or
# glob characters is tested as one file name.
# shellcheck disable=SC2016
check "配置文件存在" 'test -f "$CONFIG_FILE"'
# yq is optional: passing "false" as the third argument makes a missing
# binary count as a warning instead of a failure.
check "yq工具可用" "command -v yq" "false"
# ---- Section 2: K3s cluster state -------------------------------------
echo ""
echo "=========================================="
echo " 2. K3s集群状态"
echo "=========================================="
echo ""
# Passes only when at least one node is listed and no node has a STATUS
# column (field 2) outside the Ready family. "Ready,SchedulingDisabled"
# still counts as Ready. A plain "grep Ready" would pass as soon as any
# single node is Ready, even if others are NotReady.
check "所有节点Ready" \
  "kubectl get nodes --no-headers | awk '\$2 !~ /^Ready/ { bad = 1 } END { exit (NR > 0 && bad == 0) ? 0 : 1 }'"
check "kube-system命名空间存在" "kubectl get namespace kube-system"
# An empty result or "0" available replicas both count as not running.
check "CoreDNS运行正常" "kubectl get deployment coredns -n kube-system -o jsonpath='{.status.availableReplicas}' | grep -v '^0$'"
check_detailed "节点状态" "kubectl get nodes -o wide"
check_detailed "系统Pod状态" "kubectl get pods -n kube-system"
# ---- Section 3: Gitea -------------------------------------------------
echo ""
echo "=========================================="
echo " 3. Gitea服务检查"
echo "=========================================="
echo ""
if kubectl get namespace gitea &>/dev/null; then
  check "Gitea命名空间存在" "kubectl get namespace gitea"
  check "Gitea部署存在" "kubectl get deployment gitea -n gitea"
  if kubectl get deployment gitea -n gitea &>/dev/null; then
    # Only the first pod matching the label is inspected.
    check "Gitea Pod运行正常" "kubectl get pods -n gitea -l app.kubernetes.io/name=gitea -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "Gitea服务可访问" "kubectl get svc gitea-http -n gitea"
    check_detailed "Gitea服务详情" "kubectl get all -n gitea"

    # Access info. The lookups are best-effort: a kubectl error must not
    # abort the run under `set -e`. A service without a nodePort yields an
    # empty string, which is normalised to "N/A" so the URL is never
    # printed with a dangling colon.
    GITEA_NODEPORT=$(kubectl get svc gitea-http -n gitea -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || true)
    GITEA_NODEPORT="${GITEA_NODEPORT:-N/A}"
    # Prefer the first node's ExternalIP, falling back to its InternalIP.
    NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="ExternalIP")].address}' 2>/dev/null || true)
    if [ -z "$NODE_IP" ]; then
      NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || true)
    fi
    echo "Gitea访问信息:"
    echo " URL: http://$NODE_IP:$GITEA_NODEPORT"
    echo ""
  fi
else
  # Gitea is optional: record a warning rather than a failure.
  check "Gitea命名空间存在" "false" "false"
  log_warn "Gitea未部署"
fi
# ---- Section 4: ArgoCD ------------------------------------------------
echo ""
echo "=========================================="
echo " 4. ArgoCD服务检查"
echo "=========================================="
echo ""
if kubectl get namespace argocd &>/dev/null; then
  check "ArgoCD命名空间存在" "kubectl get namespace argocd"
  check "ArgoCD Server部署存在" "kubectl get deployment argocd-server -n argocd"
  if kubectl get deployment argocd-server -n argocd &>/dev/null; then
    # Only the first pod matching each label is inspected.
    check "ArgoCD Server运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-server -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "ArgoCD Application Controller运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-application-controller -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "ArgoCD Repo Server运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-repo-server -o jsonpath='{.items[0].status.phase}' | grep Running"
    check_detailed "ArgoCD服务详情" "kubectl get all -n argocd"

    # Access info. NODE_IP is otherwise only assigned by the Gitea section,
    # so when Gitea is not deployed it is unset here and `set -u` would
    # abort the script. Resolve it locally if needed: first node's
    # ExternalIP, falling back to its InternalIP.
    if [ -z "${NODE_IP:-}" ]; then
      NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="ExternalIP")].address}' 2>/dev/null || true)
      if [ -z "$NODE_IP" ]; then
        NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || true)
      fi
    fi
    # A service without a nodePort yields an empty string; show "N/A".
    ARGOCD_NODEPORT=$(kubectl get svc argocd-server -n argocd -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || true)
    ARGOCD_NODEPORT="${ARGOCD_NODEPORT:-N/A}"
    echo "ArgoCD访问信息:"
    echo " URL: https://$NODE_IP:$ARGOCD_NODEPORT"
    echo " 用户名: admin"
    echo ""
  fi
else
  # ArgoCD is optional: record a warning rather than a failure.
  check "ArgoCD命名空间存在" "false" "false"
  log_warn "ArgoCD未部署"
fi
# ---- Section 5: HTTPS certificates (cert-manager) ---------------------
printf '%s\n' \
  "" \
  "==========================================" \
  " 5. HTTPS证书检查" \
  "==========================================" \
  ""
if ! kubectl get namespace cert-manager >/dev/null 2>&1; then
  # cert-manager is optional: record a warning rather than a failure.
  check "cert-manager命名空间存在" "false" "false"
  log_warn "cert-manager未部署HTTPS功能不可用"
else
  check "cert-manager命名空间存在" "kubectl get namespace cert-manager"
  check "cert-manager部署存在" "kubectl get deployment cert-manager -n cert-manager"
  if kubectl get deployment cert-manager -n cert-manager >/dev/null 2>&1; then
    # Only the first pod matching the label is inspected.
    check "cert-manager运行正常" "kubectl get pods -n cert-manager -l app=cert-manager -o jsonpath='{.items[0].status.phase}' | grep Running"

    # The two listings below are shown only when the corresponding
    # kubectl query succeeds.
    if kubectl get clusterissuer >/dev/null 2>&1; then
      check_detailed "ClusterIssuer状态" "kubectl get clusterissuer"
    fi
    if kubectl get certificate -A >/dev/null 2>&1; then
      check_detailed "证书状态" "kubectl get certificate -A"
    fi
  fi
fi
# ---- Section 6: GitOps workflow (ArgoCD Applications) -----------------
printf '%s\n' \
  "" \
  "==========================================" \
  " 6. GitOps工作流检查" \
  "==========================================" \
  ""
if ! kubectl get namespace argocd >/dev/null 2>&1; then
  # Without ArgoCD there is nothing to inspect; no check is counted.
  log_warn "ArgoCD未部署跳过GitOps检查"
elif ! kubectl get application -n argocd >/dev/null 2>&1; then
  # The Application resource cannot be queried at all.
  check "ArgoCD应用已创建" "false" "false"
  log_warn "ArgoCD CRD可能未就绪"
else
  # Count the Application objects in the argocd namespace.
  APP_COUNT=$(kubectl get application -n argocd --no-headers 2>/dev/null | wc -l)
  if [ "$APP_COUNT" -le 0 ]; then
    check "ArgoCD应用已创建" "false" "false"
    log_warn "未找到ArgoCD应用"
  else
    check "ArgoCD应用已创建" "test $APP_COUNT -gt 0"
    check_detailed "ArgoCD应用状态" "kubectl get application -n argocd"
  fi
fi
# ---- Section 7: storage -----------------------------------------------
echo ""
echo "=========================================="
echo " 7. 存储检查"
echo "=========================================="
echo ""
# `kubectl get` exits 0 even when no objects exist, so the bare command
# can never fail an "exists" check. Require at least one listed row
# instead. `grep .` (not `grep -q`) reads all input, so kubectl is never
# cut off mid-write while pipefail is on. Both checks are non-critical.
check "PersistentVolume存在" "kubectl get pv --no-headers 2>/dev/null | grep ." "false"
check "PersistentVolumeClaim存在" "kubectl get pvc -A --no-headers 2>/dev/null | grep ." "false"
# Show the combined listing whenever the PVC query itself works.
if kubectl get pvc -A &>/dev/null; then
  check_detailed "存储卷状态" "kubectl get pv,pvc -A"
fi
# ---- Summary ----------------------------------------------------------
# Print the totals, then exit 1 if any critical check failed, else exit 0.
printf '%s\n' \
  "" \
  "==========================================" \
  " 验证总结" \
  "==========================================" \
  "" \
  "总检查项: $TOTAL_CHECKS" \
  "通过: $PASSED_CHECKS" \
  "失败: $FAILED_CHECKS" \
  "警告: $WARNING_CHECKS" \
  ""

# Failure path first: report, print troubleshooting hints, and stop.
if ! [ "$FAILED_CHECKS" -eq 0 ]; then
  log_error "发现 $FAILED_CHECKS 个失败项,请检查并修复"
  printf '%s\n' \
    "" \
    "故障排查建议:" \
    " 1. 查看Pod日志: kubectl logs <pod-name> -n <namespace>" \
    " 2. 查看Pod详情: kubectl describe pod <pod-name> -n <namespace>" \
    " 3. 查看事件: kubectl get events -A --sort-by='.lastTimestamp'" \
    " 4. 重新部署: ./scripts/deploy-all.sh" \
    ""
  exit 1
fi

# Success path: every critical check passed; warnings are only mentioned.
log "✓ 所有关键检查通过!"
if [ "$WARNING_CHECKS" -gt 0 ]; then
  log_warn "存在 $WARNING_CHECKS 个警告项,建议检查"
fi
printf '%s\n' \
  "" \
  "==========================================" \
  " 快速访问指南" \
  "==========================================" \
  ""

# Service URLs are listed only when a node IP is known and the matching
# port variable is set, non-empty, and not the "N/A" placeholder.
if [ -n "${NODE_IP:-}" ]; then
  case "${GITEA_NODEPORT:-}" in
    "" | "N/A") ;;
    *)
      printf '%s\n' "Gitea:" " http://$NODE_IP:$GITEA_NODEPORT" ""
      ;;
  esac
  case "${ARGOCD_NODEPORT:-}" in
    "" | "N/A") ;;
    *)
      printf '%s\n' "ArgoCD:" " https://$NODE_IP:$ARGOCD_NODEPORT" " 用户名: admin" ""
      ;;
  esac
fi

printf '%s\n' \
  "常用命令:" \
  " 查看所有Pod: kubectl get pods -A" \
  " 查看节点: kubectl get nodes" \
  " 查看服务: kubectl get svc -A" \
  ""
exit 0