fix: 将 k3s-ansible 作为普通目录添加
This commit is contained in:
276
scripts/verify-deployment.sh
Executable file
276
scripts/verify-deployment.sh
Executable file
@@ -0,0 +1,276 @@
|
||||
#!/bin/bash
#
# Verify a K3s cluster deployment: runs a series of kubectl-based checks,
# prints a per-section report, and exits 0 when no critical check failed
# (1 otherwise).
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
CONFIG_FILE="$PROJECT_DIR/config/cluster-vars.yml"

# Source the common library if available.
if [ -f "$SCRIPT_DIR/lib/common.sh" ]; then
  # shellcheck source=/dev/null
  source "$SCRIPT_DIR/lib/common.sh"
fi

# Define a fallback for each logging helper the library did not provide, so a
# partial (or missing) common.sh can never leave one of them undefined.
declare -F log >/dev/null || log() { echo "[INFO] $1"; }
declare -F log_error >/dev/null || log_error() { echo "[ERROR] $1" >&2; }
declare -F log_warn >/dev/null || log_warn() { echo "[WARN] $1"; }

log "=== 验证K3s集群部署 ==="
echo ""

# Counters updated by check() and reported in the final summary.
TOTAL_CHECKS=0
PASSED_CHECKS=0
FAILED_CHECKS=0
WARNING_CHECKS=0
#######################################
# Run one pass/fail check and record the outcome in the global counters.
# Globals:
#   TOTAL_CHECKS      (incremented on every call)
#   PASSED_CHECKS     (incremented when the command succeeds)
#   FAILED_CHECKS     (incremented when a critical command fails)
#   WARNING_CHECKS    (incremented when a non-critical command fails)
#   CHECK_LAST_STATUS (written: 0 if this check passed, 1 otherwise)
# Arguments:
#   $1 - label printed for the check
#   $2 - command string; executed with eval so it may contain a pipeline.
#        Pass trusted, script-authored strings only.
#   $3 - "true" (default) if a failure is critical, anything else to count
#        a failure as a warning
# Outputs:
#   One progress line on stdout; the command's own output is discarded.
# Returns:
#   Always 0. The script runs under `set -e` and calls check as a bare
#   statement, so a non-zero return would abort the run at the first failed
#   check and the summary would never be printed. Inspect CHECK_LAST_STATUS
#   when the individual result is needed.
#######################################
check() {
  local name="$1"
  local command="$2"
  local is_critical="${3:-true}"

  TOTAL_CHECKS=$((TOTAL_CHECKS + 1))
  echo -n "检查: $name ... "

  if eval "$command" &>/dev/null; then
    echo "✓ 通过"
    PASSED_CHECKS=$((PASSED_CHECKS + 1))
    CHECK_LAST_STATUS=0
  elif [ "$is_critical" = "true" ]; then
    echo "✗ 失败"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
    CHECK_LAST_STATUS=1
  else
    echo "⚠ 警告"
    WARNING_CHECKS=$((WARNING_CHECKS + 1))
    CHECK_LAST_STATUS=1
  fi

  return 0
}
#######################################
# Print a titled banner followed by the live output of a command.
# Arguments:
#   $1 - title shown inside the banner
#   $2 - command string; executed with eval so it may contain a pipeline.
#        Pass trusted, script-authored strings only.
# Outputs:
#   Banner and command output on stdout; a warning on stderr if the command
#   exits non-zero.
# Returns:
#   Always 0. This is an informational display, so a failing command must not
#   abort the whole verification run under `set -e`.
#######################################
check_detailed() {
  local name="$1"
  local command="$2"
  local status=0

  echo ""
  echo "=========================================="
  echo " $name"
  echo "=========================================="
  eval "$command" || status=$?
  if [ "$status" -ne 0 ]; then
    echo "[WARN] 命令执行失败 (退出码 $status): $command" >&2
  fi
  echo ""
}
# ---------------------------------------------------------------------------
# Section 1: basic environment (kubectl, cluster connectivity, config, yq)
# ---------------------------------------------------------------------------
echo "=========================================="
echo " 1. 基础环境检查"
echo "=========================================="
echo ""

check "kubectl命令可用" "command -v kubectl"
check "kubectl连接集群" "kubectl cluster-info"
# Single quotes on purpose: the variable is expanded (and quoted) when check()
# evals the string, so a project path containing spaces still works.
# shellcheck disable=SC2016
check "配置文件存在" 'test -f "$CONFIG_FILE"'

# yq is optional, so its absence is reported as a warning rather than a failure.
check "yq工具可用" "command -v yq" "false"
# ---------------------------------------------------------------------------
# Section 2: K3s cluster state
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 2. K3s集群状态"
echo "=========================================="
echo ""

# Every node must report a STATUS beginning with "Ready" (this also accepts
# "Ready,SchedulingDisabled"). awk exits non-zero when any node does not, or
# when no node is listed at all; merely finding one Ready node is not enough.
# shellcheck disable=SC2016
check "所有节点Ready" 'kubectl get nodes --no-headers | awk "\$2 !~ /^Ready/ { bad = 1 } END { exit (NR == 0 || bad) }"'
check "kube-system命名空间存在" "kubectl get namespace kube-system"
# availableReplicas must be a positive integer; an empty value fails the check.
check "CoreDNS运行正常" "kubectl get deployment coredns -n kube-system -o jsonpath='{.status.availableReplicas}' | grep -E '^[1-9][0-9]*$'"

check_detailed "节点状态" "kubectl get nodes -o wide"
check_detailed "系统Pod状态" "kubectl get pods -n kube-system"
# ---------------------------------------------------------------------------
# Section 3: Gitea service
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 3. Gitea服务检查"
echo "=========================================="
echo ""

if kubectl get namespace gitea &>/dev/null; then
  check "Gitea命名空间存在" "kubectl get namespace gitea"
  check "Gitea部署存在" "kubectl get deployment gitea -n gitea"

  if kubectl get deployment gitea -n gitea &>/dev/null; then
    check "Gitea Pod运行正常" "kubectl get pods -n gitea -l app.kubernetes.io/name=gitea -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "Gitea服务可访问" "kubectl get svc gitea-http -n gitea"

    check_detailed "Gitea服务详情" "kubectl get all -n gitea"

    # Gather Gitea access info. Each lookup is best-effort: a kubectl error
    # must not abort the script under `set -e`, and an empty nodePort (for
    # example a non-NodePort service) is reported as "N/A".
    GITEA_NODEPORT=$(kubectl get svc gitea-http -n gitea -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || true)
    GITEA_NODEPORT="${GITEA_NODEPORT:-N/A}"

    # Prefer the first node's ExternalIP and fall back to its InternalIP.
    NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="ExternalIP")].address}' 2>/dev/null || true)
    if [ -z "$NODE_IP" ]; then
      NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || true)
    fi
    # The filter can yield several space-separated addresses; keep the first.
    NODE_IP="${NODE_IP%% *}"

    echo "Gitea访问信息:"
    echo " URL: http://$NODE_IP:$GITEA_NODEPORT"
    echo ""
  fi
else
  check "Gitea命名空间存在" "false" "false"
  log_warn "Gitea未部署"
fi
# ---------------------------------------------------------------------------
# Section 4: ArgoCD service
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 4. ArgoCD服务检查"
echo "=========================================="
echo ""

if kubectl get namespace argocd &>/dev/null; then
  check "ArgoCD命名空间存在" "kubectl get namespace argocd"
  check "ArgoCD Server部署存在" "kubectl get deployment argocd-server -n argocd"

  if kubectl get deployment argocd-server -n argocd &>/dev/null; then
    check "ArgoCD Server运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-server -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "ArgoCD Application Controller运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-application-controller -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "ArgoCD Repo Server运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-repo-server -o jsonpath='{.items[0].status.phase}' | grep Running"

    check_detailed "ArgoCD服务详情" "kubectl get all -n argocd"

    # Gather ArgoCD access info (best-effort; empty nodePort becomes "N/A").
    ARGOCD_NODEPORT=$(kubectl get svc argocd-server -n argocd -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || true)
    ARGOCD_NODEPORT="${ARGOCD_NODEPORT:-N/A}"

    # NODE_IP is otherwise only assigned in the Gitea section. When Gitea is
    # not deployed it would be unset here and `set -u` would abort the script,
    # so resolve it locally if needed (ExternalIP first, then InternalIP).
    if [ -z "${NODE_IP:-}" ]; then
      NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="ExternalIP")].address}' 2>/dev/null || true)
      if [ -z "$NODE_IP" ]; then
        NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || true)
      fi
      # The filter can yield several space-separated addresses; keep the first.
      NODE_IP="${NODE_IP%% *}"
    fi

    echo "ArgoCD访问信息:"
    echo " URL: https://$NODE_IP:$ARGOCD_NODEPORT"
    echo " 用户名: admin"
    echo ""
  fi
else
  check "ArgoCD命名空间存在" "false" "false"
  log_warn "ArgoCD未部署"
fi
# ---------------------------------------------------------------------------
# Section 5: HTTPS certificates (cert-manager)
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 5. HTTPS证书检查"
echo "=========================================="
echo ""

if kubectl get namespace cert-manager &>/dev/null; then
  check "cert-manager命名空间存在" "kubectl get namespace cert-manager"
  check "cert-manager部署存在" "kubectl get deployment cert-manager -n cert-manager"

  if kubectl get deployment cert-manager -n cert-manager &>/dev/null; then
    check "cert-manager运行正常" "kubectl get pods -n cert-manager -l app=cert-manager -o jsonpath='{.items[0].status.phase}' | grep Running"

    # Show ClusterIssuers only when the resource type can be listed.
    # (&> already redirects both stdout and stderr.)
    if kubectl get clusterissuer &>/dev/null; then
      check_detailed "ClusterIssuer状态" "kubectl get clusterissuer"
    fi

    # Show Certificates only when the resource type can be listed.
    if kubectl get certificate -A &>/dev/null; then
      check_detailed "证书状态" "kubectl get certificate -A"
    fi
  fi
else
  check "cert-manager命名空间存在" "false" "false"
  log_warn "cert-manager未部署,HTTPS功能不可用"
fi
# ---------------------------------------------------------------------------
# Section 6: GitOps workflow (ArgoCD Applications)
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 6. GitOps工作流检查"
echo "=========================================="
echo ""

if kubectl get namespace argocd &>/dev/null; then
  # The fully-qualified resource name avoids resolving "application" to a
  # different CRD that happens to share the short name.
  if kubectl get applications.argoproj.io -n argocd &>/dev/null; then
    # Under pipefail a kubectl error fails the whole pipeline; treat that as
    # "no applications" instead of aborting the script.
    APP_COUNT=$(kubectl get applications.argoproj.io -n argocd --no-headers 2>/dev/null | wc -l) || APP_COUNT=0
    # Normalise whitespace that some wc implementations pad the count with.
    APP_COUNT=$((APP_COUNT))

    if [ "$APP_COUNT" -gt 0 ]; then
      check "ArgoCD应用已创建" "test $APP_COUNT -gt 0"
      check_detailed "ArgoCD应用状态" "kubectl get applications.argoproj.io -n argocd"
    else
      check "ArgoCD应用已创建" "false" "false"
      log_warn "未找到ArgoCD应用"
    fi
  else
    check "ArgoCD应用已创建" "false" "false"
    log_warn "ArgoCD CRD可能未就绪"
  fi
else
  log_warn "ArgoCD未部署,跳过GitOps检查"
fi
# ---------------------------------------------------------------------------
# Section 7: storage (PersistentVolumes and claims)
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 7. 存储检查"
echo "=========================================="
echo ""

# `kubectl get` exits 0 even when nothing exists, so require at least one
# output row. Plain `grep .` (not -q) reads the whole stream, which avoids a
# SIGPIPE-induced pipeline failure under pipefail; check() discards the output.
check "PersistentVolume存在" "kubectl get pv --no-headers | grep ." "false"
check "PersistentVolumeClaim存在" "kubectl get pvc -A --no-headers | grep ." "false"

if kubectl get pvc -A &>/dev/null; then
  check_detailed "存储卷状态" "kubectl get pv,pvc -A"
fi
# ---------------------------------------------------------------------------
# Summary: print the counters, then exit 0 if no critical check failed and
# 1 otherwise.
# ---------------------------------------------------------------------------
echo ""
echo "=========================================="
echo " 验证总结"
echo "=========================================="
echo ""
echo "总检查项: $TOTAL_CHECKS"
echo "通过: $PASSED_CHECKS ✓"
echo "失败: $FAILED_CHECKS ✗"
echo "警告: $WARNING_CHECKS ⚠"
echo ""

if [ "$FAILED_CHECKS" -eq 0 ]; then
  log "✓ 所有关键检查通过!"

  if [ "$WARNING_CHECKS" -gt 0 ]; then
    log_warn "存在 $WARNING_CHECKS 个警告项,建议检查"
  fi

  echo ""
  echo "=========================================="
  echo " 快速访问指南"
  echo "=========================================="
  echo ""

  # The access variables are only set when the matching service section found
  # a deployment, hence the ${var:-} guards under `set -u`.
  if [ -n "${NODE_IP:-}" ]; then
    if [ -n "${GITEA_NODEPORT:-}" ] && [ "$GITEA_NODEPORT" != "N/A" ]; then
      echo "Gitea:"
      echo " http://$NODE_IP:$GITEA_NODEPORT"
      echo ""
    fi

    if [ -n "${ARGOCD_NODEPORT:-}" ] && [ "$ARGOCD_NODEPORT" != "N/A" ]; then
      echo "ArgoCD:"
      echo " https://$NODE_IP:$ARGOCD_NODEPORT"
      echo " 用户名: admin"
      echo ""
    fi
  fi

  echo "常用命令:"
  echo " 查看所有Pod: kubectl get pods -A"
  echo " 查看节点: kubectl get nodes"
  echo " 查看服务: kubectl get svc -A"
  echo ""

  exit 0
else
  log_error "发现 $FAILED_CHECKS 个失败项,请检查并修复"
  echo ""
  echo "故障排查建议:"
  echo " 1. 查看Pod日志: kubectl logs <pod-name> -n <namespace>"
  echo " 2. 查看Pod详情: kubectl describe pod <pod-name> -n <namespace>"
  echo " 3. 查看事件: kubectl get events -A --sort-by='.lastTimestamp'"
  echo " 4. 重新部署: ./scripts/deploy-all.sh"
  echo ""
  exit 1
fi
Reference in New Issue
Block a user