#!/bin/bash
#
# Verifies a K3s cluster deployment.
#
# Runs a series of kubectl-based checks (base tooling, cluster state, Gitea,
# ArgoCD, cert-manager, ArgoCD applications, storage), prints a summary and
# exits 0 when no critical check failed, 1 otherwise.

set -euo pipefail

# Resolve paths relative to this script so it works from any directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
CONFIG_FILE="$PROJECT_DIR/config/cluster-vars.yml"

# Source common library if available; otherwise define minimal logging
# fallbacks so the script stays usable on its own.
if [ -f "$SCRIPT_DIR/lib/common.sh" ]; then
  # shellcheck source=/dev/null
  source "$SCRIPT_DIR/lib/common.sh"
else
  log() { echo "[INFO] $1"; }
  log_error() { echo "[ERROR] $1" >&2; }
  log_warn() { echo "[WARN] $1"; }
fi
log "=== 验证K3s集群部署 ==="
echo ""

# Running tallies; check() increments them and the final summary reports them.
TOTAL_CHECKS=0
PASSED_CHECKS=0
FAILED_CHECKS=0
WARNING_CHECKS=0
#######################################
# Runs one verification command and records the outcome in the tallies.
# Globals:
#   TOTAL_CHECKS, PASSED_CHECKS, FAILED_CHECKS, WARNING_CHECKS (written)
# Arguments:
#   $1 - human-readable name of the check
#   $2 - shell command string; executed with eval, all output discarded.
#        Only pass strings built by this script, never untrusted input.
#   $3 - "true" (default): a failing command counts as a failure;
#        any other value: a failing command counts as a warning
# Outputs:
#   One status line on stdout.
# Returns:
#   Always 0. The outcome lives in the counters; returning non-zero here
#   would make `set -e` abort the whole script at the first failed check,
#   before the remaining checks and the summary could run.
#######################################
check() {
  local name="$1"
  local cmd="$2"
  local is_critical="${3:-true}"

  TOTAL_CHECKS=$((TOTAL_CHECKS + 1))
  printf '%s' "检查: $name ... "

  # Running inside `if` keeps errexit from firing on a failing command.
  if eval "$cmd" &>/dev/null; then
    echo "✓ 通过"
    PASSED_CHECKS=$((PASSED_CHECKS + 1))
  elif [ "$is_critical" = "true" ]; then
    echo "✗ 失败"
    FAILED_CHECKS=$((FAILED_CHECKS + 1))
  else
    echo "⚠ 警告"
    WARNING_CHECKS=$((WARNING_CHECKS + 1))
  fi

  return 0
}
#######################################
# Prints a titled banner followed by the live output of a command.
# Purely informational: nothing is added to the pass/fail tallies.
# Arguments:
#   $1 - section title shown inside the banner
#   $2 - shell command string; executed with eval, output left visible.
#        Only pass strings built by this script, never untrusted input.
# Outputs:
#   Banner and the command's own output on stdout/stderr.
# Returns:
#   0. A failing command is tolerated (its own stderr explains the
#   problem) so that `set -e` does not abort the run before the summary.
#######################################
check_detailed() {
  local name="$1"
  local cmd="$2"

  echo ""
  echo "=========================================="
  echo " $name"
  echo "=========================================="
  # Best effort: this output is for the operator, not a gate.
  eval "$cmd" || true
  echo ""
}
echo "=========================================="
echo " 1. 基础环境检查"
echo "=========================================="
echo ""

check "kubectl命令可用" "command -v kubectl"
check "kubectl连接集群" "kubectl cluster-info"
# Single-quoted on purpose: the path is expanded, already quoted, when
# check() evals the string, so a project path with spaces is not split.
# shellcheck disable=SC2016
check "配置文件存在" 'test -f "$CONFIG_FILE"'

# yq is optional: a missing binary is recorded as a warning, not a failure.
check "yq工具可用" "command -v yq" "false"
echo ""
echo "=========================================="
echo " 2. K3s集群状态"
echo "=========================================="
echo ""

# Passes only when at least one node is listed and every node's STATUS
# column starts with "Ready". A plain `grep Ready` would pass as soon as a
# single node is Ready even if others are NotReady.
check "所有节点Ready" \
  "kubectl get nodes --no-headers | awk '\$2 !~ /^Ready/ { bad = 1 } END { exit (bad || NR == 0) }'"
check "kube-system命名空间存在" "kubectl get namespace kube-system"
# Empty output (field absent) or a literal 0 both make grep exit non-zero.
check "CoreDNS运行正常" "kubectl get deployment coredns -n kube-system -o jsonpath='{.status.availableReplicas}' | grep -v '^0$'"

check_detailed "节点状态" "kubectl get nodes -o wide"
check_detailed "系统Pod状态" "kubectl get pods -n kube-system"
echo ""
echo "=========================================="
echo " 3. Gitea服务检查"
echo "=========================================="
echo ""

if kubectl get namespace gitea &>/dev/null; then
  check "Gitea命名空间存在" "kubectl get namespace gitea"
  check "Gitea部署存在" "kubectl get deployment gitea -n gitea"

  if kubectl get deployment gitea -n gitea &>/dev/null; then
    check "Gitea Pod运行正常" "kubectl get pods -n gitea -l app.kubernetes.io/name=gitea -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "Gitea服务可访问" "kubectl get svc gitea-http -n gitea"

    check_detailed "Gitea服务详情" "kubectl get all -n gitea"

    # Get Gitea access info. A service without a nodePort yields an empty
    # string from kubectl, so normalise that to N/A as well.
    GITEA_NODEPORT=$(kubectl get svc gitea-http -n gitea -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || echo "N/A")
    GITEA_NODEPORT=${GITEA_NODEPORT:-N/A}

    # Prefer the first node's ExternalIP, fall back to its InternalIP.
    # `|| true` keeps a kubectl failure from aborting the run under errexit.
    NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="ExternalIP")].address}' 2>/dev/null || true)
    if [ -z "$NODE_IP" ]; then
      NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || true)
    fi

    echo "Gitea访问信息:"
    echo " URL: http://$NODE_IP:$GITEA_NODEPORT"
    echo ""
  fi
else
  check "Gitea命名空间存在" "false" "false"
  log_warn "Gitea未部署"
fi
echo ""
echo "=========================================="
echo " 4. ArgoCD服务检查"
echo "=========================================="
echo ""

if kubectl get namespace argocd &>/dev/null; then
  check "ArgoCD命名空间存在" "kubectl get namespace argocd"
  check "ArgoCD Server部署存在" "kubectl get deployment argocd-server -n argocd"

  if kubectl get deployment argocd-server -n argocd &>/dev/null; then
    check "ArgoCD Server运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-server -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "ArgoCD Application Controller运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-application-controller -o jsonpath='{.items[0].status.phase}' | grep Running"
    check "ArgoCD Repo Server运行正常" "kubectl get pods -n argocd -l app.kubernetes.io/name=argocd-repo-server -o jsonpath='{.items[0].status.phase}' | grep Running"

    check_detailed "ArgoCD服务详情" "kubectl get all -n argocd"

    # Get ArgoCD access info. A service without a nodePort yields an empty
    # string from kubectl, so normalise that to N/A as well.
    ARGOCD_NODEPORT=$(kubectl get svc argocd-server -n argocd -o jsonpath='{.spec.ports[0].nodePort}' 2>/dev/null || echo "N/A")
    ARGOCD_NODEPORT=${ARGOCD_NODEPORT:-N/A}

    # NODE_IP is otherwise only assigned by the Gitea section; when Gitea is
    # not deployed it is still unset here and `set -u` would abort the
    # script on the echo below. Resolve it locally in that case.
    if [ -z "${NODE_IP:-}" ]; then
      NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="ExternalIP")].address}' 2>/dev/null || true)
      if [ -z "$NODE_IP" ]; then
        NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}' 2>/dev/null || true)
      fi
    fi

    echo "ArgoCD访问信息:"
    echo " URL: https://$NODE_IP:$ARGOCD_NODEPORT"
    echo " 用户名: admin"
    echo ""
  fi
else
  check "ArgoCD命名空间存在" "false" "false"
  log_warn "ArgoCD未部署"
fi
echo ""
echo "=========================================="
echo " 5. HTTPS证书检查"
echo "=========================================="
echo ""

if kubectl get namespace cert-manager &>/dev/null; then
  check "cert-manager命名空间存在" "kubectl get namespace cert-manager"
  check "cert-manager部署存在" "kubectl get deployment cert-manager -n cert-manager"

  if kubectl get deployment cert-manager -n cert-manager &>/dev/null; then
    check "cert-manager运行正常" "kubectl get pods -n cert-manager -l app=cert-manager -o jsonpath='{.items[0].status.phase}' | grep Running"

    # Only list ClusterIssuers when the query itself succeeds.
    if kubectl get clusterissuer &>/dev/null; then
      check_detailed "ClusterIssuer状态" "kubectl get clusterissuer"
    fi

    # Only list Certificates when the query itself succeeds.
    if kubectl get certificate -A &>/dev/null; then
      check_detailed "证书状态" "kubectl get certificate -A"
    fi
  fi
else
  check "cert-manager命名空间存在" "false" "false"
  log_warn "cert-manager未部署,HTTPS功能不可用"
fi
echo ""
echo "=========================================="
echo " 6. GitOps工作流检查"
echo "=========================================="
echo ""

if kubectl get namespace argocd &>/dev/null; then
  # Check for ArgoCD Applications; the query fails when the resource type
  # is unknown to the cluster.
  if kubectl get application -n argocd &>/dev/null; then
    # One output line per Application once headers are suppressed.
    APP_COUNT=$(kubectl get application -n argocd --no-headers 2>/dev/null | wc -l)
    if [ "$APP_COUNT" -gt 0 ]; then
      check "ArgoCD应用已创建" "test $APP_COUNT -gt 0"
      check_detailed "ArgoCD应用状态" "kubectl get application -n argocd"
    else
      check "ArgoCD应用已创建" "false" "false"
      log_warn "未找到ArgoCD应用"
    fi
  else
    check "ArgoCD应用已创建" "false" "false"
    log_warn "ArgoCD CRD可能未就绪"
  fi
else
  log_warn "ArgoCD未部署,跳过GitOps检查"
fi
echo ""
echo "=========================================="
echo " 7. 存储检查"
echo "=========================================="
echo ""

# Storage checks are non-critical: a failure is recorded as a warning.
check "PersistentVolume存在" "kubectl get pv" "false"
check "PersistentVolumeClaim存在" "kubectl get pvc -A" "false"

if kubectl get pvc -A &>/dev/null; then
  check_detailed "存储卷状态" "kubectl get pv,pvc -A"
fi
echo ""
echo "=========================================="
echo " 验证总结"
echo "=========================================="
echo ""
echo "总检查项: $TOTAL_CHECKS"
echo "通过: $PASSED_CHECKS ✓"
echo "失败: $FAILED_CHECKS ✗"
echo "警告: $WARNING_CHECKS ⚠"
echo ""

# The exit status reflects critical failures only; warnings never fail the run.
if [ "$FAILED_CHECKS" -eq 0 ]; then
  log "✓ 所有关键检查通过!"

  if [ "$WARNING_CHECKS" -gt 0 ]; then
    log_warn "存在 $WARNING_CHECKS 个警告项,建议检查"
  fi

  echo ""
  echo "=========================================="
  echo " 快速访问指南"
  echo "=========================================="
  echo ""

  # The access variables are only set when the matching service was found,
  # hence the ${VAR:-} guards under `set -u`.
  if [ -n "${NODE_IP:-}" ]; then
    if [ -n "${GITEA_NODEPORT:-}" ] && [ "$GITEA_NODEPORT" != "N/A" ]; then
      echo "Gitea:"
      echo " http://$NODE_IP:$GITEA_NODEPORT"
      echo ""
    fi

    if [ -n "${ARGOCD_NODEPORT:-}" ] && [ "$ARGOCD_NODEPORT" != "N/A" ]; then
      echo "ArgoCD:"
      echo " https://$NODE_IP:$ARGOCD_NODEPORT"
      echo " 用户名: admin"
      echo ""
    fi
  fi

  echo "常用命令:"
  echo " 查看所有Pod: kubectl get pods -A"
  echo " 查看节点: kubectl get nodes"
  echo " 查看服务: kubectl get svc -A"
  echo ""

  exit 0
else
  log_error "发现 $FAILED_CHECKS 个失败项,请检查并修复"
  echo ""
  echo "故障排查建议:"
  echo " 1. 查看Pod日志: kubectl logs <pod-name> -n <namespace>"
  echo " 2. 查看Pod详情: kubectl describe pod <pod-name> -n <namespace>"
  echo " 3. 查看事件: kubectl get events -A --sort-by='.lastTimestamp'"
  echo " 4. 重新部署: ./scripts/deploy-all.sh"
  echo ""
  exit 1
fi