1. 目标项目

部分较老的项目,如果使用的是2025年5月份之前的安装包,在升级时需要同时升级启动脚本。如果使用2025年5月份之后的安装包部署,则不需要执行下面的步骤,部署时已经自动处理。

需要修改的文件包括:docker-compose.yml、run.sh;需要添加的文件有:scripts/agent/healthcheck.sh。

2. docker-compose.yml

需修改docker-compose.yml,修改后的完整脚本如下(不能直接复制,应按实际部署目录修改其中的路径):

# File format 3.4 is the first Compose v3 format that accepts
# `healthcheck.start_period`, which the agent service below uses; with plain
# "3" (= 3.0) schema-validating docker-compose releases reject the file.
# Compose releases that ignore `version` are unaffected by this value.
version: "3.4"
services:
  # nginx: publishes host port 9070 to container port 9060 and is linked to
  # the aiweb service.
  nginx:
    image: 'nginx:alpine'
    restart: always
    ports:
      - '9070:9060'
    volumes:
      # Host directory mounted as nginx's conf.d directory.
      - /home/smartbi-nla-agent/nginx_conf:/etc/nginx/conf.d/
    depends_on:
      - aiweb
    links:
      - 'aiweb:aiweb'

  # propertycenter: runs the mounted /scripts/docker-entrypoint.sh in the
  # aienv4j image; redis is reachable under the alias "airedis".
  propertycenter:
    image: 'aienv4j:v9.1'
    restart: always
    entrypoint: ['/bin/sh', '/scripts/docker-entrypoint.sh']
    depends_on:
      - redis
    links:
      - 'redis:airedis'
    volumes:
      - /home/smartbi-nla-agent/logs:/ailogs
      - /home/smartbi-nla-agent/services:/jars
      - /home/smartbi-nla-agent/scripts/propertycenter:/scripts

  # aiweb: publishes host port 9060 to container port 9082; redis is reachable
  # under the alias "airedis".
  aiweb:
    image: 'aienv4j:v9.1'
    restart: always
    entrypoint: ['/bin/sh', '/scripts/docker-entrypoint.sh']
    ports:
      - '9060:9082'
    depends_on:
      - redis
    links:
      - 'redis:airedis'
    volumes:
      - /home/smartbi-nla-agent/logs:/ailogs
      - /home/smartbi-nla-agent/services:/jars
      - /home/smartbi-nla-agent/scripts/aiweb:/scripts

  # admin: linked to redis (alias "airedis"), propertycenter and mongo
  # (alias "aimongo"); other services reach it under the alias "main".
  admin:
    image: 'aienv4j:v9.1'
    restart: always
    entrypoint: ['/bin/sh', '/scripts/docker-entrypoint.sh']
    depends_on:
      - redis
      - propertycenter
      - mongo
    links:
      - 'redis:airedis'
      - 'propertycenter:propertycenter'
      - 'mongo:aimongo'
    volumes:
      - /home/smartbi-nla-agent/logs:/ailogs
      - /home/smartbi-nla-agent/services:/jars
      - /home/smartbi-nla-agent/scripts/admin:/scripts

  # modulemanager: publishes port 9081 on the host; linked to redis
  # (alias "airedis") and propertycenter.
  modulemanager:
    image: 'aienv4j:v9.1'
    restart: always
    entrypoint: ['/bin/sh', '/scripts/docker-entrypoint.sh']
    ports:
      - '9081:9081'
    depends_on:
      - redis
      - propertycenter
    links:
      - 'redis:airedis'
      - 'propertycenter:propertycenter'
    volumes:
      - /home/smartbi-nla-agent/logs:/ailogs
      - /home/smartbi-nla-agent/services:/jars
      - /home/smartbi-nla-agent/scripts/modulemanager:/scripts

  # aibus: no published ports; linked to redis (alias "airedis") and
  # propertycenter.
  aibus:
    image: 'aienv4j:v9.1'
    restart: always
    entrypoint: ['/bin/sh', '/scripts/docker-entrypoint.sh']
    depends_on:
      - redis
      - propertycenter
    links:
      - 'redis:airedis'
      - 'propertycenter:propertycenter'
    volumes:
      - /home/smartbi-nla-agent/logs:/ailogs
      - /home/smartbi-nla-agent/services:/jars
      - /home/smartbi-nla-agent/scripts/aibus:/scripts

  # smartbiproxy: no published ports; linked to redis (alias "airedis") and
  # propertycenter.
  smartbiproxy:
    image: 'aienv4j:v9.1'
    restart: always
    entrypoint: ['/bin/sh', '/scripts/docker-entrypoint.sh']
    depends_on:
      - redis
      - propertycenter
    links:
      - 'redis:airedis'
      - 'propertycenter:propertycenter'
    volumes:
      - /home/smartbi-nla-agent/logs:/ailogs
      - /home/smartbi-nla-agent/services:/jars
      - /home/smartbi-nla-agent/scripts/smartbiproxy:/scripts

  # nl2sql_llm: runs in the aienv4py-llm image; reaches redis as "airedis" and
  # the admin service as "main". Data and backup directories are host mounts.
  nl2sql_llm:
    image: 'aienv4py-llm:v6'
    restart: always
    entrypoint: ['/bin/sh', '/scripts/docker-entrypoint.sh']
    depends_on:
      - redis
    links:
      - 'redis:airedis'
      - 'admin:main'
    volumes:
      - /home/smartbi-nla-agent/logs:/logs
      - /home/smartbi-nla-agent/scripts/nl2sql_llm:/scripts
      - /home/smartbi-nla-agent/services:/services
      - /home/smartbi-nla-agent/data/nl2sql_data:/data
      - /home/smartbi-nla-agent/backup/backup_data:/backup_data

  # jupyter: shares the aienv4py-agent image and scripts directory with the
  # agent service but starts through docker-entrypoint-runtime.sh.
  jupyter:
    image: 'aienv4py-agent:v1.1'
    restart: always
    entrypoint: ['/bin/sh', '/scripts/docker-entrypoint-runtime.sh']
    # Memory cap of 5G for the container.
    # NOTE(review): `deploy.resources` is not applied by every Compose
    # release outside swarm mode (older docker-compose needs
    # `--compatibility`) — confirm the limit is effective after startup,
    # e.g. with `docker stats`.
    deploy:
      resources:
        limits:
          memory: 5G
    depends_on:
      - redis
    links:
      - 'redis:airedis'
      - 'admin:main'
    volumes:
      - /home/smartbi-nla-agent/logs:/logs
      - /home/smartbi-nla-agent/scripts/agent:/scripts
      - /home/smartbi-nla-agent/services:/services

  # agent: runs the service entrypoint together with the healthcheck.sh
  # watchdog; reaches redis as "airedis", admin as "main", and jupyter.
  agent:
    image: 'aienv4py-agent:v1.1'
    restart: always
    # Both scripts are started as background jobs; `wait` keeps the wrapping
    # shell alive while they run.
    # NOTE(review): healthcheck.sh is executed directly, so the file under
    # scripts/agent must have its executable bit set — confirm.
    entrypoint:
      - /bin/sh
      - -c
      - |
        set -e
        /scripts/docker-entrypoint.sh &
        /scripts/healthcheck.sh &
        wait
    # Docker-level probe: POSTs an empty JSON object to /api/health on
    # port 8085 inside the container.
    healthcheck:
      test:
        - CMD-SHELL
        - "curl -f -X POST http://localhost:8085/api/health -H 'Content-Type: application/json' -d '{}' || exit 1"
      start_period: 300s
      interval: 90s
      timeout: 10s
      retries: 3
    depends_on:
      - redis
      - jupyter
    links:
      - 'redis:airedis'
      - 'admin:main'
      - 'jupyter:jupyter'
    volumes:
      - /home/smartbi-nla-agent/logs:/logs
      - /home/smartbi-nla-agent/scripts/agent:/scripts
      - /home/smartbi-nla-agent/services:/services
      - /home/smartbi-nla-agent/data/nl2sql_data:/data
      # NOTE(review): mounting the Docker socket lets this container control
      # the host's Docker daemon — confirm it is required.
      - /var/run/docker.sock:/var/run/docker.sock

  # mongo: MongoDB 4 with its data directory on the host; port 27017 is
  # published on the host.
  # NOTE(review): the root credentials are stored in plain text here and the
  # port is published — confirm host-level access to 27017 is restricted.
  mongo:
    image: 'mongo:4'
    restart: always
    environment:
      MONGO_INITDB_ROOT_USERNAME: 'root'
      MONGO_INITDB_ROOT_PASSWORD: 'smartbi1#nla'
    ports:
      - '27017:27017'
    volumes:
      - /home/smartbi-nla-agent/data/mongo/data:/data/db
  
  # redis: publishes host port 6688 to container port 3306; other services
  # reach it under the alias "airedis".
  # NOTE(review): container port 3306 and the /var/lib/mysql mount target are
  # MySQL defaults although the service is named redis — presumably the
  # airedis image bundles a MySQL-compatible store; confirm before changing.
  redis:
    image: 'airedis:v5'
    restart: always
    ports:
      - '6688:3306'
    volumes:
      - /home/smartbi-nla-agent/data/config:/var/lib/mysql
      - /home/smartbi-nla-agent/backup/config:/backup

修改点1:jupyter服务加了内存限制

# jupyter service after the change. The only addition is the `deploy` section
# at the end, which caps the container's memory at 5G.
jupyter:
    image: "aienv4py-agent:v1.1"
    volumes:
      - /home/smartbi-nla-agent/logs:/logs
      - /home/smartbi-nla-agent/scripts/agent:/scripts
      - /home/smartbi-nla-agent/services:/services
    links:
      - "redis:airedis"
      - "admin:main"
    depends_on:
      - redis
    entrypoint:
      - /bin/sh
      - /scripts/docker-entrypoint-runtime.sh
    restart: always
    # --- added: memory limit ---
    deploy:
      resources:
        limits:
          memory: 5G

修改点2:agent服务加了健康检查

 agent:
    # agent service after the change. Two parts are new: the `healthcheck`
    # section and the `entrypoint`, which now also starts healthcheck.sh.
    image: "aienv4py-agent:v1.1"
    volumes:
      - /home/smartbi-nla-agent/logs:/logs
      - /home/smartbi-nla-agent/scripts/agent:/scripts
      - /home/smartbi-nla-agent/services:/services
      - /home/smartbi-nla-agent/data/nl2sql_data:/data
      - /var/run/docker.sock:/var/run/docker.sock
    # --- added: Docker health check against /api/health on port 8085 ---
    healthcheck:
      test: ["CMD-SHELL", "curl -f -X POST http://localhost:8085/api/health -H 'Content-Type: application/json' -d '{}' || exit 1"]
      interval: 90s
      timeout: 10s
      retries: 3
      start_period: 300s
    links:
      - "redis:airedis"
      - "admin:main"
      - "jupyter:jupyter"
    depends_on:
      - redis
      - jupyter
    # --- changed: run the service entrypoint and the healthcheck.sh watchdog
    # side by side; `wait` keeps the wrapping shell alive while they run ---
    entrypoint:
      - /bin/sh
      - -c
      - |
        set -e
        /scripts/docker-entrypoint.sh &
        /scripts/healthcheck.sh &
        wait
    restart: always

3. run.sh

# Start-up sequence: run debug_run.sh, bring up propertycenter first, give it
# a fixed 10 seconds, then start every service with two jupyter and two agent
# containers.
sh ./debug_run.sh
docker-compose up -d propertycenter
# printf instead of `echo -e`: `-e` is not portable and is printed literally
# by some /bin/sh implementations. The message is shown in red.
printf '\033[31mWaiting for propertycenter to be up... \033[0m\n'
sleep 10
docker-compose up -d --scale jupyter=2 --scale agent=2

根据实际情况,修改启动的jupyter个数和agent个数。

4. 健康检查脚本

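在scripts/agent目录下添加文件:healthcheck.sh。保存后需执行 chmod +x scripts/agent/healthcheck.sh 赋予可执行权限,因为docker-compose.yml中agent服务的entrypoint会直接执行 /scripts/healthcheck.sh。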

vi scripts/agent/healthcheck.sh

#!/bin/sh
# Watchdog for the agent container: polls the local health endpoint and, after
# MAX_FAILS consecutive failed probes, signals PID 1 and exits.

MAX_FAILS=2       # consecutive failed probes tolerated before giving up
INTERVAL=90       # seconds between probes
PROBE_TIMEOUT=10  # seconds after which a single probe is abandoned
LOG_FILE=/var/log/healthcheck.log
FAIL_COUNT=0

# Sleep for 5 minutes first so the server can finish starting up.
sleep 300

while true; do
    # --max-time bounds each probe: without it, a service that accepts the
    # connection but never answers would block curl, and this loop, forever.
    if ! curl -f --max-time "$PROBE_TIMEOUT" -X POST http://localhost:8085/api/health \
        -H 'Content-Type: application/json' -d '{}' > /dev/null 2>&1; then
        FAIL_COUNT=$((FAIL_COUNT+1))
        echo "[$(date)] Health check failed, count: $FAIL_COUNT" >> "$LOG_FILE"
        if [ "$FAIL_COUNT" -ge "$MAX_FAILS" ]; then
            echo "[$(date)] Max failures reached, exiting container..." >> "$LOG_FILE"
            # Send TERM to the init process (PID 1) to trigger a restart.
            # NOTE(review): Linux drops signals sent to a PID namespace's
            # init process unless it has installed a handler for them —
            # confirm the container's PID 1 really exits on this TERM
            # (for example, that the entrypoint shell sets a trap).
            kill -s TERM 1
            exit 1
        fi
    else
        # Any successful probe resets the consecutive-failure counter.
        FAIL_COUNT=0
    fi
    sleep "$INTERVAL"
done