#!/usr/bin/env bash
# S8-SCHED-MULTI-INSTANCE-1: multi-instance lease contention regression
# (dev/test only, aidopdev).
#
# === IMPORTANT: this script is currently "lightweight supplementary evidence"
#     only and must NOT be used as proof of a multi-instance PASS ===
#
# Blocker:
#   Starting a second instance B on port 5006 makes
#   SqlSugar.CodeFirstProvider.InitTables throw:
#     SqlSugar.SqlSugarException: Incorrect datetime value: '0000-00-00 00:00:00'
#       for column 'create_time' at row 1
#   The current dev MySQL 8.0.31 runs a strict SQL_MODE that rejects zero-dates;
#   some shared table (suspected to be one of the Sys* tables) holds historical
#   zero-date rows, so the CodeFirst ALTER validation fails. The 5005 instance
#   started before that SQL_MODE took effect and therefore never hit it; a new
#   instance cannot bootstrap.
#
# Per the CTO eight-clause rules: do not change code to force a workaround.
# This round cannot perform a strict "two backend instances running at the same
# time" multi-instance stress test; the script degrades to a pure DB-behaviour
# verification of the lease primitives (race UPDATE / expired reset / token
# guard). It can serve as supplementary evidence, not as a multi-instance PASS.
#
# === Invariants verified (DB-level lease primitives) ===
#   I1. Two simultaneous UPDATEs contend for the same ready rule; exactly one
#       of them gets affectedRows=1.
#   I2. After lock_until expires, the SQL equivalent of ResetExpiredLeasesAsync
#       releases the lease and a new token can acquire it.
#   I3. With the SQL equivalent of OnRuleCompletedAsync (WHERE id + lock_token),
#       a write-back with an old token gets affectedRows=0 and the new lease
#       state is not overwritten.
#   I4. The same dedup_key never creates duplicate tickets: covered by reusing
#       the antiflap=3 case in sched-exec-regression.sh.
#
# === Strictly observed ===
#   - Do not modify the final state of demo rules 10/11/12.
#   - After the test the TEMP rule has enabled=0; any TEMP exception produced is
#     soft-deleted.
#   - Do not clean up detection_log / rule_detection_state.
#   - Do not perform write operations against the production database.

# No `-e`: individual checks record pass/fail and the run continues to Cleanup.
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Fail fast with a clear message if the shared helper library is missing;
# without it every helper call below would be "command not found".
if [[ ! -r "${SCRIPT_DIR}/s8-regression-common.sh" ]]; then
  echo "FATAL: cannot read ${SCRIPT_DIR}/s8-regression-common.sh" >&2
  exit 1
fi
# shellcheck source=./s8-regression-common.sh
source "${SCRIPT_DIR}/s8-regression-common.sh"

TEMP_RULE='TEMP_SCHED_MULTI_LOCK'
TEMP_DEDUP_OBJ='TEMP-MULTI-LOCK-001'

# Baseline captured up front; it is compared again at the end of the run.
baseline_before=$(read_baseline)
echo "==== sched-multi-instance-regression baseline_before=${baseline_before} ===="
echo "BLOCKER: instance B startup fails on SqlSugar zero-date schema check;"
echo " this run is LIGHTWEIGHT-SIMULATION ONLY, not multi-instance PASS."
echo

# Demo rule pre-snapshot (observation only, no writes).
echo "---- demo rule 10/11/12 pre-snapshot ----"
mysql_run "SELECT id, rule_code, enabled, IFNULL(paused_until,'NULL'),
                  trigger_count_required, recover_count_required, IFNULL(last_status,'NULL')
           FROM ado_s8_watch_rule WHERE id IN (10,11,12) ORDER BY id;" \
  | awk '{printf " rule %s: %s\n", $1, $0}'

# ---------------------------------------------------------------------------
# Setup TEMP rule (idempotent, ready-state, no lease)
# ---------------------------------------------------------------------------
echo
echo "---- Setup TEMP_SCHED_MULTI_LOCK ----"

TEMP_PARAMS='{"dueAtField":"due_at","statusField":"status","completedStates":["CLOSED","DONE","COMPLETED"],"objectCodeField":"related_object_code","objectIdField":"source_object_id","graceMinutes":0,"exceptionTypeCode":"DELIVERY_DELAY"}'
TEMP_EXPR="SELECT '${TEMP_DEDUP_OBJ}' AS related_object_code, '${TEMP_DEDUP_OBJ}' AS source_object_id, DATE_SUB(NOW(), INTERVAL 1 HOUR) AS due_at, 'PENDING' AS status"

existing=$(mysql_run "SELECT COUNT(*) FROM ado_s8_watch_rule WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};")
if [[ "${existing}" == "0" ]]; then
  # TEMP_EXPR contains single quotes, hence the double-quoted SQL literal.
  mysql_run_strict "INSERT INTO ado_s8_watch_rule
      (tenant_id, factory_id, rule_code, scene_code, data_source_id, watch_object_type, expression, severity,
       poll_interval_seconds, enabled, created_at, rule_type, source_object_type, params_json,
       consecutive_failure_count, trigger_count_required, recover_count_required, next_run_at,
       lock_token, locked_by, lock_until)
    VALUES
      (${TENANT_ID}, ${FACTORY_ID}, '${TEMP_RULE}', 'S7', 1, 'ORDER', \"${TEMP_EXPR}\", 'HIGH',
       60, 1, NOW(), 'TIMEOUT', 'ORDER', '${TEMP_PARAMS}',
       0, 1, 1, NOW(),
       NULL, NULL, NULL);" >/dev/null
  echo " inserted TEMP rule"
else
  # Rule already exists from an earlier run: re-enable it and wipe pause/lease/
  # failure bookkeeping so it is immediately ready.
  mysql_run_strict "UPDATE ado_s8_watch_rule
    SET enabled=1, expression=\"${TEMP_EXPR}\", params_json='${TEMP_PARAMS}',
        trigger_count_required=1, recover_count_required=1,
        paused_until=NULL, pause_reason=NULL,
        lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL,
        consecutive_failure_count=0, last_status=NULL, last_error=NULL,
        next_run_at=NOW(), updated_at=NOW()
    WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null
  echo " re-armed TEMP rule"
fi

TEMP_RULE_ID=$(get_rule_id_by_code "${TEMP_RULE}")
echo " TEMP_RULE_ID=${TEMP_RULE_ID}"

# Clear leftover detection_state rows and TEMP exceptions from earlier runs.
# NOTE(review): the file header says rule_detection_state is not cleaned up;
# this DELETE is scoped to the TEMP rule_code only — confirm that is intended.
mysql_run_strict "DELETE FROM ado_s8_rule_detection_state WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null
mysql_run_strict "UPDATE ado_s8_exception SET is_deleted=1, updated_at=NOW() WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;" >/dev/null

# ---------------------------------------------------------------------------
# I1. Lease uniqueness on race (two concurrent UPDATEs contend for one lease)
# ---------------------------------------------------------------------------
echo
echo "---- I1. Lease uniqueness on race ----"

# Replicates the optimistic lease-grab SQL of PickReadyRulesAsync (.NET side:
# SetColumns + Where) in single-rule mode. Two concurrent mysql clients each
# use their own token and report ROW_COUNT().

# Reset the rule to the ready state.
mysql_run_strict "UPDATE ado_s8_watch_rule SET lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL, next_run_at=NOW() WHERE id=${TEMP_RULE_ID};" >/dev/null

TOKEN_A="SIM_A_$(date +%s%N)"
TOKEN_B="SIM_B_$(date +%s%N)"

#######################################
# Try to grab the lease on the TEMP rule with the given token.
# Globals:   DB_PASS, DB_HOST, DB_PORT, DB_USER, DB_NAME, TEMP_RULE_ID (read)
# Arguments: $1 - lock token to write
#            $2 - locked_by value to write
# Outputs:   last line of mysql output (the ROW_COUNT() of the UPDATE) on
#            stdout; mysql's stderr is discarded, so a client failure shows up
#            as empty output.
#######################################
race_update() {
  local token="$1"
  local locked_by="$2"
  # UPDATE and SELECT ROW_COUNT() go in one -e batch so both run on the same
  # connection.
  MYSQL_PWD="${DB_PASS}" mysql -h "${DB_HOST}" -P "${DB_PORT}" -u "${DB_USER}" "${DB_NAME}" \
    --default-character-set=utf8mb4 --connect-timeout=8 -N -B -e "
      UPDATE ado_s8_watch_rule
      SET lock_token='${token}', locked_by='${locked_by}',
          lock_until=DATE_ADD(NOW(), INTERVAL 5 MINUTE),
          running_started_at=NOW(), last_run_id='${token:0:16}', updated_at=NOW()
      WHERE id=${TEMP_RULE_ID}
        AND enabled=1
        AND (paused_until IS NULL OR paused_until <= NOW())
        AND (next_run_at IS NULL OR next_run_at <= NOW())
        AND (lock_until IS NULL OR lock_until <= NOW());
      SELECT ROW_COUNT() AS affected;
    " 2>/dev/null | tail -1
}

# Run both contenders concurrently. A background job cannot assign to a parent
# shell variable, so each contender writes its row count to a temp file.
# Exactly ONE pair of contenders is launched: any additional background
# race_update would also execute the UPDATE and could steal the lease from the
# pair being measured.
TMP_A=$(mktemp)
TMP_B=$(mktemp)
( race_update "${TOKEN_A}" "SIM_A_PID_A" > "${TMP_A}" ) &
PID_A=$!
( race_update "${TOKEN_B}" "SIM_B_PID_B" > "${TMP_B}" ) &
PID_B=$!
wait "${PID_A}" "${PID_B}"

ROWS_A=$(cat "${TMP_A}")
ROWS_B=$(cat "${TMP_B}")
rm -f "${TMP_A}" "${TMP_B}"

echo " affected_rows: A=${ROWS_A} B=${ROWS_B}"

# Expect exactly one contender with 1 affected row and the other with 0.
# Both variables are initialised so later checks stay safe under `set -u`
# when there is no unique winner.
WINNER=""
WINNER_TOKEN=""
if [[ "${ROWS_A}" == "1" && "${ROWS_B}" == "0" ]]; then
  WINNER="A"
  WINNER_TOKEN="${TOKEN_A}"
elif [[ "${ROWS_A}" == "0" && "${ROWS_B}" == "1" ]]; then
  WINNER="B"
  WINNER_TOKEN="${TOKEN_B}"
fi

if [[ -n "${WINNER}" ]]; then
  record_pass "I1: lease race produced exactly one winner (${WINNER})"
else
  record_fail "I1: lease race anomaly — expected exactly one winner (A=${ROWS_A} B=${ROWS_B}); lease primitive broken or DB call failed"
fi

actual_token=$(get_rule_field "${TEMP_RULE_ID}" lock_token)
actual_lockedby=$(get_rule_field "${TEMP_RULE_ID}" locked_by)
echo " DB recorded: lock_token=${actual_token} locked_by=${actual_lockedby}"
# Require a real winner so that two empty strings can never count as a match.
if [[ -n "${WINNER_TOKEN}" && "${actual_token}" == "${WINNER_TOKEN}" ]]; then
  record_pass "I1: DB lock_token matches winner token (${WINNER})"
else
  record_fail "I1: DB lock_token=${actual_token} does NOT match winner token=${WINNER_TOKEN:-<none>}"
fi

# ---------------------------------------------------------------------------
# I3. OnRuleCompletedAsync: an old token must not overwrite the lease
# Ordering: I3 runs first (it relies on the lease grabbed in I1), then I2
# (forced expiry + reset).
# ---------------------------------------------------------------------------
echo
echo "---- I3. OnRuleCompletedAsync token guard (old token must not overwrite) ----"

# Assumes the I1 winner holds WINNER_TOKEN. Simulates an "old process" trying
# to complete this run with a stale token that never held the lease.
OLD_FAKE_TOKEN="STALE_TOKEN_THAT_NEVER_WON"
guard_rows=$(MYSQL_PWD="${DB_PASS}" mysql -h "${DB_HOST}" -P "${DB_PORT}" -u "${DB_USER}" "${DB_NAME}" \
  --default-character-set=utf8mb4 --connect-timeout=8 -N -B -e "
    UPDATE ado_s8_watch_rule
    SET lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL,
        last_status='SUCCESS', last_run_at=NOW(),
        next_run_at=DATE_ADD(NOW(), INTERVAL 60 SECOND), updated_at=NOW()
    WHERE id=${TEMP_RULE_ID} AND lock_token='${OLD_FAKE_TOKEN}';
    SELECT ROW_COUNT();
  " 2>/dev/null | tail -1)
echo " stale-token completion attempt affected_rows=${guard_rows}"
if [[ "${guard_rows}" == "0" ]]; then
  record_pass "I3: old token completion did not overwrite (affected=0)"
else
  record_fail "I3: old token completion overwrote new lease (affected=${guard_rows})"
fi

# Verify lock_token / lock_until still belong to the winner.
post_token=$(get_rule_field "${TEMP_RULE_ID}" lock_token)
post_lockuntil=$(get_rule_field "${TEMP_RULE_ID}" lock_until)
echo " post-guard: lock_token=${post_token} lock_until=${post_lockuntil}"
if [[ -n "${WINNER_TOKEN}" && "${post_token}" == "${WINNER_TOKEN}" ]]; then
  record_pass "I3: lock_token still belongs to winner"
else
  record_fail "I3: lock_token drifted: ${post_token} != ${WINNER_TOKEN:-<none>}"
fi

# ---------------------------------------------------------------------------
# I2. Expired lease release & re-acquisition
# ---------------------------------------------------------------------------
echo
echo "---- I2. Expired lease release & re-acquisition ----"

# Force lock_until into the past to simulate a lease timeout (e.g. the old
# process crashed without releasing).
mysql_run_strict "UPDATE ado_s8_watch_rule SET lock_until=DATE_SUB(NOW(), INTERVAL 1 MINUTE), next_run_at=NOW() WHERE id=${TEMP_RULE_ID};" >/dev/null

# Replicates the SQL of ResetExpiredLeasesAsync, narrowed to the TEMP rule.
released=$(MYSQL_PWD="${DB_PASS}" mysql -h "${DB_HOST}" -P "${DB_PORT}" -u "${DB_USER}" "${DB_NAME}" \
  --default-character-set=utf8mb4 --connect-timeout=8 -N -B -e "
    UPDATE ado_s8_watch_rule
    SET lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL, updated_at=NOW()
    WHERE tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID}
      AND lock_until IS NOT NULL AND lock_until < NOW()
      AND id=${TEMP_RULE_ID};
    SELECT ROW_COUNT();
  " 2>/dev/null | tail -1)
echo " ResetExpiredLeases-equivalent affected=${released}"
if [[ "${released}" == "1" ]]; then
  record_pass "I2: expired lease released (affected=1)"
else
  record_fail "I2: expired lease not released (affected=${released})"
fi

reset_token=$(get_rule_field "${TEMP_RULE_ID}" lock_token)
# Real if/else: with `A && B || C` the failure would also be recorded whenever
# record_pass itself returned non-zero.
if [[ "${reset_token}" == "NULL" ]]; then
  record_pass "I2: lock_token is NULL after reset"
else
  record_fail "I2: lock_token not cleared (${reset_token})"
fi

# Grab the lease again (simulates a new instance).
NEW_TOKEN="SIM_NEW_INSTANCE_$(date +%s%N)"
new_rows=$(MYSQL_PWD="${DB_PASS}" mysql -h "${DB_HOST}" -P "${DB_PORT}" -u "${DB_USER}" "${DB_NAME}" \
  --default-character-set=utf8mb4 --connect-timeout=8 -N -B -e "
    UPDATE ado_s8_watch_rule
    SET lock_token='${NEW_TOKEN}', locked_by='SIM_NEW_PID',
        lock_until=DATE_ADD(NOW(), INTERVAL 5 MINUTE),
        running_started_at=NOW(), updated_at=NOW()
    WHERE id=${TEMP_RULE_ID}
      AND enabled=1
      AND (lock_until IS NULL OR lock_until <= NOW());
    SELECT ROW_COUNT();
  " 2>/dev/null | tail -1)
echo " new-instance re-acquire affected=${new_rows}"
if [[ "${new_rows}" == "1" ]]; then
  record_pass "I2: new instance re-acquired lease post-reset"
else
  record_fail "I2: new instance failed to re-acquire (affected=${new_rows})"
fi

new_token_db=$(get_rule_field "${TEMP_RULE_ID}" lock_token)
# Final I2 check: the lock_token stored in the DB must be the new instance's.
# Real if/else throughout this section: with `A && B || C` the failure would
# also be recorded whenever record_pass itself returned non-zero.
if [[ "${new_token_db}" == "${NEW_TOKEN}" ]]; then
  record_pass "I2: new lock_token recorded"
else
  record_fail "I2: lock_token mismatch (${new_token_db})"
fi

# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------
echo
echo "---- Cleanup ----"

# Disable the TEMP rule and drop any pause / lease state it still carries.
mysql_run_strict "UPDATE ado_s8_watch_rule
  SET enabled=0, paused_until=NULL, pause_reason=NULL,
      lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL,
      updated_at=NOW()
  WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null

# Collect the ids BEFORE the soft delete so they can still be reported.
TEMP_EXC_IDS=$(mysql_run "SELECT IFNULL(GROUP_CONCAT(id),'none') FROM ado_s8_exception WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;")
mysql_run_strict "UPDATE ado_s8_exception SET is_deleted=1, updated_at=NOW() WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;" >/dev/null
echo " TEMP rule_id=${TEMP_RULE_ID} disabled; soft-deleted exception ids=[${TEMP_EXC_IDS}]"

final_temp_enabled=$(get_rule_field "${TEMP_RULE_ID}" enabled)
final_temp_visible=$(mysql_run "SELECT COUNT(*) FROM ado_s8_exception WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;")

if [[ "${final_temp_enabled}" == "0" ]]; then
  record_pass "cleanup: TEMP rule enabled=0"
else
  record_fail "cleanup: TEMP rule still enabled=${final_temp_enabled}"
fi

if [[ "${final_temp_visible}" == "0" ]]; then
  record_pass "cleanup: TEMP exception not in default list"
else
  record_fail "cleanup: TEMP exception still in default list (${final_temp_visible})"
fi

# Demo rules 10/11/12 must be conserved.
# NOTE(review): this compares against a hard-coded expected state rather than
# the state the rules had when the run started — confirm that is intended.
demo_state=$(mysql_run "SELECT GROUP_CONCAT(CONCAT(id,':',enabled,':',IFNULL(paused_until,'NULL'),':',trigger_count_required,':',recover_count_required) ORDER BY id) FROM ado_s8_watch_rule WHERE id IN (10,11,12);")
expected="10:1:NULL:1:1,11:1:NULL:1:1,12:1:NULL:1:1"
if [[ "${demo_state}" == "${expected}" ]]; then
  record_pass "demo rule 10/11/12 conserved"
else
  record_fail "demo rule drifted: ${demo_state}"
fi

assert_baseline_unchanged "${baseline_before}"

echo
print_summary
echo
echo "==== STATUS: LIGHTWEIGHT-SIMULATION ${S8REG_FAIL_COUNT} FAIL — multi-instance PASS BLOCKED on instance B SqlSugar startup ===="
echo "TEMP_RULE=${TEMP_RULE} TEMP_RULE_ID=${TEMP_RULE_ID} TEMP_EXC_IDS=[${TEMP_EXC_IDS}]"
exit_by_summary