| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274 |
- #!/usr/bin/env bash
- # S8-SCHED-MULTI-INSTANCE-1 多实例 lease 抢锁回归(dev/test only, aidopdev)。
- #
- # === 重要:本脚本当前为「轻量补证」,不能作为多实例 PASS 凭证 ===
- #
- # 阻碍项:
- # 尝试在端口 5006 启动第二实例 B 时,SqlSugar.CodeFirstProvider.InitTables 抛出:
- # SqlSugar.SqlSugarException: Incorrect datetime value: '0000-00-00 00:00:00'
- # for column 'create_time' at row 1
- # 当前 dev MySQL 8.0.31 严格 SQL_MODE 拒绝 zero-date;某张共享表(疑为 Sys* 类)
- # 存在历史 zero-date 行,CodeFirst ALTER 校验失败。5005 实例先于该 SQL_MODE 启动
- # 故未触发;新实例无法 bootstrap。
- #
- # 按 CTO 八条款:不改代码硬绕;本轮无法完成"两个后端实例同时运行"严格意义上的多
- # 实例压测;脚本退化为对 lease 原语的纯 DB 行为验证(race UPDATE / expired reset
- # / token guard),可作补证不作多实例 PASS。
- #
- # === 验证不变量(DB 层 lease 原语)===
- # I1. 同时两路 UPDATE 抢同一 ready rule,恰好 1 路 affectedRows=1。
- # I2. lock_until 过期后 ResetExpiredLeasesAsync 等价 SQL 释放,新 token 可抢到。
- # I3. OnRuleCompletedAsync 等价 SQL(WHERE id + lock_token)旧 token 回写
- # affectedRows=0,新 lease 状态不被覆盖。
- # I4. 同一 dedup_key 不重复建单:复用 sched-exec-regression.sh 中 antiflap=3 的覆盖。
- #
- # === 严格遵守 ===
- # - 不修改 demo rule 10/11/12 的最终态。
- # - TEMP rule 测后 enabled=0;TEMP exception(若产生)soft delete。
- # - 不清理 detection_log / rule_detection_state。
- # - 不在生产库执行写操作。
- set -uo pipefail
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
- # shellcheck source=./s8-regression-common.sh
- source "${SCRIPT_DIR}/s8-regression-common.sh"
- TEMP_RULE='TEMP_SCHED_MULTI_LOCK'
- TEMP_DEDUP_OBJ='TEMP-MULTI-LOCK-001'
- baseline_before=$(read_baseline)
- echo "==== sched-multi-instance-regression baseline_before=${baseline_before} ===="
- echo "BLOCKER: instance B startup fails on SqlSugar zero-date schema check;"
- echo " this run is LIGHTWEIGHT-SIMULATION ONLY, not multi-instance PASS."
- echo
- # demo rule pre-snapshot(仅观测,不写)
- echo "---- demo rule 10/11/12 pre-snapshot ----"
- mysql_run "SELECT id, rule_code, enabled, IFNULL(paused_until,'NULL'), trigger_count_required, recover_count_required, IFNULL(last_status,'NULL') FROM ado_s8_watch_rule WHERE id IN (10,11,12) ORDER BY id;" \
- | awk '{printf " rule %s: %s\n", $1, $0}'
- # ---------------------------------------------------------------------------
- # Setup TEMP rule (idempotent, ready-state, no lease)
- # ---------------------------------------------------------------------------
- echo
- echo "---- Setup TEMP_SCHED_MULTI_LOCK ----"
- TEMP_PARAMS='{"dueAtField":"due_at","statusField":"status","completedStates":["CLOSED","DONE","COMPLETED"],"objectCodeField":"related_object_code","objectIdField":"source_object_id","graceMinutes":0,"exceptionTypeCode":"DELIVERY_DELAY"}'
- TEMP_EXPR="SELECT '${TEMP_DEDUP_OBJ}' AS related_object_code, '${TEMP_DEDUP_OBJ}' AS source_object_id, DATE_SUB(NOW(), INTERVAL 1 HOUR) AS due_at, 'PENDING' AS status"
- existing=$(mysql_run "SELECT COUNT(*) FROM ado_s8_watch_rule WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};")
- if [[ "${existing}" == "0" ]]; then
- mysql_run_strict "INSERT INTO ado_s8_watch_rule
- (tenant_id, factory_id, rule_code, scene_code, data_source_id, watch_object_type, expression, severity,
- poll_interval_seconds, enabled, created_at, rule_type, source_object_type, params_json,
- consecutive_failure_count, trigger_count_required, recover_count_required, next_run_at, lock_token, locked_by, lock_until)
- VALUES (${TENANT_ID}, ${FACTORY_ID}, '${TEMP_RULE}', 'S7', 1, 'ORDER',
- \"${TEMP_EXPR}\", 'HIGH', 60, 1, NOW(), 'TIMEOUT', 'ORDER', '${TEMP_PARAMS}',
- 0, 1, 1, NOW(), NULL, NULL, NULL);" >/dev/null
- echo " inserted TEMP rule"
- else
- mysql_run_strict "UPDATE ado_s8_watch_rule SET enabled=1, expression=\"${TEMP_EXPR}\", params_json='${TEMP_PARAMS}', trigger_count_required=1, recover_count_required=1, paused_until=NULL, pause_reason=NULL, lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL, consecutive_failure_count=0, last_status=NULL, last_error=NULL, next_run_at=NOW(), updated_at=NOW() WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null
- echo " re-armed TEMP rule"
- fi
- TEMP_RULE_ID=$(get_rule_id_by_code "${TEMP_RULE}")
- echo " TEMP_RULE_ID=${TEMP_RULE_ID}"
- # 清残留 detection_state 与 TEMP exception
- mysql_run_strict "DELETE FROM ado_s8_rule_detection_state WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null
- mysql_run_strict "UPDATE ado_s8_exception SET is_deleted=1, updated_at=NOW() WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;" >/dev/null
- # ---------------------------------------------------------------------------
- # I1. Lease uniqueness on race (两路并发 UPDATE 抢同一 lease)
- # ---------------------------------------------------------------------------
- echo
- echo "---- I1. Lease uniqueness on race ----"
- # 复刻 PickReadyRulesAsync 的乐观抢锁 SQL(.NET 端 SetColumns + Where),单条 rule 模式。
- # 通过两个并发 mysql_run,各使用独立 token,统计 ROW_COUNT()。
- # 重置成 ready 态。
- mysql_run_strict "UPDATE ado_s8_watch_rule SET lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL, next_run_at=NOW() WHERE id=${TEMP_RULE_ID};" >/dev/null
- TOKEN_A="SIM_A_$(date +%s%N)"
- TOKEN_B="SIM_B_$(date +%s%N)"
- race_update() {
- local token="$1"
- local locked_by="$2"
- # 用复合语句一次性 UPDATE + 输出 ROW_COUNT()。
- MYSQL_PWD="${DB_PASS}" mysql -h "${DB_HOST}" -P "${DB_PORT}" -u "${DB_USER}" "${DB_NAME}" \
- --default-character-set=utf8mb4 --connect-timeout=8 -N -B -e "
- UPDATE ado_s8_watch_rule
- SET lock_token='${token}',
- locked_by='${locked_by}',
- lock_until=DATE_ADD(NOW(), INTERVAL 5 MINUTE),
- running_started_at=NOW(),
- last_run_id='${token:0:16}',
- updated_at=NOW()
- WHERE id=${TEMP_RULE_ID}
- AND enabled=1
- AND (paused_until IS NULL OR paused_until <= NOW())
- AND (next_run_at IS NULL OR next_run_at <= NOW())
- AND (lock_until IS NULL OR lock_until <= NOW());
- SELECT ROW_COUNT() AS affected;
- " 2>/dev/null | tail -1
- }
- # 并发执行
- A_RESULT=$(race_update "${TOKEN_A}" "SIM_A_PID_A") &
- A_PID=$!
- B_RESULT=$(race_update "${TOKEN_B}" "SIM_B_PID_B") &
- B_PID=$!
- # 这里 bash 不直接捕获子 shell stdout 到父变量,改用临时文件
- unset A_RESULT B_RESULT
- TMP_A=$(mktemp); TMP_B=$(mktemp)
- ( race_update "${TOKEN_A}" "SIM_A_PID_A" > "${TMP_A}" ) &
- PID_A=$!
- ( race_update "${TOKEN_B}" "SIM_B_PID_B" > "${TMP_B}" ) &
- PID_B=$!
- wait ${PID_A} ${PID_B}
- ROWS_A=$(cat "${TMP_A}")
- ROWS_B=$(cat "${TMP_B}")
- rm -f "${TMP_A}" "${TMP_B}"
- echo " affected_rows: A=${ROWS_A} B=${ROWS_B}"
- # 校验:恰好一个为 1,另一个为 0
- if [[ "${ROWS_A}" == "1" && "${ROWS_B}" == "0" ]]; then
- WINNER="A"; WINNER_TOKEN="${TOKEN_A}"
- elif [[ "${ROWS_A}" == "0" && "${ROWS_B}" == "1" ]]; then
- WINNER="B"; WINNER_TOKEN="${TOKEN_B}"
- else
- WINNER=""
- fi
- if [[ -n "${WINNER}" ]]; then
- record_pass "I1: lease race produced exactly one winner (${WINNER})"
- else
- record_fail "I1: lease race anomaly — both rows affected (A=${ROWS_A} B=${ROWS_B}); lease primitive broken"
- fi
- actual_token=$(get_rule_field "${TEMP_RULE_ID}" lock_token)
- actual_lockedby=$(get_rule_field "${TEMP_RULE_ID}" locked_by)
- echo " DB recorded: lock_token=${actual_token} locked_by=${actual_lockedby}"
- if [[ "${actual_token}" == "${WINNER_TOKEN}" ]]; then
- record_pass "I1: DB lock_token matches winner token (${WINNER})"
- else
- record_fail "I1: DB lock_token=${actual_token} does NOT match winner token=${WINNER_TOKEN}"
- fi
- # ---------------------------------------------------------------------------
- # I3. OnRuleCompletedAsync 旧 token 防覆盖
- # 进度安排:先做 I3(依赖 I1 已抢到的 lease),再做 I2(强制过期 reset)。
- # ---------------------------------------------------------------------------
- echo
- echo "---- I3. OnRuleCompletedAsync token guard (old token must not overwrite) ----"
- # 假设 I1 winner 持有 WINNER_TOKEN。模拟"旧进程"试图用一个 stale 假 token 完成本次 run。
- OLD_FAKE_TOKEN="STALE_TOKEN_THAT_NEVER_WON"
- guard_rows=$(MYSQL_PWD="${DB_PASS}" mysql -h "${DB_HOST}" -P "${DB_PORT}" -u "${DB_USER}" "${DB_NAME}" \
- --default-character-set=utf8mb4 --connect-timeout=8 -N -B -e "
- UPDATE ado_s8_watch_rule
- SET lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL,
- last_status='SUCCESS', last_run_at=NOW(), next_run_at=DATE_ADD(NOW(), INTERVAL 60 SECOND),
- updated_at=NOW()
- WHERE id=${TEMP_RULE_ID} AND lock_token='${OLD_FAKE_TOKEN}';
- SELECT ROW_COUNT();
- " 2>/dev/null | tail -1)
- echo " stale-token completion attempt affected_rows=${guard_rows}"
- if [[ "${guard_rows}" == "0" ]]; then
- record_pass "I3: old token completion did not overwrite (affected=0)"
- else
- record_fail "I3: old token completion overwrote new lease (affected=${guard_rows})"
- fi
- # 校验 lock_token / lock_until 仍是 winner 的
- post_token=$(get_rule_field "${TEMP_RULE_ID}" lock_token)
- post_lockuntil=$(get_rule_field "${TEMP_RULE_ID}" lock_until)
- echo " post-guard: lock_token=${post_token} lock_until=${post_lockuntil}"
- if [[ "${post_token}" == "${WINNER_TOKEN}" ]]; then
- record_pass "I3: lock_token still belongs to winner"
- else
- record_fail "I3: lock_token drifted: ${post_token} != ${WINNER_TOKEN}"
- fi
- # ---------------------------------------------------------------------------
- # I2. Expired lease release & re-acquisition
- # ---------------------------------------------------------------------------
- echo
- echo "---- I2. Expired lease release & re-acquisition ----"
- # 把 lock_until 强制写为过去时间,模拟 lease 超时(例如旧进程 crash 未释放)。
- mysql_run_strict "UPDATE ado_s8_watch_rule SET lock_until=DATE_SUB(NOW(), INTERVAL 1 MINUTE), next_run_at=NOW() WHERE id=${TEMP_RULE_ID};" >/dev/null
- # 复刻 ResetExpiredLeasesAsync 的 SQL:
- released=$(MYSQL_PWD="${DB_PASS}" mysql -h "${DB_HOST}" -P "${DB_PORT}" -u "${DB_USER}" "${DB_NAME}" \
- --default-character-set=utf8mb4 --connect-timeout=8 -N -B -e "
- UPDATE ado_s8_watch_rule
- SET lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL, updated_at=NOW()
- WHERE tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID}
- AND lock_until IS NOT NULL AND lock_until < NOW()
- AND id=${TEMP_RULE_ID};
- SELECT ROW_COUNT();
- " 2>/dev/null | tail -1)
- echo " ResetExpiredLeases-equivalent affected=${released}"
- if [[ "${released}" == "1" ]]; then
- record_pass "I2: expired lease released (affected=1)"
- else
- record_fail "I2: expired lease not released (affected=${released})"
- fi
- reset_token=$(get_rule_field "${TEMP_RULE_ID}" lock_token)
- [[ "${reset_token}" == "NULL" ]] && record_pass "I2: lock_token is NULL after reset" || record_fail "I2: lock_token not cleared (${reset_token})"
- # 再次抢锁(模拟新实例)
- NEW_TOKEN="SIM_NEW_INSTANCE_$(date +%s%N)"
- new_rows=$(MYSQL_PWD="${DB_PASS}" mysql -h "${DB_HOST}" -P "${DB_PORT}" -u "${DB_USER}" "${DB_NAME}" \
- --default-character-set=utf8mb4 --connect-timeout=8 -N -B -e "
- UPDATE ado_s8_watch_rule
- SET lock_token='${NEW_TOKEN}', locked_by='SIM_NEW_PID', lock_until=DATE_ADD(NOW(), INTERVAL 5 MINUTE),
- running_started_at=NOW(), updated_at=NOW()
- WHERE id=${TEMP_RULE_ID}
- AND enabled=1
- AND (lock_until IS NULL OR lock_until <= NOW());
- SELECT ROW_COUNT();
- " 2>/dev/null | tail -1)
- echo " new-instance re-acquire affected=${new_rows}"
- if [[ "${new_rows}" == "1" ]]; then
- record_pass "I2: new instance re-acquired lease post-reset"
- else
- record_fail "I2: new instance failed to re-acquire (affected=${new_rows})"
- fi
- new_token_db=$(get_rule_field "${TEMP_RULE_ID}" lock_token)
- [[ "${new_token_db}" == "${NEW_TOKEN}" ]] && record_pass "I2: new lock_token recorded" || record_fail "I2: lock_token mismatch (${new_token_db})"
- # ---------------------------------------------------------------------------
- # Cleanup
- # ---------------------------------------------------------------------------
- echo
- echo "---- Cleanup ----"
- mysql_run_strict "UPDATE ado_s8_watch_rule SET enabled=0, paused_until=NULL, pause_reason=NULL, lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL, updated_at=NOW() WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null
- TEMP_EXC_IDS=$(mysql_run "SELECT IFNULL(GROUP_CONCAT(id),'none') FROM ado_s8_exception WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;")
- mysql_run_strict "UPDATE ado_s8_exception SET is_deleted=1, updated_at=NOW() WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;" >/dev/null
- echo " TEMP rule_id=${TEMP_RULE_ID} disabled; soft-deleted exception ids=[${TEMP_EXC_IDS}]"
- final_temp_enabled=$(get_rule_field "${TEMP_RULE_ID}" enabled)
- final_temp_visible=$(mysql_run "SELECT COUNT(*) FROM ado_s8_exception WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;")
- [[ "${final_temp_enabled}" == "0" ]] && record_pass "cleanup: TEMP rule enabled=0" || record_fail "cleanup: TEMP rule still enabled=${final_temp_enabled}"
- [[ "${final_temp_visible}" == "0" ]] && record_pass "cleanup: TEMP exception not in default list" || record_fail "cleanup: TEMP exception still in default list (${final_temp_visible})"
- # demo rule 10/11/12 守恒
- demo_state=$(mysql_run "SELECT GROUP_CONCAT(CONCAT(id,':',enabled,':',IFNULL(paused_until,'NULL'),':',trigger_count_required,':',recover_count_required) ORDER BY id) FROM ado_s8_watch_rule WHERE id IN (10,11,12);")
- expected="10:1:NULL:1:1,11:1:NULL:1:1,12:1:NULL:1:1"
- [[ "${demo_state}" == "${expected}" ]] \
- && record_pass "demo rule 10/11/12 conserved" \
- || record_fail "demo rule drifted: ${demo_state}"
- assert_baseline_unchanged "${baseline_before}"
- echo
- print_summary
- echo
- echo "==== STATUS: LIGHTWEIGHT-SIMULATION ${S8REG_FAIL_COUNT} FAIL — multi-instance PASS BLOCKED on instance B SqlSugar startup ===="
- echo "TEMP_RULE=${TEMP_RULE} TEMP_RULE_ID=${TEMP_RULE_ID} TEMP_EXC_IDS=[${TEMP_EXC_IDS}]"
- exit_by_summary
|