sched-exec-regression.sh 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. #!/usr/bin/env bash
  2. # S8-REGRESSION-FIXTURE-1 调度执行专用回归脚本(dev/test only, aidopdev)。
  3. #
  4. # 覆盖 4 类 case:
  5. # A. runtime schema sanity — watch_rule 调度运行时列 + rule_detection_state 表/索引存在
  6. # B. scheduler execution sanity — demo rule 10 (DEMO_ORDER_DELIVERY_TIMEOUT) next_run_at=NOW
  7. # → 等待 Job tick → last_run_at/next_run_at 推进 / lock 三件套释放
  8. # C. antiflap trigger=3 — TEMP_SCHED_TIMEOUT_ANTIFLAP 三次 run-once → hit_count 1/2/3
  9. # → 第三次 CREATED;TEMP rule disabled、TEMP exception soft delete
  10. # D. detection log edge — CREATED(C 已覆盖)/ REFRESHED(第四次 run-once)
  11. # / RECOVERED(高风险,SKIP,由专项脚本覆盖)
  12. #
  13. # 退出码:FAIL=0 → 0;FAIL>0 → 1;致命错误 → 2。
  14. set -uo pipefail
  15. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  16. # shellcheck source=./s8-regression-common.sh
  17. source "${SCRIPT_DIR}/s8-regression-common.sh"
  18. auth_load
  19. DEMO_TIMEOUT_RULE='DEMO_ORDER_DELIVERY_TIMEOUT'
  20. DEMO_TIMEOUT_RULE_ID=10
  21. TEMP_RULE='TEMP_SCHED_TIMEOUT_ANTIFLAP'
  22. TEMP_DEDUP_OBJ='TEMP-ANTIFLAP-001'
  23. baseline_before=$(read_baseline)
  24. DEMO_SNAPSHOT=$(snapshot_demo_rule_state)
  25. echo "==== sched-exec-regression baseline_before=${baseline_before} demo_snapshot=${DEMO_SNAPSHOT} ===="
  26. # ---------------------------------------------------------------------------
  27. # A. runtime schema sanity
  28. # ---------------------------------------------------------------------------
  29. echo "---- A. runtime schema sanity ----"
  30. required_cols=(next_run_at last_run_at last_status last_error last_duration_ms last_run_id \
  31. lock_token locked_by lock_until running_started_at consecutive_failure_count \
  32. paused_until pause_reason trigger_count_required recover_count_required)
  33. for col in "${required_cols[@]}"; do
  34. cnt=$(mysql_run "SELECT COUNT(*) FROM information_schema.COLUMNS WHERE TABLE_SCHEMA='${DB_NAME}' AND TABLE_NAME='ado_s8_watch_rule' AND COLUMN_NAME='${col}';")
  35. if [[ "${cnt}" == "1" ]]; then
  36. record_pass "watch_rule.${col} exists"
  37. else
  38. record_fail "watch_rule.${col} missing"
  39. fi
  40. done
  41. state_table=$(mysql_run "SELECT COUNT(*) FROM information_schema.TABLES WHERE TABLE_SCHEMA='${DB_NAME}' AND TABLE_NAME='ado_s8_rule_detection_state';")
  42. if [[ "${state_table}" == "1" ]]; then
  43. record_pass "ado_s8_rule_detection_state table exists"
  44. else
  45. record_fail "ado_s8_rule_detection_state table missing"
  46. fi
  47. state_idx=$(mysql_run "SELECT COUNT(*) FROM information_schema.STATISTICS WHERE TABLE_SCHEMA='${DB_NAME}' AND TABLE_NAME='ado_s8_rule_detection_state' AND INDEX_NAME='uk_s8_rule_detection_state_dedup';")
  48. if [[ "${state_idx}" -ge 1 ]]; then
  49. record_pass "uk_s8_rule_detection_state_dedup exists"
  50. else
  51. record_fail "uk_s8_rule_detection_state_dedup missing"
  52. fi
  53. # ---------------------------------------------------------------------------
  54. # B. scheduler execution sanity(demo rule 10)
  55. # 策略:将 next_run_at 置为 NOW;轮询 last_run_at 是否推进,最多等 90s(Job 间隔 60s)。
  56. # 注意:仅刷新 last_run_at/next_run_at/lock 等运行态字段,不改 enabled/paused/trigger/recover/params/expression。
  57. # ---------------------------------------------------------------------------
  58. echo "---- B. scheduler execution sanity ----"
  59. if ! require_demo_rule "${DEMO_TIMEOUT_RULE}"; then
  60. record_skip "B: ${DEMO_TIMEOUT_RULE} not found or disabled — cannot validate scheduler execution"
  61. else
  62. pre_last_run_at=$(get_rule_field ${DEMO_TIMEOUT_RULE_ID} last_run_at)
  63. echo "B pre-state: last_run_at=${pre_last_run_at}"
  64. mysql_run_strict "UPDATE ado_s8_watch_rule SET next_run_at=NOW(), lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL WHERE id=${DEMO_TIMEOUT_RULE_ID};" >/dev/null
  65. echo "B nudged: next_run_at=NOW(); waiting Job tick (poll up to 90s)…"
  66. picked_up=0
  67. for ((i=1; i<=18; i++)); do
  68. sleep 5
  69. cur_last_run_at=$(get_rule_field ${DEMO_TIMEOUT_RULE_ID} last_run_at)
  70. if [[ "${cur_last_run_at}" != "${pre_last_run_at}" && "${cur_last_run_at}" != "NULL" ]]; then
  71. picked_up=1
  72. break
  73. fi
  74. done
  75. if (( picked_up == 1 )); then
  76. record_pass "B: rule ${DEMO_TIMEOUT_RULE_ID} picked up by Job (last_run_at advanced ${pre_last_run_at} -> ${cur_last_run_at})"
  77. last_status=$(get_rule_field ${DEMO_TIMEOUT_RULE_ID} last_status)
  78. last_run_id=$(get_rule_field ${DEMO_TIMEOUT_RULE_ID} last_run_id)
  79. last_duration_ms=$(get_rule_field ${DEMO_TIMEOUT_RULE_ID} last_duration_ms)
  80. next_run_at=$(get_rule_field ${DEMO_TIMEOUT_RULE_ID} next_run_at)
  81. lock_token=$(get_rule_field ${DEMO_TIMEOUT_RULE_ID} lock_token)
  82. locked_by=$(get_rule_field ${DEMO_TIMEOUT_RULE_ID} locked_by)
  83. lock_until=$(get_rule_field ${DEMO_TIMEOUT_RULE_ID} lock_until)
  84. echo "B post-state: last_status=${last_status} last_run_id=${last_run_id} last_duration_ms=${last_duration_ms} next_run_at=${next_run_at} lock_token=${lock_token} locked_by=${locked_by} lock_until=${lock_until}"
  85. if [[ "${last_status}" == "SUCCESS" ]]; then
  86. record_pass "B: last_status=SUCCESS"
  87. else
  88. record_fail "B: last_status expected SUCCESS, got ${last_status}"
  89. fi
  90. [[ "${last_run_id}" != "NULL" && -n "${last_run_id}" ]] && record_pass "B: last_run_id populated (${last_run_id})" || record_fail "B: last_run_id missing"
  91. [[ "${last_duration_ms}" != "NULL" && "${last_duration_ms}" != "0" ]] && record_pass "B: last_duration_ms=${last_duration_ms}" || record_fail "B: last_duration_ms missing/zero"
  92. [[ "${next_run_at}" != "NULL" ]] && record_pass "B: next_run_at advanced (${next_run_at})" || record_fail "B: next_run_at NULL"
  93. if [[ "${lock_token}" == "NULL" && "${locked_by}" == "NULL" && "${lock_until}" == "NULL" ]]; then
  94. record_pass "B: lock 三件套已释放"
  95. else
  96. record_fail "B: lock not released (lock_token=${lock_token} locked_by=${locked_by} lock_until=${lock_until})"
  97. fi
  98. else
  99. record_skip "B: Job did not pick up rule within 90s (poll timeout) — possibly Job paused or background load; not failing"
  100. fi
  101. fi
  102. # ---------------------------------------------------------------------------
  103. # C. antiflap trigger=3 (TEMP fixture)
  104. # ---------------------------------------------------------------------------
  105. echo "---- C. antiflap trigger=3 (TEMP fixture) ----"
  106. TEMP_PARAMS='{"dueAtField":"due_at","statusField":"status","completedStates":["CLOSED","DONE","COMPLETED"],"objectCodeField":"related_object_code","objectIdField":"source_object_id","graceMinutes":0,"exceptionTypeCode":"DELIVERY_DELAY"}'
  107. TEMP_EXPR="SELECT '${TEMP_DEDUP_OBJ}' AS related_object_code, '${TEMP_DEDUP_OBJ}' AS source_object_id, DATE_SUB(NOW(), INTERVAL 1 HOUR) AS due_at, 'PENDING' AS status"
  108. # 幂等 upsert,并清空 detection_state 残留 hit_count 以保证从 0 起。
  109. existing=$(mysql_run "SELECT COUNT(*) FROM ado_s8_watch_rule WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};")
  110. if [[ "${existing}" == "0" ]]; then
  111. mysql_run_strict "INSERT INTO ado_s8_watch_rule
  112. (tenant_id, factory_id, rule_code, scene_code, data_source_id, watch_object_type, expression, severity,
  113. poll_interval_seconds, enabled, created_at, rule_type, source_object_type, params_json,
  114. consecutive_failure_count, trigger_count_required, recover_count_required, next_run_at)
  115. VALUES (${TENANT_ID}, ${FACTORY_ID}, '${TEMP_RULE}', 'S7', 1, 'ORDER',
  116. \"${TEMP_EXPR}\", 'HIGH', 60, 1, NOW(), 'TIMEOUT', 'ORDER', '${TEMP_PARAMS}',
  117. 0, 3, 1, NULL);" >/dev/null
  118. echo "C: inserted TEMP rule ${TEMP_RULE}"
  119. else
  120. mysql_run_strict "UPDATE ado_s8_watch_rule SET enabled=1, expression=\"${TEMP_EXPR}\", params_json='${TEMP_PARAMS}', trigger_count_required=3, recover_count_required=1, paused_until=NULL, pause_reason=NULL, lock_token=NULL, locked_by=NULL, lock_until=NULL, consecutive_failure_count=0, last_status=NULL, last_error=NULL, updated_at=NOW() WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null
  121. echo "C: re-armed TEMP rule ${TEMP_RULE}"
  122. fi
  123. TEMP_RULE_ID=$(get_rule_id_by_code "${TEMP_RULE}")
  124. echo "C: TEMP_RULE_ID=${TEMP_RULE_ID}"
  125. # 清理可能残留的 detection_state(防止此前测试遗留 hit_count > 0)
  126. mysql_run_strict "DELETE FROM ado_s8_rule_detection_state WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null
  127. # 清理可能残留的 TEMP exception(同 dedup_key 旧记录),避免被识别为已存在。Soft delete only.
  128. mysql_run_strict "UPDATE ado_s8_exception SET is_deleted=1, updated_at=NOW() WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;" >/dev/null
  129. marker_c=$(mysql_run "SELECT NOW();")
  130. sleep 1
  131. # tick 1 / 2 / 3
  132. for tick in 1 2 3; do
  133. resp=$(run_once_endpoint)
  134. if [[ -z "${resp}" ]]; then
  135. record_fail "C: tick${tick} run-once returned empty response"
  136. break
  137. fi
  138. hit_count=$(mysql_run "SELECT IFNULL(consecutive_hit_count,0) FROM ado_s8_rule_detection_state WHERE rule_code='${TEMP_RULE}' AND dedup_key='T${TENANT_ID}:F${FACTORY_ID}:R${TEMP_RULE}:ORDER:${TEMP_DEDUP_OBJ}';")
  139. active_excs=$(mysql_run "SELECT COUNT(*) FROM ado_s8_exception WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0 AND status<>'CLOSED';")
  140. echo "C tick${tick}: hit_count=${hit_count} active_excs=${active_excs}"
  141. case "${tick}" in
  142. 1)
  143. if [[ "${hit_count}" == "1" && "${active_excs}" == "0" ]]; then
  144. record_pass "C tick1 pending: hit_count=1, no exception"
  145. else
  146. record_fail "C tick1 expected hit_count=1/active=0, got hit_count=${hit_count}/active=${active_excs}"
  147. fi
  148. ;;
  149. 2)
  150. if [[ "${hit_count}" == "2" && "${active_excs}" == "0" ]]; then
  151. record_pass "C tick2 pending: hit_count=2, no exception"
  152. else
  153. record_fail "C tick2 expected hit_count=2/active=0, got hit_count=${hit_count}/active=${active_excs}"
  154. fi
  155. ;;
  156. 3)
  157. if [[ "${hit_count}" -ge 3 && "${active_excs}" == "1" ]]; then
  158. record_pass "C tick3 fired: hit_count=${hit_count}, exception created"
  159. else
  160. record_fail "C tick3 expected hit_count>=3/active=1, got hit_count=${hit_count}/active=${active_excs}"
  161. fi
  162. ;;
  163. esac
  164. sleep 1
  165. done
  166. # CREATED detection_log
  167. created_cnt=$(mysql_run "SELECT COUNT(*) FROM ado_s8_detection_log WHERE rule_code='${TEMP_RULE}' AND detect_result='CREATED' AND detected_at >= '${marker_c}';")
  168. [[ "${created_cnt}" -ge 1 ]] && record_pass "D: CREATED detection_log present (${created_cnt})" || record_fail "D: CREATED detection_log missing"
  169. # state.active_exception_id 写入校验
  170. state_active_excid=$(mysql_run "SELECT IFNULL(active_exception_id,0) FROM ado_s8_rule_detection_state WHERE rule_code='${TEMP_RULE}' AND dedup_key='T${TENANT_ID}:F${FACTORY_ID}:R${TEMP_RULE}:ORDER:${TEMP_DEDUP_OBJ}';")
  171. [[ "${state_active_excid}" != "0" ]] && record_pass "C: state.active_exception_id=${state_active_excid}" || record_fail "C: state.active_exception_id not set"
  172. # ---------------------------------------------------------------------------
  173. # D. detection log edge — REFRESHED(CREATED 已在 C 覆盖;RECOVERED 风险高,SKIP)
  174. # ---------------------------------------------------------------------------
  175. echo "---- D. detection log edge ----"
  176. sleep 1
  177. marker_d=$(mysql_run "SELECT NOW();")
  178. sleep 1
  179. run_once_endpoint >/dev/null
  180. refreshed_cnt=$(mysql_run "SELECT COUNT(*) FROM ado_s8_detection_log WHERE rule_code='${TEMP_RULE}' AND detect_result='REFRESHED' AND detected_at >= '${marker_d}';")
  181. if [[ "${refreshed_cnt}" -ge 1 ]]; then
  182. record_pass "D: REFRESHED detection_log present (${refreshed_cnt})"
  183. else
  184. # 可能是命中后 refresh 路径走 last_detected_at 更新但不写 REFRESHED log;放宽到 last_detected_at 推进。
  185. ts=$(mysql_run "SELECT IFNULL(MAX(last_detected_at),'NULL') FROM ado_s8_exception WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;")
  186. if [[ "${ts}" > "${marker_d}" ]]; then
  187. record_pass "D: last_detected_at advanced past marker_d (${ts} > ${marker_d})"
  188. else
  189. record_fail "D: REFRESHED log absent and last_detected_at did not advance (ts=${ts}, marker=${marker_d})"
  190. fi
  191. fi
  192. record_skip "D: RECOVERED edge — TEMP fixture 翻转风险高(需要业务态切换),由专项脚本覆盖"
  193. # ---------------------------------------------------------------------------
  194. # 收尾:TEMP fixture 关闭 + TEMP exception soft delete
  195. # ---------------------------------------------------------------------------
  196. echo "---- TEMP fixture cleanup ----"
  197. mysql_run_strict "UPDATE ado_s8_watch_rule SET enabled=0, paused_until=NULL, pause_reason=NULL, lock_token=NULL, locked_by=NULL, lock_until=NULL, updated_at=NOW() WHERE rule_code='${TEMP_RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null
  198. TEMP_EXC_IDS=$(mysql_run "SELECT GROUP_CONCAT(id) FROM ado_s8_exception WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;")
  199. mysql_run_strict "UPDATE ado_s8_exception SET is_deleted=1, updated_at=NOW() WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;" >/dev/null
  200. cleanup_temp_sched_approval_ghost_tasks
  201. echo "TEMP cleanup: rule_id=${TEMP_RULE_ID} disabled; soft-deleted exception ids=[${TEMP_EXC_IDS:-none}]"
  202. # 确认 TEMP rule 不再 enabled,且默认列表(is_deleted=0)不可见。
  203. final_temp_enabled=$(get_rule_field "${TEMP_RULE_ID}" enabled)
  204. final_temp_visible=$(mysql_run "SELECT COUNT(*) FROM ado_s8_exception WHERE source_rule_code='${TEMP_RULE}' AND is_deleted=0;")
  205. [[ "${final_temp_enabled}" == "0" ]] && record_pass "cleanup: TEMP rule enabled=0" || record_fail "cleanup: TEMP rule still enabled=${final_temp_enabled}"
  206. [[ "${final_temp_visible}" == "0" ]] && record_pass "cleanup: TEMP exception 不出现在默认列表" || record_fail "cleanup: TEMP exception 仍在默认列表 (${final_temp_visible})"
  207. # ---------------------------------------------------------------------------
  208. # demo rule 10/11/12 漂移检测(不依赖 enabled=1 假设;driver arm 后 child 看到 1,
  209. # 单跑看到 0,两种场景都只检查"未漂移")
  210. # ---------------------------------------------------------------------------
  211. echo "---- demo rule 10/11/12 漂移检测 ----"
  212. assert_demo_rule_state_unchanged "${DEMO_SNAPSHOT}"
  213. # ---------------------------------------------------------------------------
  214. # baseline 守恒
  215. # ---------------------------------------------------------------------------
  216. echo "---- baseline assert ----"
  217. assert_baseline_unchanged "${baseline_before}"
  218. # ---------------------------------------------------------------------------
  219. # summary
  220. # ---------------------------------------------------------------------------
  221. echo
  222. print_summary
  223. echo "TEMP_RULE=${TEMP_RULE} TEMP_RULE_ID=${TEMP_RULE_ID} TEMP_EXC_IDS=[${TEMP_EXC_IDS:-none}]"
  224. exit_by_summary