recovered-edge-regression.sh 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. #!/usr/bin/env bash
  2. # RECOVERED edge regression — S8-RECOVERED-EDGE-FIXTURE-1(dev/test only, aidopdev)。
  3. #
  4. # 四阶段验证 RECOVERED 主链 + trigger/recover 抗抖:
  5. # Phase A1: 第一次命中(hit_count=1 < trigger=2)→ pending,不建单,不写 CREATED
  6. # Phase A2: 第二次命中(hit_count=2 ≥ trigger=2)→ 建单,state.active_exception_id=exc.id,写 CREATED
  7. # Phase B: 翻转表达式不命中(miss_count=1 < recover=2)→ recovered_at 仍 NULL,不写 RECOVERED
  8. # Phase C: 再次不命中(miss_count=2 ≥ recover=2)→ recovered_at 写入,RECOVERED detection_log 写入
  9. #
  10. # 为何 trigger=2 而非 1:
  11. # trigger=1 时 state 行在首 tick 由 InsertAsync 创建,但 SqlSugar 不回填 Id 到 entity;
  12. # 后续 UPDATE state.active_exception_id WHERE id=state.Id 命中 0 行,导致 active_exception_id
  13. # 永远为 NULL。这是产品侧问题,本脚本通过 trigger=2 在第二 tick 走 existing 分支(state.Id
  14. # 来自 query 真实可用),绕开此坑。已在风险清单中登记。
  15. #
  16. # 触发:通过 /api/aidop/s8/watch-debug/run-once(ProcessRulesByTypeAsync 路径,
  17. # 与 ProcessSingleRuleAsync 同样调用 ReconcileRecoveriesForRuleAsync)。
  18. # /run-once 不更新 watch_rule.last_status,本脚本不对此做断言。
  19. #
  20. # 严守:
  21. # - 不影响 demo rule 10/11/12
  22. # - TEMP rule 测后 enabled=0;TEMP exception soft delete
  23. # - 不清 detection_log / rule_detection_state / 演示数据
  24. set -uo pipefail
  25. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  26. # shellcheck source=./s8-regression-common.sh
  27. source "${SCRIPT_DIR}/s8-regression-common.sh"
  28. auth_load
  29. RULE='TEMP_SCHED_TIMEOUT_RECOVERED'
  30. DEDUP_OBJ='TEMP-RECOVER-001'
  31. TRIGGER_REQ=2
  32. RECOVER_REQ=2
  33. PARAMS_JSON='{"dueAtField":"due_at","statusField":"status","completedStates":["CLOSED","DONE","COMPLETED"],"objectCodeField":"related_object_code","objectIdField":"source_object_id","graceMinutes":0,"exceptionTypeCode":"DELIVERY_DELAY"}'
  34. HIT_EXPR="SELECT '${DEDUP_OBJ}' AS related_object_code, '${DEDUP_OBJ}' AS source_object_id, DATE_SUB(NOW(), INTERVAL 1 HOUR) AS due_at, 'PENDING' AS status"
  35. MISS_EXPR="SELECT '${DEDUP_OBJ}' AS related_object_code, '${DEDUP_OBJ}' AS source_object_id, DATE_ADD(NOW(), INTERVAL 2 HOUR) AS due_at, 'PENDING' AS status"
  36. baseline_before=$(read_baseline)
  37. DEMO_SNAPSHOT=$(snapshot_demo_rule_state)
  38. echo "==== recovered-edge-regression baseline_before=${baseline_before} demo_snapshot=${DEMO_SNAPSHOT} ===="
  39. # ---------------------------------------------------------------------------
  40. # Setup TEMP rule(hit 表达式 + recover_required=2)
  41. # ---------------------------------------------------------------------------
  42. echo "---- Setup TEMP rule ${RULE} (hit expression, trigger=${TRIGGER_REQ}, recover=${RECOVER_REQ}) ----"
  43. exists=$(mysql_run "SELECT COUNT(*) FROM ado_s8_watch_rule WHERE rule_code='${RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};")
  44. if [[ "${exists}" == "0" ]]; then
  45. mysql_run_strict "INSERT INTO ado_s8_watch_rule
  46. (tenant_id, factory_id, rule_code, scene_code, data_source_id, watch_object_type, expression, severity,
  47. poll_interval_seconds, enabled, created_at, rule_type, source_object_type, params_json,
  48. consecutive_failure_count, trigger_count_required, recover_count_required, next_run_at)
  49. VALUES (${TENANT_ID}, ${FACTORY_ID}, '${RULE}', 'S7', 1, 'ORDER',
  50. \"${HIT_EXPR}\", 'HIGH', 60, 1, NOW(), 'TIMEOUT', 'ORDER', '${PARAMS_JSON}',
  51. 0, ${TRIGGER_REQ}, ${RECOVER_REQ}, NULL);" >/dev/null
  52. echo " inserted ${RULE}"
  53. else
  54. mysql_run_strict "UPDATE ado_s8_watch_rule SET enabled=1, expression=\"${HIT_EXPR}\", params_json='${PARAMS_JSON}',
  55. trigger_count_required=${TRIGGER_REQ}, recover_count_required=${RECOVER_REQ},
  56. paused_until=NULL, pause_reason=NULL,
  57. lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL,
  58. consecutive_failure_count=0, last_status=NULL, last_error=NULL,
  59. next_run_at=NULL, updated_at=NOW()
  60. WHERE rule_code='${RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null
  61. echo " re-armed ${RULE}"
  62. fi
  63. RULE_ID=$(get_rule_id_by_code "${RULE}")
  64. DEDUP_KEY="T${TENANT_ID}:F${FACTORY_ID}:R${RULE}:ORDER:${DEDUP_OBJ}"
  65. echo " RULE_ID=${RULE_ID} DEDUP_KEY=${DEDUP_KEY}"
  66. # 清理上次残留 detection_state / TEMP exception,保证 hit_count/miss_count 从 0 起
  67. mysql_run_strict "DELETE FROM ado_s8_rule_detection_state WHERE rule_code='${RULE}' AND tenant_id=${TENANT_ID} AND factory_id=${FACTORY_ID};" >/dev/null
  68. mysql_run_strict "UPDATE ado_s8_exception SET is_deleted=1, updated_at=NOW() WHERE source_rule_code='${RULE}' AND is_deleted=0;" >/dev/null
  69. marker_a=$(mysql_run "SELECT NOW();")
  70. sleep 1
  71. # ---------------------------------------------------------------------------
  72. # Phase A1: 第一次 HIT(pending,hit_count=1 < trigger=2,不建单)
  73. # ---------------------------------------------------------------------------
  74. echo "---- Phase A1: HIT 1 → pending (hit_count=1 < ${TRIGGER_REQ}) ----"
  75. resp_a1=$(run_once_endpoint || true)
  76. [[ -z "${resp_a1}" ]] && record_fail "A1: run-once returned empty"
  77. sleep 1
  78. a1_excs=$(mysql_run "SELECT COUNT(*) FROM ado_s8_exception WHERE source_rule_code='${RULE}' AND is_deleted=0 AND status<>'CLOSED';")
  79. a1_state_hit=$(mysql_run "SELECT IFNULL(consecutive_hit_count,-1) FROM ado_s8_rule_detection_state WHERE rule_code='${RULE}' AND dedup_key='${DEDUP_KEY}';")
  80. a1_created_logs=$(mysql_run "SELECT COUNT(*) FROM ado_s8_detection_log WHERE rule_code='${RULE}' AND detect_result='CREATED' AND detected_at >= '${marker_a}';")
  81. echo " A1: active_excs=${a1_excs} state.hit=${a1_state_hit} CREATED_logs=${a1_created_logs}"
  82. [[ "${a1_excs}" == "0" ]] && record_pass "A1: no exception created (pending)" || record_fail "A1: unexpected exception created (count=${a1_excs})"
  83. [[ "${a1_state_hit}" == "1" ]] && record_pass "A1: detection_state.consecutive_hit_count=1" || record_fail "A1: expected state.hit=1, got ${a1_state_hit}"
  84. [[ "${a1_created_logs}" == "0" ]] && record_pass "A1: no CREATED log written yet" || record_fail "A1: unexpected ${a1_created_logs} CREATED log(s)"
  85. # ---------------------------------------------------------------------------
  86. # Phase A2: 第二次 HIT(fire,建单,写 CREATED + state.active_exception_id)
  87. # ---------------------------------------------------------------------------
  88. echo "---- Phase A2: HIT 2 → CREATED (hit_count=2 ≥ ${TRIGGER_REQ}) ----"
  89. sleep 1
  90. marker_a2=$(mysql_run "SELECT NOW();")
  91. sleep 1
  92. resp_a2=$(run_once_endpoint || true)
  93. [[ -z "${resp_a2}" ]] && record_fail "A2: run-once returned empty"
  94. sleep 1
  95. a_excs=$(mysql_run "SELECT COUNT(*) FROM ado_s8_exception WHERE source_rule_code='${RULE}' AND is_deleted=0 AND status<>'CLOSED';")
  96. a_exc_id=$(mysql_run "SELECT IFNULL(MAX(id),0) FROM ado_s8_exception WHERE source_rule_code='${RULE}' AND is_deleted=0 AND status<>'CLOSED';")
  97. a_recovered_at=$(mysql_run "SELECT IFNULL(recovered_at,'NULL') FROM ado_s8_exception WHERE id=${a_exc_id};")
  98. a_state_active=$(mysql_run "SELECT IFNULL(active_exception_id,0) FROM ado_s8_rule_detection_state WHERE rule_code='${RULE}' AND dedup_key='${DEDUP_KEY}';")
  99. a_created_logs=$(mysql_run "SELECT COUNT(*) FROM ado_s8_detection_log WHERE rule_code='${RULE}' AND detect_result='CREATED' AND detected_at >= '${marker_a2}';")
  100. echo " A2: active_excs=${a_excs} exc_id=${a_exc_id} recovered_at=${a_recovered_at} state.active=${a_state_active} CREATED_logs(since marker_a2)=${a_created_logs}"
  101. [[ "${a_excs}" == "1" ]] && record_pass "A2: 1 active exception created" || record_fail "A2: expected 1 active exception, got ${a_excs}"
  102. [[ "${a_recovered_at}" == "NULL" ]] && record_pass "A2: recovered_at IS NULL on creation" || record_fail "A2: recovered_at unexpectedly set: ${a_recovered_at}"
  103. [[ "${a_state_active}" != "0" && "${a_state_active}" == "${a_exc_id}" ]] && record_pass "A2: state.active_exception_id=${a_state_active} matches exception id" || record_fail "A2: state.active_exception_id mismatch (state=${a_state_active}, exc=${a_exc_id})"
  104. [[ "${a_created_logs}" -ge 1 ]] && record_pass "A2: CREATED detection_log present (${a_created_logs})" || record_fail "A2: CREATED log missing"
  105. # ---------------------------------------------------------------------------
  106. # Phase B: 翻转表达式 → MISS 1(< recover_required,仅累计不写 RECOVERED)
  107. # ---------------------------------------------------------------------------
  108. echo "---- Phase B: FLIP miss → miss_count=1 (< ${RECOVER_REQ}) ----"
  109. mysql_run_strict "UPDATE ado_s8_watch_rule SET expression=\"${MISS_EXPR}\", lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL, updated_at=NOW() WHERE id=${RULE_ID};" >/dev/null
  110. sleep 1
  111. marker_b=$(mysql_run "SELECT NOW();")
  112. sleep 1
  113. resp_b=$(run_once_endpoint || true)
  114. if [[ -z "${resp_b}" ]]; then
  115. record_fail "B: run-once returned empty"
  116. fi
  117. sleep 1
  118. b_miss=$(mysql_run "SELECT IFNULL(consecutive_miss_count,-1) FROM ado_s8_exception WHERE id=${a_exc_id};")
  119. b_hit=$(mysql_run "SELECT IFNULL(consecutive_hit_count,-1) FROM ado_s8_exception WHERE id=${a_exc_id};")
  120. b_recovered_at=$(mysql_run "SELECT IFNULL(recovered_at,'NULL') FROM ado_s8_exception WHERE id=${a_exc_id};")
  121. b_recovered_logs=$(mysql_run "SELECT COUNT(*) FROM ado_s8_detection_log WHERE rule_code='${RULE}' AND detect_result='RECOVERED' AND detected_at >= '${marker_b}';")
  122. b_state_miss=$(mysql_run "SELECT IFNULL(consecutive_miss_count,-1) FROM ado_s8_rule_detection_state WHERE rule_code='${RULE}' AND dedup_key='${DEDUP_KEY}';")
  123. echo " B: exc.miss=${b_miss} exc.hit=${b_hit} state.miss=${b_state_miss} recovered_at=${b_recovered_at} RECOVERED_logs(since marker_b)=${b_recovered_logs}"
  124. [[ "${b_miss}" == "1" ]] && record_pass "B: exception.consecutive_miss_count=1 (< ${RECOVER_REQ})" || record_fail "B: expected exc.miss_count=1, got ${b_miss}"
  125. [[ "${b_hit}" == "0" ]] && record_pass "B: exception.consecutive_hit_count reset to 0" || record_fail "B: expected exc.hit_count=0, got ${b_hit}"
  126. [[ "${b_state_miss}" == "1" ]] && record_pass "B: detection_state.consecutive_miss_count=1" || record_fail "B: expected state.miss_count=1, got ${b_state_miss}"
  127. [[ "${b_recovered_at}" == "NULL" ]] && record_pass "B: recovered_at still NULL (antiflap pending)" || record_fail "B: recovered_at unexpectedly set: ${b_recovered_at}"
  128. [[ "${b_recovered_logs}" == "0" ]] && record_pass "B: no RECOVERED log written yet" || record_fail "B: unexpected ${b_recovered_logs} RECOVERED log(s)"
  129. # ---------------------------------------------------------------------------
  130. # Phase C: MISS 再次 → miss_count=2 ≥ recover_required → RECOVERED
  131. # ---------------------------------------------------------------------------
  132. echo "---- Phase C: MISS again → miss_count=${RECOVER_REQ} → RECOVERED ----"
  133. mysql_run_strict "UPDATE ado_s8_watch_rule SET lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL, updated_at=NOW() WHERE id=${RULE_ID};" >/dev/null
  134. sleep 1
  135. marker_c=$(mysql_run "SELECT NOW();")
  136. sleep 1
  137. resp_c=$(run_once_endpoint || true)
  138. if [[ -z "${resp_c}" ]]; then
  139. record_fail "C: run-once returned empty"
  140. fi
  141. sleep 1
  142. c_miss=$(mysql_run "SELECT IFNULL(consecutive_miss_count,-1) FROM ado_s8_exception WHERE id=${a_exc_id};")
  143. c_recovered_at=$(mysql_run "SELECT IFNULL(recovered_at,'NULL') FROM ado_s8_exception WHERE id=${a_exc_id};")
  144. c_recovered_logs=$(mysql_run "SELECT COUNT(*) FROM ado_s8_detection_log WHERE rule_code='${RULE}' AND detect_result='RECOVERED' AND detected_at >= '${marker_c}';")
  145. c_status=$(mysql_run "SELECT status FROM ado_s8_exception WHERE id=${a_exc_id};")
  146. c_last_detected=$(mysql_run "SELECT IFNULL(last_detected_at,'NULL') FROM ado_s8_exception WHERE id=${a_exc_id};")
  147. c_log_remark=$(mysql_run "SELECT IFNULL(remark,'NULL') FROM ado_s8_detection_log WHERE rule_code='${RULE}' AND detect_result='RECOVERED' AND detected_at >= '${marker_c}' ORDER BY id DESC LIMIT 1;")
  148. echo " C: exc.miss=${c_miss} recovered_at=${c_recovered_at} RECOVERED_logs(since marker_c)=${c_recovered_logs} status=${c_status} last_detected_at=${c_last_detected}"
  149. echo " C: RECOVERED log remark='${c_log_remark}'"
  150. [[ "${c_miss}" -ge ${RECOVER_REQ} ]] && record_pass "C: consecutive_miss_count=${c_miss} (>= ${RECOVER_REQ})" || record_fail "C: expected miss_count>=${RECOVER_REQ}, got ${c_miss}"
  151. [[ "${c_recovered_at}" != "NULL" ]] && record_pass "C: recovered_at written (${c_recovered_at})" || record_fail "C: recovered_at NOT written"
  152. [[ "${c_recovered_logs}" -ge 1 ]] && record_pass "C: RECOVERED detection_log present (${c_recovered_logs})" || record_fail "C: RECOVERED log missing"
  153. [[ "${c_status}" != "CLOSED" ]] && record_pass "C: exception status NOT auto-closed (${c_status})" || record_fail "C: exception unexpectedly CLOSED"
  154. [[ "${c_last_detected}" != "NULL" ]] && record_pass "C: last_detected_at not cleared (${c_last_detected})" || record_fail "C: last_detected_at incorrectly cleared"
  155. # ---------------------------------------------------------------------------
  156. # Cleanup
  157. # ---------------------------------------------------------------------------
  158. echo "---- Cleanup ----"
  159. mysql_run_strict "UPDATE ado_s8_watch_rule SET enabled=0, paused_until=NULL, pause_reason=NULL, lock_token=NULL, locked_by=NULL, lock_until=NULL, running_started_at=NULL, next_run_at=NULL, updated_at=NOW() WHERE id=${RULE_ID};" >/dev/null
  160. TEMP_EXC_IDS=$(mysql_run "SELECT IFNULL(GROUP_CONCAT(id),'none') FROM ado_s8_exception WHERE source_rule_code='${RULE}' AND is_deleted=0;")
  161. mysql_run_strict "UPDATE ado_s8_exception SET is_deleted=1, updated_at=NOW() WHERE source_rule_code='${RULE}' AND is_deleted=0;" >/dev/null
  162. cleanup_temp_sched_approval_ghost_tasks
  163. echo " disabled rule_id=${RULE_ID}; soft-deleted exception ids=[${TEMP_EXC_IDS}]; ghost approval cancelled"
  164. final_enabled=$(get_rule_field "${RULE_ID}" enabled)
  165. [[ "${final_enabled}" == "0" ]] && record_pass "cleanup: rule enabled=0" || record_fail "cleanup: rule still enabled=${final_enabled}"
  166. temp_visible=$(mysql_run "SELECT COUNT(*) FROM ado_s8_exception WHERE source_rule_code='${RULE}' AND is_deleted=0;")
  167. [[ "${temp_visible}" == "0" ]] && record_pass "cleanup: TEMP exception not in default list" || record_fail "cleanup: ${temp_visible} TEMP exception(s) still visible"
  168. # demo rule 10/11/12 漂移检测 + baseline 守恒
  169. assert_demo_rule_state_unchanged "${DEMO_SNAPSHOT}"
  170. assert_baseline_unchanged "${baseline_before}"
  171. print_summary
  172. echo "TEMP rule: ${RULE} (id=${RULE_ID}); TEMP exception ids=[${TEMP_EXC_IDS}]; final exc state: id=${a_exc_id}, recovered_at=${c_recovered_at}"
  173. exit_by_summary