gen_demo_data.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. #!/usr/bin/env python3
  2. """
  3. Demo 数据智能生成脚本
  4. ====================
  5. 为 Demo 租户生成 30 天 KPI 值数据。支持从 Excel 模板导入用户填入的基准值,
  6. 未填入的自动生成,并保证父子层级数值逻辑自洽。
  7. 用法:
  8. # 全自动生成
  9. python3 scripts/gen_demo_data.py > scripts/demo_data.sql
  10. # 使用 Excel 模板(用户填入了部分基准值)
  11. python3 scripts/gen_demo_data.py --template scripts/demo_data_template.xlsx > scripts/demo_data.sql
  12. # 导入
  13. mysql -h... -u... -p... aidopdev < scripts/demo_data.sql
  14. 依赖: pip install pymysql openpyxl
  15. """
  16. import argparse
  17. import random
  18. import sys
  19. from datetime import date, timedelta
  20. DEMO_TENANT_ID = 1300000000888
  21. FACTORY_ID = 1
  22. DAYS = 30
  23. ID_BASE_L1 = 8880000000
  24. ID_BASE_L2 = 8890000000
  25. ID_BASE_L3 = 8900000000
  26. DB_CONFIG = dict(
  27. host="123.60.180.165",
  28. port=3306,
  29. user="aidopremote",
  30. password="1234567890aiDOP#",
  31. database="aidopdev",
  32. )
  33. random.seed(42)
  34. # ── DB ──────────────────────────────────────────────
  35. def connect_db():
  36. try:
  37. import pymysql
  38. return pymysql.connect(**DB_CONFIG)
  39. except ImportError:
  40. pass
  41. try:
  42. import mysql.connector
  43. return mysql.connector.connect(**DB_CONFIG)
  44. except ImportError:
  45. pass
  46. print("ERROR: 需要 pymysql 或 mysql-connector-python", file=sys.stderr)
  47. sys.exit(1)
  48. def load_kpi_master(conn):
  49. cur = conn.cursor()
  50. cur.execute(
  51. "SELECT MetricCode, ModuleCode, MetricLevel, Direction, Unit, ParentId, Id "
  52. "FROM ado_smart_ops_kpi_master WHERE TenantId = %s ORDER BY SortNo",
  53. (DEMO_TENANT_ID,),
  54. )
  55. rows = cur.fetchall()
  56. cur.close()
  57. return [
  58. dict(
  59. metric_code=r[0],
  60. module_code=r[1],
  61. metric_level=r[2],
  62. direction=r[3] or "higher_is_better",
  63. unit=r[4] or "",
  64. parent_id=r[5],
  65. id=r[6],
  66. )
  67. for r in rows
  68. ]
  69. # ── Excel 模板读取 ──────────────────────────────────
  70. def load_template(path):
  71. """读取 Excel 模板,返回 {metric_code: {target, actual, yellow_threshold, red_threshold}}"""
  72. from openpyxl import load_workbook
  73. wb = load_workbook(path, data_only=True)
  74. ws = wb.active
  75. overrides = {}
  76. for row in ws.iter_rows(min_row=2, values_only=True):
  77. if not row or not row[0]:
  78. continue
  79. code = str(row[0]).strip()
  80. if not code or not code.startswith("S"):
  81. continue
  82. target = row[7] if len(row) > 7 else None
  83. actual = row[8] if len(row) > 8 else None
  84. yellow_th = row[9] if len(row) > 9 else None
  85. red_th = row[10] if len(row) > 10 else None
  86. entry = {}
  87. if target is not None:
  88. entry["target"] = float(target)
  89. if actual is not None:
  90. entry["actual"] = float(actual)
  91. if yellow_th is not None:
  92. entry["yellow_threshold"] = float(yellow_th)
  93. if red_th is not None:
  94. entry["red_threshold"] = float(red_th)
  95. if entry:
  96. overrides[code] = entry
  97. return overrides
  98. # ── 自动生成基准值 ──────────────────────────────────
  99. def auto_target(kpi):
  100. unit = kpi.get("unit", "").strip()
  101. if unit == "%":
  102. return round(random.uniform(80, 98), 2)
  103. elif unit in ("天",):
  104. return round(random.uniform(2, 15), 2)
  105. elif "PPM" in unit:
  106. return round(random.uniform(3000, 8000), 0)
  107. elif "/" in unit:
  108. return round(random.uniform(50, 300), 2)
  109. elif unit in ("个", "颗/人", "家/人", "小时"):
  110. return round(random.uniform(5, 200), 0)
  111. else:
  112. return round(random.uniform(50, 200), 2)
  113. def auto_actual(target, direction):
  114. if direction == "higher_is_better":
  115. factor = random.uniform(0.82, 1.05)
  116. else:
  117. factor = random.uniform(0.90, 1.25)
  118. return round(target * factor, 4)
  119. # ── 父子自洽逻辑 ──────────────────────────────────
  120. def resolve_baselines(masters, overrides):
  121. """
  122. 决定每个指标的 (target, actual) 基准值。
  123. 优先级: 用户填入 > 从父级拆分 / 从子级汇总 > 全自动生成。
  124. 保证父子层级加权平均自洽。
  125. """
  126. id_map = {m["id"]: m for m in masters}
  127. code_map = {m["metric_code"]: m for m in masters}
  128. children_of = {}
  129. for m in masters:
  130. pid = m["parent_id"]
  131. if pid:
  132. children_of.setdefault(pid, []).append(m)
  133. baselines = {}
  134. def get_or_gen(m):
  135. code = m["metric_code"]
  136. if code in baselines:
  137. return baselines[code]
  138. ov = overrides.get(code, {})
  139. user_target = ov.get("target")
  140. user_actual = ov.get("actual")
  141. kids = children_of.get(m["id"], [])
  142. if user_target is not None and user_actual is not None:
  143. baselines[code] = (user_target, user_actual)
  144. _propagate_to_children(m, user_target, user_actual, kids, overrides)
  145. return baselines[code]
  146. if user_target is not None:
  147. actual = auto_actual(user_target, m["direction"])
  148. baselines[code] = (user_target, actual)
  149. _propagate_to_children(m, user_target, actual, kids, overrides)
  150. return baselines[code]
  151. if user_actual is not None:
  152. tgt = _infer_target_from_actual(user_actual, m["direction"])
  153. baselines[code] = (tgt, user_actual)
  154. _propagate_to_children(m, tgt, user_actual, kids, overrides)
  155. return baselines[code]
  156. if kids:
  157. kid_baselines = [get_or_gen(k) for k in kids]
  158. any_kid_has_user = any(
  159. overrides.get(k["metric_code"], {}).get("target") is not None
  160. or overrides.get(k["metric_code"], {}).get("actual") is not None
  161. for k in kids
  162. )
  163. if any_kid_has_user:
  164. avg_t = sum(b[0] for b in kid_baselines) / len(kid_baselines)
  165. avg_a = sum(b[1] for b in kid_baselines) / len(kid_baselines)
  166. baselines[code] = (round(avg_t, 4), round(avg_a, 4))
  167. return baselines[code]
  168. tgt = auto_target(m)
  169. actual = auto_actual(tgt, m["direction"])
  170. baselines[code] = (tgt, actual)
  171. _propagate_to_children(m, tgt, actual, kids, overrides)
  172. return baselines[code]
  173. def _propagate_to_children(parent_m, p_target, p_actual, kids, overrides):
  174. if not kids:
  175. return
  176. n = len(kids)
  177. for i, k in enumerate(kids):
  178. kcode = k["metric_code"]
  179. if kcode in baselines:
  180. continue
  181. kov = overrides.get(kcode, {})
  182. if kov.get("target") is not None or kov.get("actual") is not None:
  183. continue
  184. if i < n - 1:
  185. noise_t = random.uniform(-0.08, 0.08)
  186. noise_a = random.uniform(-0.08, 0.08)
  187. ct = p_target * (1 + noise_t)
  188. ca = p_actual * (1 + noise_a)
  189. else:
  190. already = [baselines[kids[j]["metric_code"]] for j in range(i) if kids[j]["metric_code"] in baselines]
  191. if already:
  192. sum_t = sum(b[0] for b in already)
  193. sum_a = sum(b[1] for b in already)
  194. ct = p_target * n - sum_t
  195. ca = p_actual * n - sum_a
  196. ct = max(ct, p_target * 0.7)
  197. ca = max(ca, p_actual * 0.5)
  198. else:
  199. ct = p_target
  200. ca = p_actual
  201. baselines[kcode] = (round(ct, 4), round(ca, 4))
  202. grandkids = children_of.get(k["id"], [])
  203. _propagate_to_children(k, ct, ca, grandkids, overrides)
  204. def _infer_target_from_actual(actual, direction):
  205. if direction == "higher_is_better":
  206. return round(actual / random.uniform(0.85, 1.02), 4)
  207. else:
  208. return round(actual / random.uniform(0.95, 1.20), 4)
  209. for m in masters:
  210. if m["metric_level"] == 1:
  211. get_or_gen(m)
  212. for m in masters:
  213. if m["metric_code"] not in baselines:
  214. get_or_gen(m)
  215. return baselines
  216. # ── 日值生成 ──────────────────────────────────────
  217. def gen_daily_values(baseline_actual, direction, days=DAYS):
  218. """以 baseline_actual 为中心生成 30 天日值,带轻微波动但均值 ≈ baseline。"""
  219. vals = []
  220. for i in range(days):
  221. noise = random.gauss(0, 0.03)
  222. vals.append(round(max(0, baseline_actual * (1 + noise)), 4))
  223. return vals
  224. def status_color(val, target, direction, yellow_th=None, red_th=None):
  225. if target == 0:
  226. return "green"
  227. if direction == "higher_is_better":
  228. y_pct = (yellow_th or 95) / 100.0
  229. r_pct = (red_th or 80) / 100.0
  230. if val >= target:
  231. return "green"
  232. elif val >= target * y_pct:
  233. return "yellow"
  234. else:
  235. return "red"
  236. else:
  237. y_pct = (yellow_th or 110) / 100.0
  238. r_pct = (red_th or 120) / 100.0
  239. if val <= target:
  240. return "green"
  241. elif val <= target * y_pct:
  242. return "yellow"
  243. else:
  244. return "red"
  245. def trend_flag(vals, idx):
  246. if idx == 0:
  247. return "flat"
  248. prev = vals[idx - 1]
  249. cur = vals[idx]
  250. if prev == 0:
  251. return "up" if cur > 0 else "flat"
  252. change = (cur - prev) / prev
  253. if change > 0.02:
  254. return "up"
  255. elif change < -0.02:
  256. return "down"
  257. return "flat"
  258. def esc(s):
  259. if s is None:
  260. return "NULL"
  261. return "'" + str(s).replace("\\", "\\\\").replace("'", "\\'") + "'"
  262. # ── 主流程 ────────────────────────────────────────
  263. def main():
  264. parser = argparse.ArgumentParser(description="Demo 数据智能生成")
  265. parser.add_argument(
  266. "--template", "-t",
  267. help="Excel 模板路径(含用户填入的基准值),不提供则全自动生成",
  268. )
  269. args = parser.parse_args()
  270. conn = connect_db()
  271. masters = load_kpi_master(conn)
  272. conn.close()
  273. if not masters:
  274. print(f"-- ERROR: 租户 {DEMO_TENANT_ID} 无 KpiMaster 数据", file=sys.stderr)
  275. sys.exit(1)
  276. overrides = {}
  277. if args.template:
  278. overrides = load_template(args.template)
  279. print(f"-- 从模板读取了 {len(overrides)} 条用户填入的基准值", file=sys.stderr)
  280. baselines = resolve_baselines(masters, overrides)
  281. today = date.today()
  282. start_date = today - timedelta(days=DAYS - 1)
  283. l1s = [m for m in masters if m["metric_level"] == 1]
  284. l2s = [m for m in masters if m["metric_level"] == 2]
  285. l3s = [m for m in masters if m["metric_level"] == 3]
  286. daily_vals = {}
  287. targets = {}
  288. for m in masters:
  289. code = m["metric_code"]
  290. tgt, actual_base = baselines[code]
  291. targets[code] = tgt
  292. daily_vals[code] = gen_daily_values(actual_base, m["direction"])
  293. print("-- ============================================================")
  294. print(f"-- Demo data for tenant {DEMO_TENANT_ID}, {DAYS} days")
  295. print(f"-- Metrics: L1={len(l1s)}, L2={len(l2s)}, L3={len(l3s)}")
  296. if overrides:
  297. print(f"-- User overrides: {len(overrides)} metrics from template")
  298. print("-- ============================================================")
  299. print()
  300. print(f"DELETE FROM ado_s9_kpi_value_l1_day WHERE tenant_id = {DEMO_TENANT_ID};")
  301. print(f"DELETE FROM ado_s9_kpi_value_l2_day WHERE tenant_id = {DEMO_TENANT_ID};")
  302. print(f"DELETE FROM ado_s9_kpi_value_l3_day WHERE tenant_id = {DEMO_TENANT_ID};")
  303. print()
  304. def emit_inserts(level_list, table, id_base):
  305. if not level_list:
  306. return
  307. print(
  308. f"INSERT INTO `{table}` "
  309. f"(id, tenant_id, factory_id, biz_date, module_code, metric_code, "
  310. f"metric_value, target_value, status_color, trend_flag, is_deleted, is_active) VALUES"
  311. )
  312. rows = []
  313. seq = 0
  314. for m in level_list:
  315. code = m["metric_code"]
  316. tgt = targets[code]
  317. vals = daily_vals[code]
  318. ov = overrides.get(code, {})
  319. y_th = ov.get("yellow_threshold")
  320. r_th = ov.get("red_threshold")
  321. for i in range(DAYS):
  322. seq += 1
  323. d = start_date + timedelta(days=i)
  324. v = vals[i]
  325. sc = status_color(v, tgt, m["direction"], y_th, r_th)
  326. tf = trend_flag(vals, i)
  327. row_id = id_base + seq
  328. rows.append(
  329. f"({row_id},{DEMO_TENANT_ID},{FACTORY_ID},'{d}',"
  330. f"'{m['module_code']}','{code}',{v},{tgt},{esc(sc)},{esc(tf)},0,1)"
  331. )
  332. for idx, r in enumerate(rows):
  333. sep = "," if idx < len(rows) - 1 else ";"
  334. print(f" {r}{sep}")
  335. print()
  336. emit_inserts(l1s, "ado_s9_kpi_value_l1_day", ID_BASE_L1)
  337. emit_inserts(l2s, "ado_s9_kpi_value_l2_day", ID_BASE_L2)
  338. emit_inserts(l3s, "ado_s9_kpi_value_l3_day", ID_BASE_L3)
  339. print("-- Done.")
  340. if __name__ == "__main__":
  341. main()