import_sql_dump_merge.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. #!/usr/bin/env python3
  2. """
  3. 将 mysqldump 备份中的数据合并导入 MySQL,跳过已存在行(INSERT IGNORE)。
  4. 适用场景:目标库已有部分数据,需要从备份补全而不产生主键/唯一键重复。
  5. 默认连接 Dev 165 / aidopdev(与 Database.json 开发库一致)。
  6. 不会执行 DROP TABLE / CREATE TABLE / CREATE DATABASE,不覆盖表结构。
  7. 用法示例:
  8. python doc/db/import_sql_dump_merge.py --dry-run
  9. python doc/db/import_sql_dump_merge.py
  10. python doc/db/import_sql_dump_merge.py --skip-tables abpauditlogs,abpauditlogactions
  11. """
  12. from __future__ import annotations
  13. import argparse
  14. import re
  15. import sys
  16. import time
  17. from collections import defaultdict
  18. from pathlib import Path
  19. import pymysql
  20. DEFAULT_SOURCE = (
  21. Path(__file__).resolve().parents[4]
  22. / "阿里云数据库备份"
  23. / "aidopdev_20260608_053527.sql"
  24. / "aidopdev_20260608_053527.sql"
  25. )
  26. DEFAULT_DB = dict(
  27. host="123.60.180.165",
  28. port=3306,
  29. user="aidopremote",
  30. password="1234567890aiDOP#",
  31. database="aidopdev",
  32. charset="utf8mb4",
  33. )
  34. SKIP_PREFIXES = (
  35. "DROP TABLE",
  36. "DROP DATABASE",
  37. "CREATE DATABASE",
  38. "CREATE TABLE",
  39. "ALTER TABLE",
  40. "LOCK TABLES",
  41. "UNLOCK TABLES",
  42. "/*!40000 ALTER TABLE",
  43. "/*!40101 SET @saved_cs_client",
  44. )
  45. SAFE_PREFIXES = (
  46. "SET NAMES",
  47. "SET TIME_ZONE",
  48. "SET @OLD_",
  49. "SET SQL_MODE",
  50. "SET FOREIGN_KEY_CHECKS",
  51. "SET UNIQUE_CHECKS",
  52. "SET CHARACTER_SET_CLIENT",
  53. "SET CHARACTER_SET_RESULTS",
  54. "SET COLLATION_CONNECTION",
  55. "SET SQL_NOTES",
  56. "/*!40101 SET @OLD_",
  57. "/*!40103 SET @OLD_",
  58. "/*!40014 SET @OLD_",
  59. "/*!40111 SET @OLD_",
  60. "/*!50503 SET NAMES",
  61. )
  62. INSERT_RE = re.compile(
  63. r"^INSERT\s+(?:IGNORE\s+)?INTO\s+[`]?([^`\s(]+)[`]?",
  64. re.IGNORECASE,
  65. )
  66. def parse_args() -> argparse.Namespace:
  67. parser = argparse.ArgumentParser(description="Merge mysqldump data into MySQL with INSERT IGNORE")
  68. parser.add_argument(
  69. "--source",
  70. type=Path,
  71. default=DEFAULT_SOURCE,
  72. help=f"SQL dump file path (default: {DEFAULT_SOURCE})",
  73. )
  74. parser.add_argument("--host", default=DEFAULT_DB["host"])
  75. parser.add_argument("--port", type=int, default=DEFAULT_DB["port"])
  76. parser.add_argument("--user", default=DEFAULT_DB["user"])
  77. parser.add_argument("--password", default=DEFAULT_DB["password"])
  78. parser.add_argument("--database", default=DEFAULT_DB["database"])
  79. parser.add_argument("--dry-run", action="store_true", help="Only scan and report, do not execute")
  80. parser.add_argument(
  81. "--only-tables",
  82. default="",
  83. help="Comma-separated table allowlist (lowercase). Empty = all tables with INSERT",
  84. )
  85. parser.add_argument(
  86. "--skip-tables",
  87. default="",
  88. help="Comma-separated tables to skip (lowercase)",
  89. )
  90. parser.add_argument(
  91. "--batch-commit",
  92. type=int,
  93. default=1,
  94. help="Commit every N INSERT statements (default 1)",
  95. )
  96. return parser.parse_args()
  97. def normalize_table(name: str) -> str:
  98. return name.strip("`").lower()
  99. def should_skip_statement(stmt: str) -> bool:
  100. upper = stmt.lstrip().upper()
  101. if upper.startswith("USE "):
  102. return True
  103. return any(upper.startswith(prefix.upper()) for prefix in SKIP_PREFIXES)
  104. def is_safe_session(stmt: str) -> bool:
  105. stripped = stmt.lstrip()
  106. return any(stripped.startswith(prefix) for prefix in SAFE_PREFIXES)
  107. def to_insert_ignore(stmt: str) -> str:
  108. if re.match(r"^INSERT\s+IGNORE\s+INTO", stmt, re.IGNORECASE):
  109. return stmt
  110. return re.sub(r"^INSERT\s+INTO", "INSERT IGNORE INTO", stmt, count=1, flags=re.IGNORECASE)
  111. def rewrite_insert_table(stmt: str, actual_table: str) -> str:
  112. sql = to_insert_ignore(stmt)
  113. return re.sub(
  114. r"^(INSERT\s+(?:IGNORE\s+)?INTO\s+)[`']?[^`'\s(]+[`']?",
  115. rf"\1`{actual_table}`",
  116. sql,
  117. count=1,
  118. flags=re.IGNORECASE,
  119. )
  120. def iter_statements(path: Path):
  121. buffer: list[str] = []
  122. for line in path.open("r", encoding="utf-8", errors="replace"):
  123. buffer.append(line)
  124. if not line.rstrip().endswith(";"):
  125. continue
  126. stmt = "".join(buffer).strip()
  127. buffer.clear()
  128. if stmt:
  129. yield stmt
  130. if buffer:
  131. tail = "".join(buffer).strip()
  132. if tail:
  133. yield tail
  134. def is_values_only_insert(stmt: str) -> bool:
  135. """Only import mysqldump data rows; skip procedure fragments with column lists."""
  136. stripped = stmt.lstrip()
  137. return bool(
  138. re.match(
  139. r"^INSERT\s+(?:IGNORE\s+)?INTO\s+[`']?[^`'\s(]+[`']?\s+VALUES\s",
  140. stripped,
  141. re.IGNORECASE,
  142. )
  143. )
  144. def extract_insert_table(stmt: str) -> str | None:
  145. match = INSERT_RE.match(stmt.strip())
  146. return normalize_table(match.group(1)) if match else None
  147. def connect_db(args: argparse.Namespace):
  148. return pymysql.connect(
  149. host=args.host,
  150. port=args.port,
  151. user=args.user,
  152. password=args.password,
  153. database=args.database,
  154. charset="utf8mb4",
  155. autocommit=False,
  156. connect_timeout=30,
  157. read_timeout=600,
  158. write_timeout=600,
  159. )
  160. def load_target_tables(conn) -> dict[str, str]:
  161. """Return lowercase table name -> actual TABLE_NAME on server."""
  162. with conn.cursor() as cur:
  163. cur.execute(
  164. "SELECT TABLE_NAME FROM information_schema.TABLES "
  165. "WHERE TABLE_SCHEMA = DATABASE()"
  166. )
  167. mapping: dict[str, str] = {}
  168. for row in cur.fetchall():
  169. actual = row[0]
  170. mapping[normalize_table(actual)] = actual
  171. return mapping
  172. def main() -> int:
  173. args = parse_args()
  174. source: Path = args.source
  175. if not source.is_file():
  176. print(f"[ERROR] Source file not found: {source}", file=sys.stderr)
  177. return 1
  178. only_tables = {t.strip().lower() for t in args.only_tables.split(",") if t.strip()}
  179. skip_tables = {t.strip().lower() for t in args.skip_tables.split(",") if t.strip()}
  180. stats = defaultdict(int)
  181. table_rows_attempted = defaultdict(int)
  182. errors: list[str] = []
  183. warnings: list[str] = []
  184. print(f"Source: {source}")
  185. print(f"Target: {args.user}@{args.host}:{args.port}/{args.database}")
  186. print(f"Mode: {'DRY-RUN' if args.dry_run else 'EXECUTE'}")
  187. if only_tables:
  188. print(f"Only tables: {', '.join(sorted(only_tables))}")
  189. if skip_tables:
  190. print(f"Skip tables: {', '.join(sorted(skip_tables))}")
  191. conn = None
  192. target_tables: dict[str, str] = {}
  193. if not args.dry_run:
  194. conn = connect_db(args)
  195. target_tables = load_target_tables(conn)
  196. print(f"Target tables in database: {len(target_tables)}")
  197. started = time.time()
  198. pending_since_commit = 0
  199. try:
  200. for stmt in iter_statements(source):
  201. if should_skip_statement(stmt):
  202. stats["skipped_ddl"] += 1
  203. continue
  204. table = extract_insert_table(stmt)
  205. if table:
  206. if not is_values_only_insert(stmt):
  207. stats["skipped_non_values_insert"] += 1
  208. continue
  209. if only_tables and table not in only_tables:
  210. stats["skipped_filter"] += 1
  211. continue
  212. if table in skip_tables:
  213. stats["skipped_filter"] += 1
  214. continue
  215. if not args.dry_run and table not in target_tables:
  216. stats["skipped_missing_table"] += 1
  217. warnings.append(f"missing table on target: {table}")
  218. continue
  219. stats["insert_statements"] += 1
  220. table_rows_attempted[table] += 1
  221. if args.dry_run:
  222. continue
  223. sql = rewrite_insert_table(stmt, target_tables[table])
  224. try:
  225. with conn.cursor() as cur:
  226. affected = cur.execute(sql)
  227. stats["insert_executed"] += 1
  228. stats["rows_affected"] += max(affected, 0)
  229. pending_since_commit += 1
  230. if pending_since_commit >= args.batch_commit:
  231. conn.commit()
  232. pending_since_commit = 0
  233. except Exception as exc: # noqa: BLE001
  234. conn.rollback()
  235. pending_since_commit = 0
  236. stats["insert_failed"] += 1
  237. errors.append(f"{table}: {exc}")
  238. continue
  239. if is_safe_session(stmt):
  240. if args.dry_run:
  241. stats["session_setup"] += 1
  242. continue
  243. try:
  244. with conn.cursor() as cur:
  245. cur.execute(stmt)
  246. stats["session_setup"] += 1
  247. except Exception:
  248. stats["session_setup_ignored"] += 1
  249. continue
  250. stats["skipped_other"] += 1
  251. if conn and pending_since_commit:
  252. conn.commit()
  253. finally:
  254. if conn:
  255. conn.close()
  256. elapsed = time.time() - started
  257. print("\n=== Summary ===")
  258. print(f"Elapsed: {elapsed:.1f}s")
  259. for key in sorted(stats):
  260. print(f" {key}: {stats[key]}")
  261. if table_rows_attempted:
  262. print("\nINSERT statements by table (top 20):")
  263. for table, count in sorted(table_rows_attempted.items(), key=lambda x: (-x[1], x[0]))[:20]:
  264. print(f" {table}: {count}")
  265. if len(table_rows_attempted) > 20:
  266. print(f" ... and {len(table_rows_attempted) - 20} more tables")
  267. if warnings:
  268. print(f"\nWarnings ({len(warnings)}):")
  269. for msg in warnings[:30]:
  270. print(f" - {msg}")
  271. if len(warnings) > 30:
  272. print(f" ... and {len(warnings) - 30} more")
  273. if errors:
  274. print(f"\nErrors ({len(errors)}):")
  275. for msg in errors[:30]:
  276. print(f" - {msg}")
  277. if len(errors) > 30:
  278. print(f" ... and {len(errors) - 30} more")
  279. return 2
  280. return 0
  281. if __name__ == "__main__":
  282. raise SystemExit(main())