161 lines
4.7 KiB
Python
161 lines
4.7 KiB
Python
"""Markdown parser — extracts YAML frontmatter and produces DesignDocument + specialized entities."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import yaml
|
|
|
|
from app.modules.design.domain.entities import (
|
|
ADR,
|
|
DesignDocument,
|
|
Domain,
|
|
ModuleBoundaryRule,
|
|
OperationalBaseline,
|
|
ReleasePlan,
|
|
RuntimeTopology,
|
|
ScopeAndGoals,
|
|
SolutionLayer,
|
|
SystemContext,
|
|
)
|
|
|
|
|
|
# Leading YAML frontmatter block: an opening `---` line, the YAML payload
# (captured lazily as group 1, DOTALL so it may span lines), and a closing
# `---` line. The closing fence may be terminated by a newline or by the end
# of the file, so a frontmatter-only document without a trailing newline is
# still recognised.
_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*(?:\n|\Z)", re.DOTALL)
|
|
|
|
|
|
class MdParser:
    """Parse a Markdown file into a dict mapping entity type name to instances.

    The file must begin with a YAML frontmatter block that defines a non-empty
    ``doc_id``. Every such file yields one ``DesignDocument`` under the
    ``'design_documents'`` key. Depending on the file name, at most one
    specialized entity is added under one of: ``'scope_and_goals'``,
    ``'system_context'``, ``'solution_layer'``, ``'module_boundary_rule'``,
    ``'runtime_topology'``, ``'operational_baseline'``, ``'release_plan'``,
    ``'domains'`` or ``'adrs'``.
    """

    def parse(self, file_path: Path) -> dict[str, list[Any]]:
        """Parse ``file_path`` and return the entities found in it.

        Args:
            file_path: Path of the Markdown file to read.

        Returns:
            A dict from entity type name to a one-element list. An empty dict
            is returned when the file cannot be read, has no frontmatter, the
            frontmatter is not valid YAML or not a mapping, or ``doc_id`` is
            missing/empty. This method never raises for those conditions.
        """
        try:
            # utf-8-sig drops a leading byte-order mark, which would otherwise
            # sit in front of the opening `---` and defeat the anchored match.
            content = file_path.read_text(encoding="utf-8-sig")
        except Exception:
            # Deliberate best-effort: an unreadable file contributes nothing.
            return {}

        match = _FRONTMATTER_RE.match(content)
        if not match:
            return {}

        try:
            frontmatter = yaml.safe_load(match.group(1))
        except Exception:
            # Deliberate best-effort: malformed frontmatter contributes nothing.
            return {}

        if not isinstance(frontmatter, dict):
            return {}

        doc_id = frontmatter.get("doc_id", "")
        if not doc_id:
            return {}

        title = self._text(frontmatter.get("title"))
        status = self._text(frontmatter.get("status"))
        raw_version = frontmatter.get("version")
        # YAML may parse a version as a number (e.g. 1.0), hence str(); an
        # explicit null must stay empty instead of becoming the text "None".
        version = "" if raw_version is None else str(raw_version)

        design_doc = DesignDocument(
            doc_id=doc_id,
            title=title,
            version=version,
            status=status,
            owners=self._as_list(frontmatter.get("owners")),
            upstream=self._as_list(frontmatter.get("upstream")),
            downstream=self._as_list(frontmatter.get("downstream")),
            file_path=str(file_path),
        )
        result: dict[str, list[Any]] = {"design_documents": [design_doc]}

        # Everything after the frontmatter block is the document body.
        body = content[match.end():].strip()

        specialized = self._specialized_entity(
            file_path, doc_id, title, status, body
        )
        if specialized is not None:
            key, entity = specialized
            result[key] = [entity]

        return result

    @staticmethod
    def _text(value: Any) -> Any:
        """Return ``""`` for a missing/null value, otherwise the value as is."""
        return "" if value is None else value

    @staticmethod
    def _as_list(value: Any) -> list[Any]:
        """Return ``value`` as a list: falsy -> ``[]``, scalar -> ``[value]``."""
        if not value:
            return []
        return value if isinstance(value, list) else [value]

    @staticmethod
    def _specialized_entity(
        file_path: Path,
        doc_id: Any,
        title: Any,
        status: Any,
        body: str,
    ) -> "tuple[str, Any] | None":
        """Pick the specialized entity implied by the file name, if any.

        Returns a ``(result_key, entity)`` pair, or ``None`` when the file
        name matches no known document kind. Checks run in a fixed order and
        the first match wins.
        """
        fname = file_path.name.lower()

        def named(stem: str) -> bool:
            # Accept both the hyphenated and the underscored spelling.
            return stem in fname or stem.replace("-", "_") in fname

        if named("scope-and-goals"):
            # The structured fields are not extracted from the body; they are
            # populated with empty strings.
            return "scope_and_goals", ScopeAndGoals(
                doc_id=doc_id,
                title=title,
                core_problem="",
                users="",
                constraints="",
            )

        # Document kinds that share the (doc_id, title, content) constructor.
        # Built at call time; order is significant.
        content_entities = (
            ("system-context", "system_context", SystemContext),
            ("solution-layering", "solution_layer", SolutionLayer),
            ("module-boundary", "module_boundary_rule", ModuleBoundaryRule),
            ("runtime-topology", "runtime_topology", RuntimeTopology),
            ("operational-baseline", "operational_baseline", OperationalBaseline),
            ("release-and-rollback", "release_plan", ReleasePlan),
        )
        for stem, key, entity_cls in content_entities:
            if named(stem):
                return key, entity_cls(doc_id=doc_id, title=title, content=body)

        if named("domain-overview"):
            # The domain name is taken from the parent directory's name.
            return "domains", Domain(
                domain_name=file_path.parent.name,
                overview=body,
                modules=[],
                entities=[],
            )

        if fname.startswith("adr-") and "template" not in fname:
            return "adrs", ADR(
                adr_id=doc_id,
                title=title,
                status=status,
                context=body,
                decision="",
            )

        return None
|