From a39cbcb76658c766e19c895c587e2b3b9865d8ec Mon Sep 17 00:00:00 2001
From: openclaw
Date: Mon, 23 Mar 2026 16:33:49 +0000
Subject: [PATCH] =?UTF-8?q?feat(scanner):=20add=20ScanService=20=E2=80=94?=
 =?UTF-8?q?=20orchestrates=20parsers,=20file=20status,=20and=20entity=20co?=
 =?UTF-8?q?llection?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Opus 4.6
---
 .../modules/scanner/application/services.py | 144 ++++++++++++++++++
 backend/tests/test_scanner_service.py       | 110 +++++++++++++
 2 files changed, 254 insertions(+)
 create mode 100644 backend/tests/test_scanner_service.py

diff --git a/backend/app/modules/scanner/application/services.py b/backend/app/modules/scanner/application/services.py
index e69de29..eafe0ad 100644
--- a/backend/app/modules/scanner/application/services.py
+++ b/backend/app/modules/scanner/application/services.py
@@ -0,0 +1,144 @@
+"""ScanService — orchestrates parsers, file status detection, and entity collection."""
+
+from __future__ import annotations
+
+import logging
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from app.modules.design.domain.services import DesignValidationService
+from app.modules.design.domain.value_objects import FileStatus
+from app.modules.project.domain.entities import Project
+from app.modules.scanner.domain.entities import (
+    FileStatusEntry,
+    ScanResult,
+    ScanSummary,
+)
+from app.modules.scanner.infrastructure.parsers.csv_parser import CsvParser
+from app.modules.scanner.infrastructure.parsers.md_parser import MdParser
+from app.modules.scanner.infrastructure.parsers.openapi_parser import OpenapiParser
+
+logger = logging.getLogger(__name__)
+
+# Parser output keys stored on ScanResult as a single entity (the first one
+# parsed) rather than as a list.
+_SINGLETON_KEYS = frozenset({
+    "scope_and_goals", "system_context", "solution_layer",
+    "module_boundary_rule", "runtime_topology",
+    "operational_baseline", "release_plan",
+})
+
+# YAML files are only parsed when their name contains one of these markers.
+_YAML_SUFFIXES = frozenset({".yaml", ".yml"})
+_OPENAPI_NAME_MARKERS = ("openapi", "api-contracts")
+
+
+class ScanService:
+    """Scan a project's design directory and produce a ScanResult."""
+
+    def __init__(self) -> None:
+        """Create the parsers and an empty in-memory cache of scan results."""
+        self._csv_parser = CsvParser()
+        self._md_parser = MdParser()
+        self._openapi_parser = OpenapiParser()
+        # Latest ScanResult per project id, kept on this instance only.
+        self._cache: dict[str, ScanResult] = {}
+
+    def scan(self, project: Project) -> ScanResult:
+        """Scan the project's design directory recursively and cache the result.
+
+        Every regular file gets a FileStatusEntry. CSV and Markdown files, and
+        YAML files whose name contains "openapi" or "api-contracts", are also
+        handed to their parser; parsed entities are merged by key across files.
+
+        Args:
+            project: Project whose design_dir is scanned and whose id keys
+                the cached result.
+
+        Returns:
+            The newly built ScanResult, also available afterwards through
+            get_latest_scan().
+        """
+        design_dir = Path(project.design_dir)
+        file_statuses: list[FileStatusEntry] = []
+        all_entities: dict[str, list[Any]] = {}
+
+        # Sorted so file order (and thus singleton selection) is deterministic.
+        for file_path in sorted(design_dir.rglob("*")):
+            if not file_path.is_file():
+                continue
+
+            content = self._read_text(file_path)
+            status = DesignValidationService.determine_file_status(
+                content, str(file_path)
+            )
+            file_statuses.append(FileStatusEntry(
+                path=str(file_path.relative_to(design_dir)),
+                status=status,
+                content_lines=len(content.splitlines()),
+            ))
+
+            for key, entities in self._parse_file(file_path).items():
+                all_entities.setdefault(key, []).extend(entities)
+
+        kwargs: dict[str, Any] = {
+            "project_id": project.id,
+            "scanned_at": datetime.now(timezone.utc),
+            "file_statuses": file_statuses,
+            "summary": self._build_summary(file_statuses),
+        }
+        for key, entities in all_entities.items():
+            if key in _SINGLETON_KEYS:
+                # Singleton fields take the first parsed item, or None.
+                kwargs[key] = entities[0] if entities else None
+            else:
+                kwargs[key] = entities
+
+        result = ScanResult(**kwargs)
+        self._cache[project.id] = result
+        return result
+
+    def get_latest_scan(self, project_id: str) -> ScanResult | None:
+        """Return the most recent cached scan for *project_id*, or None."""
+        return self._cache.get(project_id)
+
+    @staticmethod
+    def _read_text(file_path: Path) -> str:
+        """Return the file's UTF-8 text, or an empty string if unreadable."""
+        try:
+            return file_path.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as exc:
+            # Unreadable or non-UTF-8 (e.g. binary) files are still listed in
+            # the scan; they are treated as having no content.
+            logger.debug("Could not read %s as UTF-8 text: %s", file_path, exc)
+            return ""
+
+    def _parse_file(self, file_path: Path) -> dict[str, list[Any]]:
+        """Dispatch *file_path* to the parser for its type; {} if none applies."""
+        suffix = file_path.suffix.lower()
+        if suffix == ".csv":
+            return self._csv_parser.parse(file_path)
+        if suffix == ".md":
+            return self._md_parser.parse(file_path)
+        if suffix in _YAML_SUFFIXES:
+            fname = file_path.name.lower()
+            if any(marker in fname for marker in _OPENAPI_NAME_MARKERS):
+                return self._openapi_parser.parse(file_path)
+        return {}
+
+    @staticmethod
+    def _build_summary(file_statuses: list[FileStatusEntry]) -> ScanSummary:
+        """Count how many scanned files fall into each FileStatus category."""
+
+        def count(status: FileStatus) -> int:
+            return sum(1 for fs in file_statuses if fs.status == status)
+
+        return ScanSummary(
+            total_files=len(file_statuses),
+            ok=count(FileStatus.OK),
+            sparse=count(FileStatus.SPARSE),
+            missing=count(FileStatus.MISSING),
+            placeholder_heavy=count(FileStatus.PLACEHOLDER_HEAVY),
+            template_residue=count(FileStatus.TEMPLATE_RESIDUE),
+        )
diff --git a/backend/tests/test_scanner_service.py b/backend/tests/test_scanner_service.py
new file mode 100644
index 0000000..eea459e
--- /dev/null
+++ b/backend/tests/test_scanner_service.py
@@ -0,0 +1,110 @@
+"""Tests for ScanService — integration with real design directory."""
+
+import pytest
+from datetime import datetime
+from pathlib import Path
+
+from app.modules.project.domain.entities import Project
+from app.modules.scanner.application.services import ScanService
+
+
+@pytest.fixture
+def scan_service():
+    return ScanService()
+
+
+@pytest.fixture
+def test_project():
+    return Project(
+        id="test-proj",
+        name="test",
+        design_dir="/workspace/arch-design-agent-skill-dashboard/design",
+        code_dir=None,
+        created_at=datetime(2026, 1, 1),
+    )
+
+
+def test_scan_produces_result(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    assert result.project_id == "test-proj"
+    assert result.scanned_at is not None
+    assert len(result.file_statuses) > 0
+    assert result.summary.total_files > 0
+
+
+def test_scan_has_capabilities(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    assert len(result.capabilities) > 0
+    assert result.capabilities[0].capability_id.startswith("CAP-")
+
+
+def test_scan_has_modules(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    assert len(result.modules) > 0
+
+
+def test_scan_has_traceability_links(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    assert len(result.traceability_links) > 0
+    # entity_ids should be a list (space-split)
+    assert isinstance(result.traceability_links[0].entity_ids, list)
+
+
+def test_scan_has_design_documents(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    assert len(result.design_documents) > 0
+
+
+def test_scan_has_api_contracts(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    assert len(result.api_contracts) > 0
+    assert result.api_contracts[0].path.startswith("/")
+
+
+def test_scan_has_value_flows(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    assert len(result.value_flows) > 0
+
+
+def test_scan_has_integrations(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    assert len(result.integrations) > 0
+
+
+def test_scan_has_external_systems(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    assert len(result.external_systems) > 0
+
+
+def test_scan_has_entities(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    assert len(result.entities) > 0
+
+
+def test_scan_summary_counts_match(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    s = result.summary
+    assert s.total_files == len(result.file_statuses)
+    assert s.total_files == s.ok + s.sparse + s.missing + s.placeholder_heavy + s.template_residue
+
+
+def test_get_latest_scan_none_before_scan(scan_service):
+    assert scan_service.get_latest_scan("nonexistent") is None
+
+
+def test_get_latest_scan_cached(scan_service, test_project):
+    scan_service.scan(test_project)
+    cached = scan_service.get_latest_scan("test-proj")
+    assert cached is not None
+    assert cached.project_id == "test-proj"
+
+
+def test_scan_has_singleton_fields(scan_service, test_project):
+    result = scan_service.scan(test_project)
+    # These MD files have frontmatter and should produce singleton entities
+    assert result.system_context is not None
+    assert result.solution_layer is not None
+    assert result.module_boundary_rule is not None
+    assert result.runtime_topology is not None
+    assert result.operational_baseline is not None
+    assert result.release_plan is not None