From 386fa714bf6afe31f97d07c09a41dfe5f86eedc2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 08:07:21 +0000 Subject: [PATCH 01/14] Initial plan From 9b1748e6f270ac7409e7199e7955363e0b00cabe Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 08:08:45 +0000 Subject: [PATCH 02/14] refactor(test): rename config var to logging_cfg in pytest_configure to avoid shadowing Co-authored-by: gkostkowski <12532923+gkostkowski@users.noreply.github.com> --- test/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 7553ee9..8f3de23 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -29,8 +29,8 @@ def pytest_configure(config: pytest.Config): # Setup logging from YAML config file cfg_path = os.path.join(os.path.dirname(__file__), "resources/logging-test.yml") with open(cfg_path) as f: - config = yaml.safe_load(f) - logging.config.dictConfig(config) + logging_cfg = yaml.safe_load(f) + logging.config.dictConfig(logging_cfg) # ============================================================================ From cd3e1d1ed7fcb6b59e3506e43819600cd9704cc8 Mon Sep 17 00:00:00 2001 From: Grzegorz Kostkowski Date: Thu, 5 Mar 2026 10:26:53 +0100 Subject: [PATCH 03/14] chore: reformat code with Ruff --- src/ere/adapters/duckdb_repositories.py | 8 +- src/ere/adapters/rdf_mapper.py | 4 +- src/ere/adapters/rdf_mapper_impl.py | 15 +- src/ere/adapters/repositories.py | 8 +- src/ere/adapters/splink_linker_impl.py | 78 +++++---- src/ere/adapters/utils.py | 10 +- src/ere/entrypoints/app.py | 4 +- src/ere/entrypoints/queue_worker.py | 8 +- src/ere/models/exceptions.py | 4 +- src/ere/models/resolver/mention.py | 6 +- src/ere/services/entity_resolution_service.py | 28 +++- src/ere/services/factories.py | 12 +- src/ere/services/resolver_config.py | 4 +- test/conftest.py | 1 + 
test/e2e/test_ere.py | 4 +- .../test_direct_service_resolution_steps.py | 148 +++++++++++++++--- .../test_entity_resolution_algorithm_steps.py | 24 ++- test/integration/test_entity_resolver.py | 48 ++++-- test/integration/test_redis_integration.py | 20 ++- test/stress/stress_test.py | 5 +- test/unit/adapters/stubs.py | 6 +- test/unit/adapters/test_duckdb_adapters.py | 30 ++-- .../test_entity_resolution_service.py | 52 +++--- 23 files changed, 378 insertions(+), 149 deletions(-) diff --git a/src/ere/adapters/duckdb_repositories.py b/src/ere/adapters/duckdb_repositories.py index 7b58ad9..c65caa8 100644 --- a/src/ere/adapters/duckdb_repositories.py +++ b/src/ere/adapters/duckdb_repositories.py @@ -3,7 +3,13 @@ import duckdb import pandas as pd -from ere.models.resolver import ClusterId, ClusterMembership, Mention, MentionId, MentionLink +from ere.models.resolver import ( + ClusterId, + ClusterMembership, + Mention, + MentionId, + MentionLink, +) from ere.adapters.repositories import ( ClusterRepository, MentionRepository, diff --git a/src/ere/adapters/rdf_mapper.py b/src/ere/adapters/rdf_mapper.py index 37bf6bb..1f45fc1 100644 --- a/src/ere/adapters/rdf_mapper.py +++ b/src/ere/adapters/rdf_mapper.py @@ -87,9 +87,7 @@ def extract_mention_attributes( entity_subject = graph.value(predicate=RDF.type, object=rdf_type) if entity_subject is None: - raise ValueError( - f"No entity of type {rdf_type} found in RDF content" - ) + raise ValueError(f"No entity of type {rdf_type} found in RDF content") # Extract attributes per config attributes = {} diff --git a/src/ere/adapters/rdf_mapper_impl.py b/src/ere/adapters/rdf_mapper_impl.py index 243f6b1..e18d6a3 100644 --- a/src/ere/adapters/rdf_mapper_impl.py +++ b/src/ere/adapters/rdf_mapper_impl.py @@ -42,7 +42,12 @@ def _load_mappings(rdf_mapping_path: str | Path = None) -> dict: dict: Entity type mappings from config. 
""" if rdf_mapping_path is None: - rdf_mapping_path = Path(__file__).parent.parent.parent.parent / "infra" / "config" / "rdf_mapping.yaml" + rdf_mapping_path = ( + Path(__file__).parent.parent.parent.parent + / "infra" + / "config" + / "rdf_mapping.yaml" + ) else: rdf_mapping_path = Path(rdf_mapping_path) return load_entity_mappings(rdf_mapping_path) @@ -70,9 +75,13 @@ def map_entity_mention_to_domain(self, entity_mention: EntityMention) -> Mention ) mention_id = MentionId( - value=self._derive_mention_id(eid.source_id, eid.request_id, eid.entity_type) + value=self._derive_mention_id( + eid.source_id, eid.request_id, eid.entity_type + ) + ) + attributes = extract_mention_attributes( + entity_mention.content, entity_type_config ) - attributes = extract_mention_attributes(entity_mention.content, entity_type_config) return Mention(id=mention_id, attributes=attributes) @staticmethod diff --git a/src/ere/adapters/repositories.py b/src/ere/adapters/repositories.py index 6ac6dc2..2a99e6d 100644 --- a/src/ere/adapters/repositories.py +++ b/src/ere/adapters/repositories.py @@ -9,7 +9,13 @@ from abc import ABC, abstractmethod -from ere.models.resolver import ClusterId, ClusterMembership, Mention, MentionId, MentionLink +from ere.models.resolver import ( + ClusterId, + ClusterMembership, + Mention, + MentionId, + MentionLink, +) class MentionRepository(ABC): diff --git a/src/ere/adapters/splink_linker_impl.py b/src/ere/adapters/splink_linker_impl.py index 8172614..283432a 100644 --- a/src/ere/adapters/splink_linker_impl.py +++ b/src/ere/adapters/splink_linker_impl.py @@ -45,7 +45,9 @@ def build_tf_df(mentions: list[Mention], entity_fields: list[str]) -> pd.DataFra flat_dict = mention.to_flat_dict() row = { "mention_id": flat_dict["mention_id"], - **{f: flat_dict.get(f) or "" for f in entity_fields}, # Convert None to empty string + **{ + f: flat_dict.get(f) or "" for f in entity_fields + }, # Convert None to empty string "__splink_salt": 0.5, } rows.append(row) @@ -246,11 
+248,15 @@ def register_mention(self, mention: Mention) -> None: ) # Build new row with same schema as _tf_df - new_row = pd.DataFrame([{ - "mention_id": flat_dict["mention_id"], - **{f: flat_dict.get(f) for f in self._entity_fields}, - "__splink_salt": 0.5, - }]) + new_row = pd.DataFrame( + [ + { + "mention_id": flat_dict["mention_id"], + **{f: flat_dict.get(f) for f in self._entity_fields}, + "__splink_salt": 0.5, + } + ] + ) # Cast string columns to pd.StringDtype() to prevent type drift on None values for col in self._entity_fields: @@ -324,7 +330,9 @@ def _build_settings(self) -> SettingsCreator: comp["field"], thresholds, ) - comparisons.append(cl.JaroWinklerAtThresholds(comp["field"], thresholds)) + comparisons.append( + cl.JaroWinklerAtThresholds(comp["field"], thresholds) + ) elif comp["type"] == "exact_match": log.trace( "_build_settings: Adding ExactMatch comparison on field '%s'", @@ -406,7 +414,9 @@ def _train_safe(self) -> None: log.info("EM training: estimating u-probabilities via random sampling") linker_new.training.estimate_u_using_random_sampling(max_pairs=1e6) - log.info("EM training: estimating m-probabilities and lambda via EM algorithm") + log.info( + "EM training: estimating m-probabilities and lambda via EM algorithm" + ) linker_new.training.estimate_parameters_using_expectation_maximisation( self._get_em_training_rule(), estimate_without_term_frequencies=True ) @@ -455,12 +465,16 @@ def _apply_cold_start_params(self) -> None: # Check if cold_start config exists cold_start_cfg = self._config.get("splink", {}).get("cold_start", {}) if not cold_start_cfg: - log.info("Linker initializing: No cold_start config found, using Splink defaults") + log.info( + "Linker initializing: No cold_start config found, using Splink defaults" + ) return comparisons_cfg = cold_start_cfg.get("comparisons", {}) if not comparisons_cfg: - log.info("Linker initializing: No comparisons config in cold_start, using Splink defaults") + log.info( + "Linker initializing: 
No comparisons config in cold_start, using Splink defaults" + ) return log.info( @@ -475,11 +489,11 @@ def _apply_cold_start_params(self) -> None: for _, comparison in enumerate(self._linker._settings_obj.comparisons): # Get the field name from the comparison field_name = None - if hasattr(comparison, 'output_column_name'): + if hasattr(comparison, "output_column_name"): field_name = comparison.output_column_name - elif hasattr(comparison, '_field_names') and comparison._field_names: + elif hasattr(comparison, "_field_names") and comparison._field_names: field_name = comparison._field_names[0] - # pylint: enable=protected-access + # pylint: enable=protected-access if field_name not in comparisons_cfg: continue @@ -494,8 +508,9 @@ def _apply_cold_start_params(self) -> None: # Collect non-null levels to properly map cold-start probabilities non_null_levels = [ - (i, level) for i, level in enumerate(comparison.comparison_levels) - if not (hasattr(level, 'is_null_level') and level.is_null_level) + (i, level) + for i, level in enumerate(comparison.comparison_levels) + if not (hasattr(level, "is_null_level") and level.is_null_level) ] log.trace( "_apply_cold_start_params: Field '%s' has %d non-null levels: %s", @@ -505,8 +520,8 @@ def _apply_cold_start_params(self) -> None: ) # Apply m-probabilities to non-null levels in order - if 'm_probabilities' in field_cfg: - m_probs = field_cfg['m_probabilities'] + if "m_probabilities" in field_cfg: + m_probs = field_cfg["m_probabilities"] for config_idx, m_prob in enumerate(m_probs): if config_idx < len(non_null_levels): actual_level_idx, level = non_null_levels[config_idx] @@ -528,8 +543,8 @@ def _apply_cold_start_params(self) -> None: ) # Apply u-probabilities to non-null levels in order - if 'u_probabilities' in field_cfg: - u_probs = field_cfg['u_probabilities'] + if "u_probabilities" in field_cfg: + u_probs = field_cfg["u_probabilities"] for config_idx, u_prob in enumerate(u_probs): if config_idx < len(non_null_levels): 
actual_level_idx, level = non_null_levels[config_idx] @@ -566,7 +581,7 @@ def _log_trained_parameters(self, linker: Linker) -> None: # Get the Fellegi-Sunter prior (lambda) prior = None # pylint: disable=protected-access # Splink exposes no public API for settings introspection - if hasattr(linker._settings_obj, 'probability_two_random_records_match'): + if hasattr(linker._settings_obj, "probability_two_random_records_match"): prior = linker._settings_obj.probability_two_random_records_match log.info( "EM trained parameter: lambda (P(match)) = %.6f", @@ -577,11 +592,11 @@ def _log_trained_parameters(self, linker: Linker) -> None: for comparison in linker._settings_obj.comparisons: # Get field name field_name = None - if hasattr(comparison, 'output_column_name'): + if hasattr(comparison, "output_column_name"): field_name = comparison.output_column_name - elif hasattr(comparison, '_field_names') and comparison._field_names: + elif hasattr(comparison, "_field_names") and comparison._field_names: field_name = comparison._field_names[0] - # pylint: enable=protected-access + # pylint: enable=protected-access if not field_name: continue @@ -593,8 +608,9 @@ def _log_trained_parameters(self, linker: Linker) -> None: # Collect non-null levels non_null_levels = [ - (i, level) for i, level in enumerate(comparison.comparison_levels) - if not (hasattr(level, 'is_null_level') and level.is_null_level) + (i, level) + for i, level in enumerate(comparison.comparison_levels) + if not (hasattr(level, "is_null_level") and level.is_null_level) ] # Log m and u probabilities for each level @@ -605,19 +621,25 @@ def _log_trained_parameters(self, linker: Linker) -> None: trained_u = False # Extract m-probability - if hasattr(level, 'm_probability') and level.m_probability is not None: + if ( + hasattr(level, "m_probability") + and level.m_probability is not None + ): m_prob = level.m_probability # Check if it was trained (non-cold-start values have specific patterns) # Cold-start values are 
typically set exactly; trained values may vary trained_m = True # Extract u-probability - if hasattr(level, 'u_probability') and level.u_probability is not None: + if ( + hasattr(level, "u_probability") + and level.u_probability is not None + ): u_prob = level.u_probability trained_u = True # Log level details - level_desc = getattr(level, 'label', f"Level {config_idx}") + level_desc = getattr(level, "label", f"Level {config_idx}") m_status = "✓ trained" if trained_m else "✗ cold-start" u_status = "✓ trained" if trained_u else "✗ cold-start" diff --git a/src/ere/adapters/utils.py b/src/ere/adapters/utils.py index 63ad5f9..c1535ae 100644 --- a/src/ere/adapters/utils.py +++ b/src/ere/adapters/utils.py @@ -21,7 +21,10 @@ ) SUPPORTED_REQUEST_CLASSES = { - cls.__name__: cls for cls in [EntityMentionResolutionRequest] # , FullRebuildRequest] # TODO: Add when available + cls.__name__: cls + for cls in [ + EntityMentionResolutionRequest + ] } """ Explicit list of supported Request classes, used in utilities like :meth:`get_request_from_message`. @@ -34,7 +37,10 @@ SUPPORTED_RESPONSE_CLASSES = { cls.__name__: cls - for cls in [EntityMentionResolutionResponse, EREErrorResponse] # , FullRebuildResponse] # TODO: Add when available + for cls in [ + EntityMentionResolutionResponse, + EREErrorResponse, + ] } """ Explicit list of supported Response classes, used in utilities like :meth:`get_response_from_message`. 
diff --git a/src/ere/entrypoints/app.py b/src/ere/entrypoints/app.py index e2bfd35..e4077db 100644 --- a/src/ere/entrypoints/app.py +++ b/src/ere/entrypoints/app.py @@ -78,7 +78,9 @@ def main() -> None: # Config file paths: CLI takes precedence over environment rdf_mapping_path = args.rdf_mapping_path or os.environ.get("RDF_MAPPING_PATH") - resolver_config_path = args.resolver_config_path or os.environ.get("RESOLVER_CONFIG_PATH") + resolver_config_path = args.resolver_config_path or os.environ.get( + "RESOLVER_CONFIG_PATH" + ) duckdb_path = os.environ.get("DUCKDB_PATH") log.info( diff --git a/src/ere/entrypoints/queue_worker.py b/src/ere/entrypoints/queue_worker.py index 020f18c..e3d435b 100644 --- a/src/ere/entrypoints/queue_worker.py +++ b/src/ere/entrypoints/queue_worker.py @@ -47,7 +47,9 @@ def process_single_message(self) -> bool: Exception: Propagates connection errors. """ # Wait for a request - queue_message = self.redis_client.brpop(self.request_queue, timeout=self.queue_timeout) + queue_message = self.redis_client.brpop( + self.request_queue, timeout=self.queue_timeout + ) if not queue_message: return False # Timeout @@ -88,7 +90,9 @@ def _send_response(self, response: EREResponse) -> None: log.error("Failed to send response: %s", e) @staticmethod - def _build_error_response(error_detail: str, ere_request_id: str = "unknown") -> EREErrorResponse: + def _build_error_response( + error_detail: str, ere_request_id: str = "unknown" + ) -> EREErrorResponse: """Build error response for request processing failures.""" log.error("Building error response: %s", error_detail) return EREErrorResponse( diff --git a/src/ere/models/exceptions.py b/src/ere/models/exceptions.py index 889a82c..2d648d4 100644 --- a/src/ere/models/exceptions.py +++ b/src/ere/models/exceptions.py @@ -4,7 +4,9 @@ class ConflictError(Exception): """Raised when the same mention_id is submitted with different content.""" - def __init__(self, mention_id: str, existing_attributes: dict, 
incoming_attributes: dict): + def __init__( + self, mention_id: str, existing_attributes: dict, incoming_attributes: dict + ): super().__init__( f"Mention '{mention_id}' was already resolved with different content. " f"Existing: {existing_attributes!r}, Incoming: {incoming_attributes!r}" diff --git a/src/ere/models/resolver/mention.py b/src/ere/models/resolver/mention.py index 71c09ac..4e5cd05 100644 --- a/src/ere/models/resolver/mention.py +++ b/src/ere/models/resolver/mention.py @@ -28,7 +28,11 @@ def _from_flat_dict(cls, raw_input: object) -> object: {"mention_id": "m1", "legal_name": "Acme", "country_code": "US"} and convert to the structured form expected by the model. """ - if isinstance(raw_input, dict) and "mention_id" in raw_input and "id" not in raw_input: + if ( + isinstance(raw_input, dict) + and "mention_id" in raw_input + and "id" not in raw_input + ): return { "id": MentionId(value=raw_input["mention_id"]), "attributes": {k: v for k, v in raw_input.items() if k != "mention_id"}, diff --git a/src/ere/services/entity_resolution_service.py b/src/ere/services/entity_resolution_service.py index dc27376..2bbec9b 100644 --- a/src/ere/services/entity_resolution_service.py +++ b/src/ere/services/entity_resolution_service.py @@ -128,7 +128,9 @@ def resolve(self, mention: Mention) -> ResolutionResult: cluster_id = ClusterId(value=mention.id.value) log.trace("New cluster generated for mention with id=%s", mention.id.value) - self._cluster_repo.save(ClusterMembership(mention_id=mention.id, cluster_id=cluster_id)) + self._cluster_repo.save( + ClusterMembership(mention_id=mention.id, cluster_id=cluster_id) + ) # Log cluster contents after assignment all_memberships = self._cluster_repo.get_all_memberships() @@ -147,7 +149,10 @@ def resolve(self, mention: Mention) -> ResolutionResult: # Trigger auto-training if threshold is reached (non-blocking background thread). 
count = self._mention_repo.count() - if self._config.auto_train_threshold > 0 and count == self._config.auto_train_threshold: + if ( + self._config.auto_train_threshold > 0 + and count == self._config.auto_train_threshold + ): log.info( "Auto-training triggered: %d mentions reached (threshold=%d). " "Starting background EM training thread. Scoring continues with current parameters.", @@ -155,9 +160,7 @@ def resolve(self, mention: Mention) -> ResolutionResult: self._config.auto_train_threshold, ) threading.Thread( - target=self._linker.train, - daemon=True, - name="linker-training" + target=self._linker.train, daemon=True, name="linker-training" ).start() # Step 5: Return cluster references (non-empty, always top-N). @@ -351,7 +354,9 @@ def resolve_to_result( def resolve_entity_mention( - entity_mention: EntityMention, resolver: EntityResolver = None, mapper: RDFMapper = None + entity_mention: EntityMention, + resolver: EntityResolver = None, + mapper: RDFMapper = None, ) -> ClusterReference: """ Resolve an entity mention to a Cluster (public API - returns top candidate). 
@@ -454,7 +459,9 @@ def process_request(self, request: ERERequest) -> EREResponse: entity_mention.identifiedBy.request_id, ) - resolution_outcome = resolve_to_result(entity_mention, self._resolver, self._mapper) + resolution_outcome = resolve_to_result( + entity_mention, self._resolver, self._mapper + ) # Log resolution result with candidates candidate_info = [ @@ -482,7 +489,12 @@ def process_request(self, request: ERERequest) -> EREResponse: timestamp=now, ) except Exception as exc: # pylint: disable=broad-exception-caught - log.error("Resolution error for mention %s: %s", request.ere_request_id, exc, exc_info=True) + log.error( + "Resolution error for mention %s: %s", + request.ere_request_id, + exc, + exc_info=True, + ) return EREErrorResponse( ere_request_id=request.ere_request_id, error_type=type(exc).__name__, diff --git a/src/ere/services/factories.py b/src/ere/services/factories.py index 6442ae8..0766616 100644 --- a/src/ere/services/factories.py +++ b/src/ere/services/factories.py @@ -19,7 +19,10 @@ from ere.adapters.duckdb_schema import init_schema from ere.adapters.rdf_mapper_port import RDFMapper from ere.adapters.splink_linker_impl import SpLinkSimilarityLinker -from ere.services.entity_resolution_service import EntityResolver, EntityResolutionService +from ere.services.entity_resolution_service import ( + EntityResolver, + EntityResolutionService, +) from ere.services.resolver_config import ResolverConfig @@ -47,7 +50,12 @@ def build_entity_resolver( Fully-constructed EntityResolver with DuckDB backend and Splink linker. 
""" if resolver_config_path is None: - config_path = Path(__file__).parent.parent.parent.parent / "infra" / "config" / "resolver.yaml" + config_path = ( + Path(__file__).parent.parent.parent.parent + / "infra" + / "config" + / "resolver.yaml" + ) else: config_path = Path(resolver_config_path) diff --git a/src/ere/services/resolver_config.py b/src/ere/services/resolver_config.py index e3c839f..50b49bb 100644 --- a/src/ere/services/resolver_config.py +++ b/src/ere/services/resolver_config.py @@ -7,7 +7,9 @@ class DuckDBConfig(BaseModel): """DuckDB database configuration.""" type: str = "in-memory" # "in-memory" or "persistent" - path: str = ":memory:" # Database path: ":memory:" for in-memory, file path for persistent + path: str = ( + ":memory:" # Database path: ":memory:" for in-memory, file path for persistent + ) class ResolverConfig(BaseModel): diff --git a/test/conftest.py b/test/conftest.py index 4cdc4d2..1d93d39 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -215,6 +215,7 @@ def rdf_mapper(rdf_mapping_path): # pylint: disable=redefined-outer-name # pyt # Redis fixture # ============================================================================ + @pytest.fixture(scope="module") def redis_client(): """ diff --git a/test/e2e/test_ere.py b/test/e2e/test_ere.py index e5bb5ee..c8d05e5 100644 --- a/test/e2e/test_ere.py +++ b/test/e2e/test_ere.py @@ -141,7 +141,9 @@ def test_single_request_resolution_flow(redis_client, redis_queues, queue_worker redis_client.rpush(request_queue, request_bytes) # 2. Process message using worker - assert queue_worker.process_single_message() is True, "Worker should process message" + assert queue_worker.process_single_message() is True, ( + "Worker should process message" + ) # 3. 
Verify response in queue result = redis_client.brpop(response_queue, timeout=1) diff --git a/test/features/steps/test_direct_service_resolution_steps.py b/test/features/steps/test_direct_service_resolution_steps.py index 678efea..b39d6b8 100644 --- a/test/features/steps/test_direct_service_resolution_steps.py +++ b/test/features/steps/test_direct_service_resolution_steps.py @@ -2,6 +2,7 @@ Tests resolve_entity_mention(EntityMention) -> ClusterReference directly. """ + import pytest from assertpy import assert_that from erspec.models.core import ClusterReference, EntityMention, EntityMentionIdentifier @@ -41,6 +42,7 @@ def outcome(): # store either "result" or "exception" return {"result": None, "exception": None} + # --------------------------------------------------------------------------- # Background # --------------------------------------------------------------------------- @@ -58,11 +60,23 @@ def fresh_service(entity_resolution_service): @given( - parsers.parse('entity mention "{mention_id}" of type "{entity_type}" was already resolved with content from "{rdf_file_first}"'), + parsers.parse( + 'entity mention "{mention_id}" of type "{entity_type}" was already resolved with content from "{rdf_file_first}"' + ), target_fixture="seed_result", ) -def pre_resolve(mention_id: str, entity_type: str, rdf_file_first: str, entity_resolution_service, rdf_mapper) -> ClusterReference: - return resolve_entity_mention(_make_mention(mention_id, entity_type, load_rdf(rdf_file_first)), entity_resolution_service, rdf_mapper) +def pre_resolve( + mention_id: str, + entity_type: str, + rdf_file_first: str, + entity_resolution_service, + rdf_mapper, +) -> ClusterReference: + return resolve_entity_mention( + _make_mention(mention_id, entity_type, load_rdf(rdf_file_first)), + entity_resolution_service, + rdf_mapper, + ) # --------------------------------------------------------------------------- @@ -71,19 +85,43 @@ def pre_resolve(mention_id: str, entity_type: str, rdf_file_first: 
str, entity_r @when( - parsers.parse('I resolve the first entity mention "{mention_id}" of type "{entity_type}" with content from "{rdf_file}"'), + parsers.parse( + 'I resolve the first entity mention "{mention_id}" of type "{entity_type}" with content from "{rdf_file}"' + ), target_fixture="first_result", ) -def resolve_first(mention_id: str, entity_type: str, rdf_file: str, entity_resolution_service, rdf_mapper) -> ClusterReference: - return resolve_entity_mention(_make_mention(mention_id, entity_type, load_rdf(rdf_file)), entity_resolution_service, rdf_mapper) +def resolve_first( + mention_id: str, + entity_type: str, + rdf_file: str, + entity_resolution_service, + rdf_mapper, +) -> ClusterReference: + return resolve_entity_mention( + _make_mention(mention_id, entity_type, load_rdf(rdf_file)), + entity_resolution_service, + rdf_mapper, + ) @when( - parsers.parse('I resolve the second entity mention "{mention_id}" of type "{entity_type}" with content from "{rdf_file}"'), + parsers.parse( + 'I resolve the second entity mention "{mention_id}" of type "{entity_type}" with content from "{rdf_file}"' + ), target_fixture="second_result", ) -def resolve_second(mention_id: str, entity_type: str, rdf_file: str, entity_resolution_service, rdf_mapper) -> ClusterReference: - return resolve_entity_mention(_make_mention(mention_id, entity_type, load_rdf(rdf_file)), entity_resolution_service, rdf_mapper) +def resolve_second( + mention_id: str, + entity_type: str, + rdf_file: str, + entity_resolution_service, + rdf_mapper, +) -> ClusterReference: + return resolve_entity_mention( + _make_mention(mention_id, entity_type, load_rdf(rdf_file)), + entity_resolution_service, + rdf_mapper, + ) # --------------------------------------------------------------------------- @@ -92,20 +130,40 @@ def resolve_second(mention_id: str, entity_type: str, rdf_file: str, entity_reso @when( - parsers.parse('I resolve entity mention "{mention_id}" of type "{entity_type}" with content from 
"{rdf_file}"'), + parsers.parse( + 'I resolve entity mention "{mention_id}" of type "{entity_type}" with content from "{rdf_file}"' + ), target_fixture="first_result", ) -def resolve_mention(mention_id: str, entity_type: str, rdf_file: str, entity_resolution_service, rdf_mapper) -> ClusterReference: +def resolve_mention( + mention_id: str, + entity_type: str, + rdf_file: str, + entity_resolution_service, + rdf_mapper, +) -> ClusterReference: mention = _make_mention(mention_id, entity_type, load_rdf(rdf_file)) return resolve_entity_mention(mention, entity_resolution_service, rdf_mapper) @when( - parsers.parse('I resolve entity mention "{mention_id}" of type "{entity_type}" with content from "{rdf_file}" again'), + parsers.parse( + 'I resolve entity mention "{mention_id}" of type "{entity_type}" with content from "{rdf_file}" again' + ), target_fixture="second_result", ) -def resolve_mention_again(mention_id: str, entity_type: str, rdf_file: str, entity_resolution_service, rdf_mapper) -> ClusterReference: - return resolve_entity_mention(_make_mention(mention_id, entity_type, load_rdf(rdf_file)), entity_resolution_service, rdf_mapper) +def resolve_mention_again( + mention_id: str, + entity_type: str, + rdf_file: str, + entity_resolution_service, + rdf_mapper, +) -> ClusterReference: + return resolve_entity_mention( + _make_mention(mention_id, entity_type, load_rdf(rdf_file)), + entity_resolution_service, + rdf_mapper, + ) # --------------------------------------------------------------------------- @@ -114,12 +172,25 @@ def resolve_mention_again(mention_id: str, entity_type: str, rdf_file: str, enti @when( - parsers.parse('I try to resolve entity mention "{mention_id}" of type "{entity_type}" with content from "{rdf_file}"'), + parsers.parse( + 'I try to resolve entity mention "{mention_id}" of type "{entity_type}" with content from "{rdf_file}"' + ), target_fixture="raised_exception", ) -def try_resolve_conflict(mention_id: str, entity_type: str, rdf_file: str, 
outcome, entity_resolution_service, rdf_mapper) -> Exception | None: +def try_resolve_conflict( + mention_id: str, + entity_type: str, + rdf_file: str, + outcome, + entity_resolution_service, + rdf_mapper, +) -> Exception | None: try: - outcome["result"] = resolve_entity_mention(_make_mention(mention_id, entity_type, load_rdf(rdf_file)), entity_resolution_service, rdf_mapper) + outcome["result"] = resolve_entity_mention( + _make_mention(mention_id, entity_type, load_rdf(rdf_file)), + entity_resolution_service, + rdf_mapper, + ) return None except Exception as exc: outcome["exception"] = exc @@ -128,12 +199,25 @@ def try_resolve_conflict(mention_id: str, entity_type: str, rdf_file: str, outco @when( # parsers.re required: parsers.parse cannot match an empty string for {bad_content} - parsers.re(r'I try to resolve entity mention "(?P[^"]+)" of type "(?P[^"]+)" with invalid content "(?P.*)"'), + parsers.re( + r'I try to resolve entity mention "(?P[^"]+)" of type "(?P[^"]+)" with invalid content "(?P.*)"' + ), target_fixture="raised_exception", ) -def try_resolve_malformed(mention_id: str, entity_type: str, bad_content: str, outcome, entity_resolution_service, rdf_mapper) -> Exception | None: +def try_resolve_malformed( + mention_id: str, + entity_type: str, + bad_content: str, + outcome, + entity_resolution_service, + rdf_mapper, +) -> Exception | None: try: - outcome["result"] = resolve_entity_mention(_make_mention(mention_id, entity_type, bad_content), entity_resolution_service, rdf_mapper) + outcome["result"] = resolve_entity_mention( + _make_mention(mention_id, entity_type, bad_content), + entity_resolution_service, + rdf_mapper, + ) return None except Exception as exc: outcome["exception"] = exc @@ -146,7 +230,9 @@ def try_resolve_malformed(mention_id: str, entity_type: str, bad_content: str, o @then("both results are ClusterReference instances") -def check_cluster_reference_type(first_result: ClusterReference, second_result: ClusterReference): +def 
check_cluster_reference_type( + first_result: ClusterReference, second_result: ClusterReference +): assert_that(first_result).is_instance_of(ClusterReference) assert_that(second_result).is_instance_of(ClusterReference) @@ -157,12 +243,16 @@ def check_same_cluster(first_result: ClusterReference, second_result: ClusterRef @then("the cluster_ids are different") -def check_different_clusters(first_result: ClusterReference, second_result: ClusterReference): +def check_different_clusters( + first_result: ClusterReference, second_result: ClusterReference +): assert_that(first_result.cluster_id).is_not_equal_to(second_result.cluster_id) @then("both ClusterReference results are identical") -def check_identical_results(first_result: ClusterReference, second_result: ClusterReference): +def check_identical_results( + first_result: ClusterReference, second_result: ClusterReference +): assert_that(first_result).is_equal_to(second_result) assert_that(first_result).is_equal_to(second_result) @@ -183,7 +273,9 @@ def check_exception_raised(outcome): ) elif isinstance(raised_exception, ConflictError): # Conflict errors should contain mention_id and indicate content mismatch - assert_that(str(raised_exception)).contains("was already resolved with different content") + assert_that(str(raised_exception)).contains( + "was already resolved with different content" + ) @then("the result is a ClusterReference") @@ -193,7 +285,9 @@ def check_single_result_type(first_result: ClusterReference): @then("the cluster_id matches the seed cluster") -def check_matches_seed_cluster(first_result: ClusterReference, seed_result: ClusterReference): +def check_matches_seed_cluster( + first_result: ClusterReference, seed_result: ClusterReference +): """Verify new mention joined the pre-established cluster (not a new one).""" assert_that(first_result.cluster_id).is_equal_to(seed_result.cluster_id) @@ -207,4 +301,6 @@ def check_unsupported_entity_type_exception(outcome): f"Result was: {outcome['result']!r}" ) 
assert_that(raised_exception).is_instance_of(ValueError) - assert_that(str(raised_exception)).matches(r"No rdf_mapping configured for entity_type") + assert_that(str(raised_exception)).matches( + r"No rdf_mapping configured for entity_type" + ) diff --git a/test/features/steps/test_entity_resolution_algorithm_steps.py b/test/features/steps/test_entity_resolution_algorithm_steps.py index 89d0984..c41e42c 100644 --- a/test/features/steps/test_entity_resolution_algorithm_steps.py +++ b/test/features/steps/test_entity_resolution_algorithm_steps.py @@ -81,7 +81,7 @@ def resolve_mention(mention_id: str, algorithm_context): # Create mention mention = Mention( id=MentionId(value=mention_id), - attributes={"legal_name": f"Company {mention_id}", "country_code": "US"} + attributes={"legal_name": f"Company {mention_id}", "country_code": "US"}, ) # Update linker with new similarities @@ -102,7 +102,9 @@ def resolve_mention(mention_id: str, algorithm_context): algorithm_context["last_result"] = result -@when(parsers.parse('I set similarity between "{left_id}" and "{right_id}" to {score:f}')) +@when( + parsers.parse('I set similarity between "{left_id}" and "{right_id}" to {score:f}') +) def set_similarity(left_id: str, right_id: str, score: float, algorithm_context): """Set similarity between two mentions.""" pair_set = frozenset([left_id, right_id]) @@ -114,8 +116,14 @@ def set_similarity(left_id: str, right_id: str, score: float, algorithm_context) # =============================================================================== -@then(parsers.parse('mention "{mention_id}" is in cluster "{cluster_id}" with score {score:f}')) -def check_mention_cluster(mention_id: str, cluster_id: str, score: float, algorithm_context): +@then( + parsers.parse( + 'mention "{mention_id}" is in cluster "{cluster_id}" with score {score:f}' + ) +) +def check_mention_cluster( + mention_id: str, cluster_id: str, score: float, algorithm_context +): """Verify that a mention is assigned to a cluster with 
the expected score.""" result = algorithm_context["last_result"] assert_that(result.top.cluster_id.value).is_equal_to(cluster_id) @@ -129,7 +137,9 @@ def check_candidate_count(count: int, algorithm_context): assert_that(len(result.candidates)).is_equal_to(count) -@then(parsers.parse('candidate {index:d} is cluster "{cluster_id}" with score {score:f}')) +@then( + parsers.parse('candidate {index:d} is cluster "{cluster_id}" with score {score:f}') +) def check_candidate(index: int, cluster_id: str, score: float, algorithm_context): """Verify a specific candidate cluster and its score.""" result = algorithm_context["last_result"] @@ -139,7 +149,9 @@ def check_candidate(index: int, cluster_id: str, score: float, algorithm_context assert_that(candidate.score).is_close_to(score, 0.01) -@then(parsers.parse('the cluster assignment for mention "{mention_id}" is "{cluster_id}"')) +@then( + parsers.parse('the cluster assignment for mention "{mention_id}" is "{cluster_id}"') +) def check_cluster_assignment(mention_id: str, cluster_id: str, algorithm_context): """Verify the cluster assignment from state.""" service = algorithm_context["service"] diff --git a/test/integration/test_entity_resolver.py b/test/integration/test_entity_resolver.py index 5470190..abf5e7e 100644 --- a/test/integration/test_entity_resolver.py +++ b/test/integration/test_entity_resolver.py @@ -122,7 +122,9 @@ def test_first_mention_resolves_to_singleton(service, con): # Verify persistence mention_count = con.execute("SELECT COUNT(*) FROM mentions").fetchone()[0] assert mention_count == 1 - cluster_count = con.execute("SELECT COUNT(DISTINCT cluster_id) FROM clusters").fetchone()[0] + cluster_count = con.execute( + "SELECT COUNT(DISTINCT cluster_id) FROM clusters" + ).fetchone()[0] assert cluster_count == 1 @@ -169,7 +171,9 @@ def test_below_threshold_creates_new_cluster(service, con): assert mention_count == 2 # Verify cluster assignments persist - cluster_count = con.execute("SELECT COUNT(DISTINCT 
cluster_id) FROM clusters").fetchone()[0] + cluster_count = con.execute( + "SELECT COUNT(DISTINCT cluster_id) FROM clusters" + ).fetchone()[0] assert cluster_count >= 1 @@ -243,7 +247,9 @@ def test_train_succeeds_with_sufficient_records(service, con): service.train() # Verify linker is still functional - query = Mention(mention_id="test_q", legal_name="Acme Technologies", country_code="US") + query = Mention( + mention_id="test_q", legal_name="Acme Technologies", country_code="US" + ) result = service.resolve(query) assert result.top is not None @@ -436,11 +442,15 @@ def test_multiple_resolves_accumulate_state(service, con): state = service.state() # Verify state accumulates - assert state.mention_count == i, f"After resolving {i} mentions, should have {i} in DB" + assert state.mention_count == i, ( + f"After resolving {i} mentions, should have {i} in DB" + ) # Later mentions should see earlier mentions in results if i > 1: - assert len(result.candidates) >= 1, "Should see candidates from earlier mentions" + assert len(result.candidates) >= 1, ( + "Should see candidates from earlier mentions" + ) @pytest.mark.integration @@ -452,14 +462,22 @@ def test_end_to_end_realistic_scenario(service, con): # Stream of mentions: 3 companies with variants mentions = [ # Company A - Mention(mention_id="acme_1", legal_name="Acme Corporation Ltd", country_code="US"), + Mention( + mention_id="acme_1", legal_name="Acme Corporation Ltd", country_code="US" + ), Mention(mention_id="acme_2", legal_name="Acme Corp", country_code="US"), Mention(mention_id="acme_3", legal_name="Acme", country_code="US"), # Company B - Mention(mention_id="bestco_1", legal_name="BestCo Industries Inc", country_code="US"), + Mention( + mention_id="bestco_1", legal_name="BestCo Industries Inc", country_code="US" + ), Mention(mention_id="bestco_2", legal_name="BestCo Inc", country_code="US"), # Company C - Mention(mention_id="techsoft_1", legal_name="TechSoft Solutions Limited", country_code="US"), + Mention( + 
mention_id="techsoft_1", + legal_name="TechSoft Solutions Limited", + country_code="US", + ), Mention(mention_id="techsoft_2", legal_name="TechSoft Ltd", country_code="US"), Mention(mention_id="techsoft_3", legal_name="TechSoft", country_code="US"), ] @@ -481,9 +499,14 @@ def test_end_to_end_realistic_scenario(service, con): # Verify all mentions are assigned assert set(mention_to_cluster.keys()) == { - "acme_1", "acme_2", "acme_3", - "bestco_1", "bestco_2", - "techsoft_1", "techsoft_2", "techsoft_3" + "acme_1", + "acme_2", + "acme_3", + "bestco_1", + "bestco_2", + "techsoft_1", + "techsoft_2", + "techsoft_3", }, "All mentions should be assigned to clusters" # Verify different companies are in different clusters @@ -492,5 +515,6 @@ def test_end_to_end_realistic_scenario(service, con): bestco_cluster = mention_to_cluster["bestco_1"] techsoft_cluster = mention_to_cluster["techsoft_1"] - assert len({acme_cluster, bestco_cluster, techsoft_cluster}) == 3, \ + assert len({acme_cluster, bestco_cluster, techsoft_cluster}) == 3, ( "Different companies should be in different clusters" + ) diff --git a/test/integration/test_redis_integration.py b/test/integration/test_redis_integration.py index 2b22234..2b0fac4 100644 --- a/test/integration/test_redis_integration.py +++ b/test/integration/test_redis_integration.py @@ -15,7 +15,9 @@ import pytest -def create_test_request(request_id: str = "test-001", content: str = "John Smith") -> dict: +def create_test_request( + request_id: str = "test-001", content: str = "John Smith" +) -> dict: """Create a valid EntityMentionResolutionRequest for testing.""" return { "type": "EntityMentionResolutionRequest", @@ -80,14 +82,20 @@ def test_receive_response(self, redis_client): if new_response_count == 0: pytest.skip("ERE service not running — skipping response test") - assert new_response_count == 1, f"Expected 1 new response, got {new_response_count}" + assert new_response_count == 1, ( + f"Expected 1 new response, got 
{new_response_count}" + ) # Retrieve and verify response format (latest response is at index 0) response_raw = redis_client.lindex("ere_responses", 0) assert response_raw is not None, "Response is empty" # response_raw is bytes, decode it - response_str = response_raw.decode("utf-8") if isinstance(response_raw, bytes) else response_raw + response_str = ( + response_raw.decode("utf-8") + if isinstance(response_raw, bytes) + else response_raw + ) response = json.loads(response_str) # Verify response structure @@ -115,7 +123,9 @@ def test_multiple_requests(self, redis_client): if new_response_count == 0: pytest.skip("ERE service not running — skipping response verification") - assert new_response_count == 3, f"Expected 3 new responses, got {new_response_count}" + assert new_response_count == 3, ( + f"Expected 3 new responses, got {new_response_count}" + ) def test_redis_authentication(self, redis_client): """Test: Verify Redis connection works with authentication.""" @@ -140,4 +150,4 @@ def test_malformed_request_handling(self, redis_client): if __name__ == "__main__": """Allow running tests directly: python test/integration/test_redis_integration.py""" - pytest.main([__file__, "-v"]) \ No newline at end of file + pytest.main([__file__, "-v"]) diff --git a/test/stress/stress_test.py b/test/stress/stress_test.py index 16e3db5..80dfb55 100644 --- a/test/stress/stress_test.py +++ b/test/stress/stress_test.py @@ -141,7 +141,10 @@ def create_resolver( def seed_and_train( - resolver: EntityResolver, mentions: list[Mention], n_seed: int, skip_train: bool = False + resolver: EntityResolver, + mentions: list[Mention], + n_seed: int, + skip_train: bool = False, ): """ Seed resolver with first n_seed mentions and optionally trigger training. 
diff --git a/test/unit/adapters/stubs.py b/test/unit/adapters/stubs.py index 5529b81..2b7741b 100644 --- a/test/unit/adapters/stubs.py +++ b/test/unit/adapters/stubs.py @@ -15,12 +15,14 @@ def _get_repository_types(): """Lazy import to avoid circular dependency with services.__init__.""" from ere.adapters import repositories + return repositories def _get_linker_type(): """Lazy import to avoid circular dependency.""" from ere.services import linker + return linker @@ -95,9 +97,7 @@ def count(self) -> int: def find_for(self, mention_id: MentionId) -> list[MentionLink]: """Find all links involving the given mention (either side).""" return [ - link - for link in self._links - if mention_id in (link.left_id, link.right_id) + link for link in self._links if mention_id in (link.left_id, link.right_id) ] diff --git a/test/unit/adapters/test_duckdb_adapters.py b/test/unit/adapters/test_duckdb_adapters.py index 8087bc9..03f5b79 100644 --- a/test/unit/adapters/test_duckdb_adapters.py +++ b/test/unit/adapters/test_duckdb_adapters.py @@ -80,7 +80,7 @@ def test_resolve_first_mention_persists_to_db(service, con): """ mention = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme Corp", "country_code": "US"} + attributes={"legal_name": "Acme Corp", "country_code": "US"}, ) result = service.resolve(mention) @@ -89,7 +89,9 @@ def test_resolve_first_mention_persists_to_db(service, con): mention_count = con.execute("SELECT COUNT(*) FROM mentions").fetchone()[0] assert mention_count == 1 - cluster_count = con.execute("SELECT COUNT(DISTINCT cluster_id) FROM clusters").fetchone()[0] + cluster_count = con.execute( + "SELECT COUNT(DISTINCT cluster_id) FROM clusters" + ).fetchone()[0] assert cluster_count == 1 # Check state @@ -109,11 +111,11 @@ def test_resolve_strong_match_joins_cluster_in_db(service, con): """ m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) m2 
= Mention( id=MentionId(value="m2"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) # Set up linker to return high score @@ -144,11 +146,11 @@ def test_resolve_weak_match_creates_separate_cluster(service, con): """ m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) m2 = Mention( id=MentionId(value="m2"), - attributes={"legal_name": "Similar but different", "country_code": "US"} + attributes={"legal_name": "Similar but different", "country_code": "US"}, ) # Linker returns score below clustering threshold (0.8) @@ -179,11 +181,11 @@ def test_resolve_no_match_creates_singleton_cluster(service, con): """ m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) m2 = Mention( id=MentionId(value="m2"), - attributes={"legal_name": "Completely Different", "country_code": "UK"} + attributes={"legal_name": "Completely Different", "country_code": "UK"}, ) # No similarity map entry = no match @@ -202,12 +204,10 @@ def test_resolve_no_match_creates_singleton_cluster(service, con): def test_state_returns_correct_counts(service, con): """Verify that service.state() returns accurate counts.""" m1 = Mention( - id=MentionId(value="m1"), - attributes={"legal_name": "A", "country_code": "US"} + id=MentionId(value="m1"), attributes={"legal_name": "A", "country_code": "US"} ) m2 = Mention( - id=MentionId(value="m2"), - attributes={"legal_name": "B", "country_code": "US"} + id=MentionId(value="m2"), attributes={"legal_name": "B", "country_code": "US"} ) service._linker._similarity_map = {frozenset(["m1", "m2"]): 0.9} @@ -224,12 +224,10 @@ def test_state_returns_correct_counts(service, con): def test_cluster_membership_mapping(service, con): """Verify cluster_membership dict is correctly structured.""" m1 = 
Mention( - id=MentionId(value="m1"), - attributes={"legal_name": "A", "country_code": "US"} + id=MentionId(value="m1"), attributes={"legal_name": "A", "country_code": "US"} ) m2 = Mention( - id=MentionId(value="m2"), - attributes={"legal_name": "B", "country_code": "US"} + id=MentionId(value="m2"), attributes={"legal_name": "B", "country_code": "US"} ) service._linker._similarity_map = {frozenset(["m1", "m2"]): 0.9} diff --git a/test/unit/services/test_entity_resolution_service.py b/test/unit/services/test_entity_resolution_service.py index dfca7d2..0948f06 100644 --- a/test/unit/services/test_entity_resolution_service.py +++ b/test/unit/services/test_entity_resolution_service.py @@ -57,7 +57,7 @@ def test_first_mention_is_singleton(service): """Resolving the first mention should create a singleton cluster.""" mention = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme Corp", "country_code": "US"} + attributes={"legal_name": "Acme Corp", "country_code": "US"}, ) result = service.resolve(mention) @@ -79,7 +79,7 @@ def test_strong_match_joins_cluster(service): # Resolve m1 first m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) result1 = service.resolve(m1) assert result1.top.cluster_id.value == "m1" @@ -87,7 +87,7 @@ def test_strong_match_joins_cluster(service): # Now resolve m2 with strong match to m1 m2 = Mention( id=MentionId(value="m2"), - attributes={"legal_name": "Acme Corp", "country_code": "US"} + attributes={"legal_name": "Acme Corp", "country_code": "US"}, ) # Set up the linker to return a strong match (m1, m2, 0.95) @@ -116,14 +116,14 @@ def test_below_threshold_becomes_singleton(service): # Resolve m1 first m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) service.resolve(m1) # Resolve m2 with weak match to m1 m2 = 
Mention( id=MentionId(value="m2"), - attributes={"legal_name": "ACME Inc", "country_code": "US"} + attributes={"legal_name": "ACME Inc", "country_code": "US"}, ) # Set up weak match (0.7 < threshold 0.8) @@ -136,7 +136,9 @@ def test_below_threshold_becomes_singleton(service): # m2 should be assigned to its own cluster (cluster "m2"), # but genCand still includes m1's cluster (via the below-threshold link) - assert result2.top.cluster_id.value == "m1" # Still top by score, but own cluster also present + assert ( + result2.top.cluster_id.value == "m1" + ) # Still top by score, but own cluster also present assert result2.top.score == pytest.approx(0.7, abs=0.01) # Verify the new invariant: own cluster is always included @@ -165,11 +167,11 @@ def test_gen_cand_includes_below_threshold_links(service): # Resolve m1 and m3 in cluster 1, m3 in cluster 3 m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) m3 = Mention( id=MentionId(value="m3"), - attributes={"legal_name": "Globex", "country_code": "US"} + attributes={"legal_name": "Globex", "country_code": "US"}, ) service.resolve(m1) service.resolve(m3) # m3 forms its own cluster @@ -179,7 +181,7 @@ def test_gen_cand_includes_below_threshold_links(service): # - weak link (0.7) to m3 (cluster "m3") -> below threshold m2 = Mention( id=MentionId(value="m2"), - attributes={"legal_name": "Acme Corp", "country_code": "US"} + attributes={"legal_name": "Acme Corp", "country_code": "US"}, ) service._linker = FixedSimilarityLinker( @@ -210,11 +212,11 @@ def test_gen_cand_groups_by_cluster(service): # Cluster 1: m1, m2 m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) m2 = Mention( id=MentionId(value="m2"), - attributes={"legal_name": "Acme Corp", "country_code": "US"} + attributes={"legal_name": "Acme Corp", 
"country_code": "US"}, ) service.resolve(m1) service._linker = FixedSimilarityLinker({frozenset(["m1", "m2"]): 0.95}) @@ -224,7 +226,7 @@ def test_gen_cand_groups_by_cluster(service): # m3 has weak links to both m1 (0.75) and m2 (0.85) in the same cluster m3 = Mention( id=MentionId(value="m3"), - attributes={"legal_name": "Acme Industries", "country_code": "US"} + attributes={"legal_name": "Acme Industries", "country_code": "US"}, ) service._linker = FixedSimilarityLinker( @@ -258,7 +260,7 @@ def test_train_can_be_called_anytime(service): attributes={ "legal_name": "Company 1", "country_code": "US", - } + }, ) service.resolve(mention) @@ -313,7 +315,7 @@ def counting_train(): attributes={ "legal_name": f"Company {i}", "country_code": "US", - } + }, ) service.resolve(mention) service._linker.register_mention(mention) @@ -326,11 +328,11 @@ def test_state_reflects_mentions(service): """State should reflect all resolved mentions.""" m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) m2 = Mention( id=MentionId(value="m2"), - attributes={"legal_name": "Acme Corp", "country_code": "US"} + attributes={"legal_name": "Acme Corp", "country_code": "US"}, ) service.resolve(m1) @@ -348,7 +350,7 @@ def test_state_reflects_clusters(service): """State should reflect cluster membership.""" m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) service.resolve(m1) @@ -362,11 +364,11 @@ def test_state_reflects_similarities(service): """State should reflect all stored similarities.""" m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) m2 = Mention( id=MentionId(value="m2"), - attributes={"legal_name": "Acme Corp", "country_code": "US"} + attributes={"legal_name": "Acme 
Corp", "country_code": "US"}, ) service.resolve(m1) @@ -392,7 +394,7 @@ def test_resolution_result_never_empty(service): """Every resolve() call should return non-empty ResolutionResult.""" m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) result = service.resolve(m1) @@ -437,7 +439,7 @@ def test_resolution_result_always_top_n_pruned(service): for i in range(2, 7): mention = Mention( id=MentionId(value=f"m{i}"), - attributes={"legal_name": f"Company {i}", "country_code": "US"} + attributes={"legal_name": f"Company {i}", "country_code": "US"}, ) service.resolve(mention) @@ -447,7 +449,7 @@ def test_resolution_result_always_top_n_pruned(service): m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Company 1", "country_code": "US"} + attributes={"legal_name": "Company 1", "country_code": "US"}, ) result = service.resolve(m1) @@ -459,15 +461,15 @@ def test_multiple_independent_clusters(service): """Mentions with no links should form independent clusters.""" m1 = Mention( id=MentionId(value="m1"), - attributes={"legal_name": "Acme", "country_code": "US"} + attributes={"legal_name": "Acme", "country_code": "US"}, ) m2 = Mention( id=MentionId(value="m2"), - attributes={"legal_name": "Globex", "country_code": "US"} + attributes={"legal_name": "Globex", "country_code": "US"}, ) m3 = Mention( id=MentionId(value="m3"), - attributes={"legal_name": "Initech", "country_code": "US"} + attributes={"legal_name": "Initech", "country_code": "US"}, ) # No links between any of them From e2f5af37cd68406aeffdd9aaeb5b442435f94cb2 Mon Sep 17 00:00:00 2001 From: Twicechild Date: Thu, 26 Mar 2026 14:52:43 +0200 Subject: [PATCH 04/14] refactor(docker): multi-stage build, .dockerignore, non-root user --- .dockerignore | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ .gitattributes | 4 ++++ infra/Dockerfile | 59 ++++++++++++++++++++++++++++++++---------------- 3 
files changed, 102 insertions(+), 20 deletions(-) create mode 100644 .dockerignore create mode 100644 .gitattributes diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..33f05e6 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,59 @@ +# Version control +.git +.gitignore +.gitattributes + +# Python +__pycache__ +*.pyc +*.pyo +.venv +.mypy_cache +.pytest_cache +.ruff_cache +*.egg-info + +# IDE +.vscode +.idea + +# Environment +.env +.env.* +!.env.example + +# Docker (no need to send these into the build context) +infra/Dockerfile +infra/compose.dev.yaml +infra/README.md +infra/.env* + +# AI / tooling config +.claude +CLAUDE.md + +# CI +.github + +# Docs +docs + +# Build artifacts and data +dist +build +data +reports +coverage.xml +htmlcov +.coverage +.tox + +# Tests and demo (not needed at runtime) +test +demo + +# Project config (not needed at runtime) +sonar-project.properties +.pylintrc +.importlinter +tox.ini diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..bfec021 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +# Enforce Unix line endings +*.sh text eol=lf +Dockerfile text eol=lf +*.yaml text eol=lf diff --git a/infra/Dockerfile b/infra/Dockerfile index 5eb0407..169e41b 100644 --- a/infra/Dockerfile +++ b/infra/Dockerfile @@ -1,38 +1,57 @@ -# ── ERE application image ────────────────────────────────────────────────── -# Builds the Entity Resolution Engine service for local development. -# Requires only Docker — no local Python, Redis, or DuckDB installation. -# +# Multi-stage build for the Entity Resolution Engine. 
# Build context: repository root (one level above /infra) -# Usage: docker compose -f infra/docker-compose.yml up --build -# ─────────────────────────────────────────────────────────────────────────── -FROM python:3.12-slim +# ============================================================================= +# Builder stage: install dependencies +# ============================================================================= +FROM python:3.12-slim AS builder + +ARG POETRY_VERSION=">=2.0.0,<3.0.0" + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=true \ + POETRY_NO_INTERACTION=1 # git is required to fetch the ers-spec dependency from GitHub RUN apt-get update \ && apt-get install -y --no-install-recommends git \ && rm -rf /var/lib/apt/lists/* -# Install Poetry (locked to major version 2) -RUN pip install --no-cache-dir "poetry>=2.0.0,<3.0.0" +RUN pip install --no-cache-dir "poetry${POETRY_VERSION}" WORKDIR /app -# ── Dependency layer (cached unless pyproject.toml / poetry.lock change) ─── -COPY pyproject.toml poetry.lock* ./ +COPY pyproject.toml poetry.lock ./ -# Install into system Python (no virtualenv needed inside the container) -RUN poetry config virtualenvs.create false \ - && poetry install --without dev --no-root --no-interaction +RUN poetry install --without dev --no-root -# ── Application source ────────────────────────────────────────────────────── COPY README.md ./ COPY src/ ./src/ -COPY infra/config/ ./config/ -# Install the ere package itself -RUN poetry install --without dev --no-interaction +RUN poetry install --without dev + +# ============================================================================= +# Runtime stage: minimal image +# ============================================================================= +FROM python:3.12-slim AS runtime + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PATH="/app/.venv/bin:${PATH}" + +RUN groupadd --gid 1000 appuser && \ + useradd --uid 1000 --gid appuser 
--shell /bin/bash --create-home appuser + +WORKDIR /app + +COPY --from=builder /app/.venv .venv +COPY --from=builder /app/src src +COPY config/ ./config/ + +# Volume mount point for DuckDB persistent storage +RUN mkdir -p /data && chown appuser:appuser /data + +USER appuser -# ── Runtime ───────────────────────────────────────────────────────────────── -# Fail fast: Python will exit immediately if the module cannot be imported. CMD ["python", "-m", "ere.entrypoints.app"] From 1cf319c7ae95ee4781129063dd65ff8905ba17d9 Mon Sep 17 00:00:00 2001 From: Twicechild Date: Thu, 26 Mar 2026 14:53:01 +0200 Subject: [PATCH 05/14] refactor(infra): modernize compose, env, Makefile and config layout --- .gitignore | 1 + Makefile | 51 +++++++++--- {infra/config => config}/README.md | 0 {infra/config => config}/rdf_mapping.yaml | 40 +++++----- {infra/config => config}/resolver.yaml | 0 .../config => config}/resolver_compound.yaml | 50 ++++++------ .../config => config}/resolver_multirule.yaml | 56 ++++++------- infra/.env.example | 18 +++++ infra/.env.local | 28 ------- infra/README.md | 71 +++++++++++++++++ infra/compose.dev.yaml | 78 +++++++++++++++++++ infra/docker-compose.yml | 66 ---------------- src/ere/adapters/rdf_mapper_impl.py | 14 +++- src/ere/services/factories.py | 9 ++- 14 files changed, 299 insertions(+), 183 deletions(-) rename {infra/config => config}/README.md (100%) rename {infra/config => config}/rdf_mapping.yaml (97%) rename {infra/config => config}/resolver.yaml (100%) rename {infra/config => config}/resolver_compound.yaml (96%) rename {infra/config => config}/resolver_multirule.yaml (96%) create mode 100644 infra/.env.example delete mode 100644 infra/.env.local create mode 100644 infra/README.md create mode 100644 infra/compose.dev.yaml delete mode 100644 infra/docker-compose.yml diff --git a/.gitignore b/.gitignore index a75bb70..65f3ffe 100644 --- a/.gitignore +++ b/.gitignore @@ -136,6 +136,7 @@ celerybeat.pid # Environments .env +infra/.env .envrc .venv 
env/ diff --git a/Makefile b/Makefile index 6a6cfa1..933f068 100644 --- a/Makefile +++ b/Makefile @@ -28,6 +28,8 @@ SRC_PATH = ${PROJECT_PATH}/src TEST_PATH = ${PROJECT_PATH}/test BUILD_PATH = ${PROJECT_PATH}/dist INFRA_PATH = ${PROJECT_PATH}/infra +COMPOSE_FILE = ${INFRA_PATH}/compose.dev.yaml +ENV_FILE = ${INFRA_PATH}/.env PACKAGE_NAME = ere ICON_DONE = [✔] @@ -66,9 +68,13 @@ help: ## Display available targets @ echo "" @ echo -e " $(BUILD_PRINT)Infrastructure (Docker):$(END_BUILD_PRINT)" @ echo " infra-build - Build the ERE Docker image" - @ echo " infra-up - Start full stack (Redis + ERE) in detached mode" + @ echo " infra-up - Start services (docker compose up -d)" @ echo " infra-down - Stop and remove stack containers and networks" - @ echo " infra-logs - Tail ERE container logs" + @ echo " infra-down-volumes - Stop services and remove volumes (clean slate)" + @ echo " infra-rebuild - Rebuild images and start services" + @ echo " infra-rebuild-clean - Rebuild from scratch (no cache) and start" + @ echo " infra-logs - Follow service logs" + @ echo " infra-watch - Start services with file watching (sync src/ and config/)" @ echo "" @ echo -e " $(BUILD_PRINT)Utilities:$(END_BUILD_PRINT)" @ echo " clean - Remove build artifacts and caches" @@ -158,25 +164,48 @@ ci: ## Full CI pipeline for GitHub Actions (tox) #----------------------------------------------------------------------------- # Infrastructure commands (Docker) #----------------------------------------------------------------------------- -.PHONY: infra-build infra-up infra-down infra-logs +.PHONY: check-env infra-build infra-up infra-down infra-down-volumes infra-rebuild infra-rebuild-clean infra-logs infra-watch -infra-build: ## Build the ERE Docker image +check-env: + @ test -f $(ENV_FILE) || (echo -e "$(BUILD_PRINT)$(ICON_ERROR) Missing $(ENV_FILE). 
Run: cp infra/.env.example infra/.env$(END_BUILD_PRINT)" && exit 1) + +infra-build: check-env ## Build the ERE Docker image @ echo -e "$(BUILD_PRINT)$(ICON_PROGRESS) Building ERE Docker image$(END_BUILD_PRINT)" - @ docker compose -f $(INFRA_PATH)/docker-compose.yml build + @ docker compose -f $(COMPOSE_FILE) --env-file $(ENV_FILE) build @ echo -e "$(BUILD_PRINT)$(ICON_DONE) ERE image built$(END_BUILD_PRINT)" -infra-up: ## Start full stack: Redis + ERE (docker compose up --build) +infra-up: check-env ## Start services (docker compose up -d) @ echo -e "$(BUILD_PRINT)$(ICON_PROGRESS) Starting ERE stack$(END_BUILD_PRINT)" - @ docker compose -f $(INFRA_PATH)/docker-compose.yml up --build -d + @ docker compose -f $(COMPOSE_FILE) --env-file $(ENV_FILE) up -d @ echo -e "$(BUILD_PRINT)$(ICON_DONE) ERE stack is running — use 'make infra-logs' to follow output$(END_BUILD_PRINT)" -infra-down: ## Stop and remove ERE stack containers and networks +infra-down: check-env ## Stop and remove ERE stack containers and networks @ echo -e "$(BUILD_PRINT)$(ICON_PROGRESS) Stopping ERE stack$(END_BUILD_PRINT)" - @ docker compose -f $(INFRA_PATH)/docker-compose.yml down + @ docker compose -f $(COMPOSE_FILE) --env-file $(ENV_FILE) down @ echo -e "$(BUILD_PRINT)$(ICON_DONE) ERE stack stopped$(END_BUILD_PRINT)" -infra-logs: ## Tail logs from the ERE container - @ docker compose -f $(INFRA_PATH)/docker-compose.yml logs -f ere +infra-down-volumes: check-env ## Stop services and remove volumes (clean slate) + @ echo -e "$(BUILD_PRINT)$(ICON_PROGRESS) Stopping ERE stack and removing volumes$(END_BUILD_PRINT)" + @ docker compose -f $(COMPOSE_FILE) --env-file $(ENV_FILE) down -v + @ echo -e "$(BUILD_PRINT)$(ICON_DONE) ERE stack stopped and volumes removed$(END_BUILD_PRINT)" + +infra-rebuild: check-env ## Rebuild images and start services + @ echo -e "$(BUILD_PRINT)$(ICON_PROGRESS) Rebuilding ERE stack$(END_BUILD_PRINT)" + @ docker compose -f $(COMPOSE_FILE) --env-file $(ENV_FILE) up -d --build + @ 
echo -e "$(BUILD_PRINT)$(ICON_DONE) ERE stack rebuilt and started$(END_BUILD_PRINT)" + +infra-rebuild-clean: check-env ## Rebuild from scratch (no cache) and start + @ echo -e "$(BUILD_PRINT)$(ICON_PROGRESS) Rebuilding ERE stack (no cache)$(END_BUILD_PRINT)" + @ docker compose -f $(COMPOSE_FILE) --env-file $(ENV_FILE) build --no-cache + @ docker compose -f $(COMPOSE_FILE) --env-file $(ENV_FILE) up -d + @ echo -e "$(BUILD_PRINT)$(ICON_DONE) ERE stack rebuilt (clean) and started$(END_BUILD_PRINT)" + +infra-logs: check-env ## Follow service logs + @ docker compose -f $(COMPOSE_FILE) --env-file $(ENV_FILE) logs -f + +infra-watch: check-env ## Start services with file watching (sync src/ and config/) + @ echo -e "$(BUILD_PRINT)$(ICON_PROGRESS) Starting ERE stack with watch$(END_BUILD_PRINT)" + @ docker compose -f $(COMPOSE_FILE) --env-file $(ENV_FILE) watch #----------------------------------------------------------------------------- # Utility commands diff --git a/infra/config/README.md b/config/README.md similarity index 100% rename from infra/config/README.md rename to config/README.md diff --git a/infra/config/rdf_mapping.yaml b/config/rdf_mapping.yaml similarity index 97% rename from infra/config/rdf_mapping.yaml rename to config/rdf_mapping.yaml index 8dd02e2..4b856ed 100644 --- a/infra/config/rdf_mapping.yaml +++ b/config/rdf_mapping.yaml @@ -1,20 +1,20 @@ -# Namespace prefix registry - used by rdf_mapper.py to resolve prefixed names in field paths -namespaces: - epo: "http://data.europa.eu/a4g/ontology#" - org: "http://www.w3.org/ns/org#" - locn: "http://www.w3.org/ns/locn#" - cccev: "http://data.europa.eu/m8g/" - -# Entity type mappings: entity_type_string -> rdf_type + field property paths -# Property paths use / as separator for multi-hop traversal. -# Field names must match entity_fields in resolver.yaml (legal_name, country_code). 
-entity_types: - ORGANISATION: - rdf_type: "org:Organization" - fields: - legal_name: "epo:hasLegalName" - country_code: "cccev:registeredAddress/epo:hasCountryCode" - nuts_code: "cccev:registeredAddress/epo:hasNutsCode" - post_code: "cccev:registeredAddress/locn:postCode" - post_name: "cccev:registeredAddress/locn:postName" - thoroughfare: "cccev:registeredAddress/locn:thoroughfare" +# Namespace prefix registry - used by rdf_mapper.py to resolve prefixed names in field paths +namespaces: + epo: "http://data.europa.eu/a4g/ontology#" + org: "http://www.w3.org/ns/org#" + locn: "http://www.w3.org/ns/locn#" + cccev: "http://data.europa.eu/m8g/" + +# Entity type mappings: entity_type_string -> rdf_type + field property paths +# Property paths use / as separator for multi-hop traversal. +# Field names must match entity_fields in resolver.yaml (legal_name, country_code). +entity_types: + ORGANISATION: + rdf_type: "org:Organization" + fields: + legal_name: "epo:hasLegalName" + country_code: "cccev:registeredAddress/epo:hasCountryCode" + nuts_code: "cccev:registeredAddress/epo:hasNutsCode" + post_code: "cccev:registeredAddress/locn:postCode" + post_name: "cccev:registeredAddress/locn:postName" + thoroughfare: "cccev:registeredAddress/locn:thoroughfare" diff --git a/infra/config/resolver.yaml b/config/resolver.yaml similarity index 100% rename from infra/config/resolver.yaml rename to config/resolver.yaml diff --git a/infra/config/resolver_compound.yaml b/config/resolver_compound.yaml similarity index 96% rename from infra/config/resolver_compound.yaml rename to config/resolver_compound.yaml index 9cac682..47ff9d9 100644 --- a/infra/config/resolver_compound.yaml +++ b/config/resolver_compound.yaml @@ -1,25 +1,25 @@ -# Entity Resolver configuration — Compound blocking (country_code AND city) -# Blocks pairs unless both country_code AND city match. -# Creates tight, city-level blocks within countries. -# Trade-off: fewer comparisons (faster) but may miss cross-city variants. 
- -cache_strategy: tf_incremental - -threshold: 0.5 - -top_n: 100 - -match_weight_threshold: -10 - -splink: - probability_two_random_records_match: 0.3 - - comparisons: - - type: jaro_winkler - field: legal_name - thresholds: [0.9, 0.8] - - # Compound blocking rule: a pair is compared only if both country_code AND city match. - # This is expressed as a list with two fields. - blocking_rules: - - [country_code, city] +# Entity Resolver configuration — Compound blocking (country_code AND city) +# Blocks pairs unless both country_code AND city match. +# Creates tight, city-level blocks within countries. +# Trade-off: fewer comparisons (faster) but may miss cross-city variants. + +cache_strategy: tf_incremental + +threshold: 0.5 + +top_n: 100 + +match_weight_threshold: -10 + +splink: + probability_two_random_records_match: 0.3 + + comparisons: + - type: jaro_winkler + field: legal_name + thresholds: [0.9, 0.8] + + # Compound blocking rule: a pair is compared only if both country_code AND city match. + # This is expressed as a list with two fields. + blocking_rules: + - [country_code, city] diff --git a/infra/config/resolver_multirule.yaml b/config/resolver_multirule.yaml similarity index 96% rename from infra/config/resolver_multirule.yaml rename to config/resolver_multirule.yaml index 6e76a8c..c8395c9 100644 --- a/infra/config/resolver_multirule.yaml +++ b/config/resolver_multirule.yaml @@ -1,28 +1,28 @@ -# Entity Resolver configuration — Multi-rule blocking (country OR city OR name) -# Three independent blocking rules evaluated as OR (union). -# A pair is included if any rule fires: same country, OR same city, OR exact name match. -# Trade-off: more comparisons (slower) but higher recall for diverse datasets. 
- -cache_strategy: tf_incremental - -threshold: 0.5 - -top_n: 100 - -match_weight_threshold: -10 - -splink: - probability_two_random_records_match: 0.3 - - comparisons: - - type: jaro_winkler - field: legal_name - thresholds: [0.9, 0.8] - - # Multi-rule blocking: three independent rules, evaluated as UNION ALL. - # A pair is included if any rule fires (country_code match, OR city match, OR exact legal_name match). - # Splink deduplicates the results internally. - blocking_rules: - - country_code - - city - - legal_name +# Entity Resolver configuration — Multi-rule blocking (country OR city OR name) +# Three independent blocking rules evaluated as OR (union). +# A pair is included if any rule fires: same country, OR same city, OR exact name match. +# Trade-off: more comparisons (slower) but higher recall for diverse datasets. + +cache_strategy: tf_incremental + +threshold: 0.5 + +top_n: 100 + +match_weight_threshold: -10 + +splink: + probability_two_random_records_match: 0.3 + + comparisons: + - type: jaro_winkler + field: legal_name + thresholds: [0.9, 0.8] + + # Multi-rule blocking: three independent rules, evaluated as UNION ALL. + # A pair is included if any rule fires (country_code match, OR city match, OR exact legal_name match). + # Splink deduplicates the results internally. 
+ blocking_rules: + - country_code + - city + - legal_name diff --git a/infra/.env.example b/infra/.env.example new file mode 100644 index 0000000..0057f84 --- /dev/null +++ b/infra/.env.example @@ -0,0 +1,18 @@ +# Copy this file to .env and customize as needed: +# cp infra/.env.example infra/.env + +# Redis +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_DB=0 +REDIS_PASSWORD=changeme + +# Queue names +REQUEST_QUEUE=ere_requests +RESPONSE_QUEUE=ere_responses + +# DuckDB (path inside container, volume-mounted) +DUCKDB_PATH=/data/app.duckdb + +# Logging +LOG_LEVEL=INFO diff --git a/infra/.env.local b/infra/.env.local deleted file mode 100644 index e89187b..0000000 --- a/infra/.env.local +++ /dev/null @@ -1,28 +0,0 @@ -# Copy this file to .env.local and customize as needed -# This file is a template for Docker Compose configuration - -# ── Redis Configuration ────────────────────────────────────────────────────── -# Inside Docker Compose, use 'redis' as hostname. For local testing, use 'localhost' -REDIS_HOST=redis -REDIS_PORT=6379 -REDIS_DB=0 - -# Redis authentication (recommended for security) -REDIS_PASSWORD=changeme - -# ── Redis Queue Names ──────────────────────────────────────────────────────── -# Queue names for entity resolution requests and responses -REQUEST_QUEUE=ere_requests -RESPONSE_QUEUE=ere_responses - -# ── DuckDB Persistent Storage ──────────────────────────────────────────────── -# Path to DuckDB file inside container (volume-mounted from ere-data volume) -DUCKDB_PATH=/data/app.duckdb - -# ── ERE Service Port ───────────────────────────────────────────────────────── -# Port exposed to host machine for the ERE service -APP_PORT=8000 - -# ── Logging ────────────────────────────────────────────────────────────────── -# Python logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) -LOG_LEVEL=INFO diff --git a/infra/README.md b/infra/README.md new file mode 100644 index 0000000..db68b06 --- /dev/null +++ b/infra/README.md @@ -0,0 +1,71 @@ +# Infrastructure + 
+Deployment and infrastructure files for the Entity Resolution Engine. + +## Structure + +``` +infra/ +├── .env.example # Environment variable template +├── compose.dev.yaml # Docker Compose for local development +├── Dockerfile # Multi-stage build (builder + runtime) +└── README.md +``` + +## Services + +| Service | Purpose | Port | +|---|---|---| +| `ere` | Entity Resolution Engine (Redis queue worker) | — (no HTTP API) | +| `redis` | Message queue for ERE requests/responses | 6379 | +| `redisinsight` | Redis GUI (development tool) | 5540 | + +## Usage + +All commands run from the repo root via `make`: + +```bash +make infra-build # Build the ERE Docker image +make infra-up # Start services (docker compose up -d) +make infra-down # Stop and remove containers and networks +make infra-down-volumes # Stop services and remove volumes (clean slate) +make infra-rebuild # Rebuild images and start services +make infra-rebuild-clean # Rebuild from scratch (no cache) +make infra-logs # Follow service logs +make infra-watch # Start services with file watching (sync src/ and config/) +``` + +### File watching (development) + +`make infra-watch` uses Docker Compose's `watch` feature to sync source code and +configuration changes into the running container without a full rebuild: + +- **Source changes** (`src/`) are synced live into the container +- **Config changes** (`config/`) are synced live into the container +- **Dependency changes** (`pyproject.toml`, `poetry.lock`) trigger a full rebuild + +> **Note:** ERE is a long-running queue worker, not an HTTP server with hot-reload. +> After syncing, restart the container to pick up changes: `docker compose -f infra/compose.dev.yaml restart ere` + +### Manual build + +```bash +docker build -f infra/Dockerfile -t ere:latest . +``` + +## Configuration + +Environment variables are loaded from `infra/.env`. See `infra/.env.example` for available options. 
To set up: + +```bash +cp infra/.env.example infra/.env +``` + +### Resolver configuration + +Entity resolution behaviour is configured via YAML files in the top-level `config/` directory: + +- **[resolver.yaml](../config/resolver.yaml)** — Splink comparisons, cold-start parameters, blocking rules, thresholds +- **[rdf_mapping.yaml](../config/rdf_mapping.yaml)** — RDF namespace bindings, field extraction rules, entity type definitions + +See the [configuration README](../config/README.md) for detailed tuning guidance. diff --git a/infra/compose.dev.yaml b/infra/compose.dev.yaml new file mode 100644 index 0000000..1fddb3a --- /dev/null +++ b/infra/compose.dev.yaml @@ -0,0 +1,78 @@ +# Docker Compose configuration for local development + +name: ere-local + +services: + redis: + image: redis:7-alpine + container_name: "redis" + restart: unless-stopped + command: redis-server --requirepass ${REDIS_PASSWORD:-changeme} + ports: + - "6379:6379" + healthcheck: + test: ["CMD", "sh", "-c", "redis-cli --no-auth-warning -a $REDIS_PASSWORD ping"] + interval: 5s + timeout: 3s + retries: 5 + environment: + - REDIS_PASSWORD=${REDIS_PASSWORD:-changeme} + networks: + - ere-net + + redisinsight: + image: redis/redisinsight:3.2.0 + container_name: "redisinsight" + restart: unless-stopped + ports: + - "5540:5540" + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://127.0.0.1:5540/api/health"] + interval: 5s + timeout: 3s + retries: 5 + networks: + - ere-net + + ere: + build: + context: .. 
+ dockerfile: infra/Dockerfile + container_name: "ere" + env_file: .env + restart: unless-stopped + environment: + - DUCKDB_PATH=${DUCKDB_PATH:-/data/app.duckdb} + - RDF_MAPPING_PATH=/app/config/rdf_mapping.yaml + - RESOLVER_CONFIG_PATH=/app/config/resolver.yaml + # Remaining REDIS_* and queue vars inherited from env_file + healthcheck: + test: ["CMD", "sh", "-c", "test -f /proc/1/cmdline"] + interval: 10s + timeout: 3s + retries: 3 + depends_on: + redis: + condition: service_healthy + volumes: + - ere-data:/data + develop: + watch: + - action: sync + path: ../src + target: /app/src + - action: sync + path: ../config + target: /app/config + - action: rebuild + path: ../pyproject.toml + - action: rebuild + path: ../poetry.lock + networks: + - ere-net + +volumes: + ere-data: + +networks: + ere-net: diff --git a/infra/docker-compose.yml b/infra/docker-compose.yml deleted file mode 100644 index ef5b8df..0000000 --- a/infra/docker-compose.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: ere-local - -services: - - # ── Redis ────────────────────────────────────────────────────────────────── - redis: - image: redis:7-alpine - restart: unless-stopped - command: redis-server --requirepass ${REDIS_PASSWORD:-changeme} - ports: - - "6379:6379" - networks: - - ere-net - healthcheck: - test: ["CMD", "sh", "-c", "redis-cli --no-auth-warning -a $REDIS_PASSWORD ping"] - interval: 5s - timeout: 3s - retries: 5 - environment: - - REDIS_PASSWORD=${REDIS_PASSWORD:-changeme} - - - # ── Redis Insight (GUI for Redis) ────────────────────────────────────────── - redisinsight: - image: redis/redisinsight:latest - restart: unless-stopped - ports: - - "5540:5540" - networks: - - ere-net - environment: - # Optional: set analytics to false if you prefer no telemetry - - REDISINSIGHT_ANALYTICS=true - - - # ── Entity Resolution Engine ─────────────────────────────────────────────── - ere: - build: - context: .. 
- dockerfile: infra/Dockerfile - env_file: .env.local - restart: unless-stopped - ports: - - "${APP_PORT:-8000}:8000" - environment: - # DuckDB embedded file location (volume-mounted at /data) - - DUCKDB_PATH=${DUCKDB_PATH:-/data/app.duckdb} - # Config file paths in the container - - RDF_MAPPING_PATH=/app/config/rdf_mapping.yaml - - RESOLVER_CONFIG_PATH=/app/config/resolver.yaml - # Inherit REQUEST_QUEUE, RESPONSE_QUEUE, REDIS_* from .env.local - depends_on: - redis: - condition: service_healthy - volumes: - - ere-data:/data # DuckDB embedded file and other persistent state - networks: - - ere-net - -# ── Shared state ─────────────────────────────────────────────────────────── -volumes: - ere-data: - -# ── Internal network (not exposed to host) ───────────────────────────────── -networks: - ere-net: diff --git a/src/ere/adapters/rdf_mapper_impl.py b/src/ere/adapters/rdf_mapper_impl.py index 243f6b1..62554fc 100644 --- a/src/ere/adapters/rdf_mapper_impl.py +++ b/src/ere/adapters/rdf_mapper_impl.py @@ -42,7 +42,11 @@ def _load_mappings(rdf_mapping_path: str | Path = None) -> dict: dict: Entity type mappings from config. 
""" if rdf_mapping_path is None: - rdf_mapping_path = Path(__file__).parent.parent.parent.parent / "infra" / "config" / "rdf_mapping.yaml" + rdf_mapping_path = ( + Path(__file__).parent.parent.parent.parent + / "config" + / "rdf_mapping.yaml" + ) else: rdf_mapping_path = Path(rdf_mapping_path) return load_entity_mappings(rdf_mapping_path) @@ -70,9 +74,13 @@ def map_entity_mention_to_domain(self, entity_mention: EntityMention) -> Mention ) mention_id = MentionId( - value=self._derive_mention_id(eid.source_id, eid.request_id, eid.entity_type) + value=self._derive_mention_id( + eid.source_id, eid.request_id, eid.entity_type + ) + ) + attributes = extract_mention_attributes( + entity_mention.content, entity_type_config ) - attributes = extract_mention_attributes(entity_mention.content, entity_type_config) return Mention(id=mention_id, attributes=attributes) @staticmethod diff --git a/src/ere/services/factories.py b/src/ere/services/factories.py index 6442ae8..debd261 100644 --- a/src/ere/services/factories.py +++ b/src/ere/services/factories.py @@ -19,7 +19,10 @@ from ere.adapters.duckdb_schema import init_schema from ere.adapters.rdf_mapper_port import RDFMapper from ere.adapters.splink_linker_impl import SpLinkSimilarityLinker -from ere.services.entity_resolution_service import EntityResolver, EntityResolutionService +from ere.services.entity_resolution_service import ( + EntityResolver, + EntityResolutionService, +) from ere.services.resolver_config import ResolverConfig @@ -47,7 +50,9 @@ def build_entity_resolver( Fully-constructed EntityResolver with DuckDB backend and Splink linker. 
""" if resolver_config_path is None: - config_path = Path(__file__).parent.parent.parent.parent / "infra" / "config" / "resolver.yaml" + config_path = ( + Path(__file__).parent.parent.parent.parent / "config" / "resolver.yaml" + ) else: config_path = Path(resolver_config_path) From ee8e8cb380f461a31066fcd80357e1499f8e229d Mon Sep 17 00:00:00 2001 From: Twicechild Date: Thu, 26 Mar 2026 14:53:09 +0200 Subject: [PATCH 06/14] fix(docs): update references for new infra layout --- .github/workflows/code-quality.yaml | 10 ++-- CHANGELOG.md | 2 +- README.md | 35 +++++++++----- demo/README.md | 13 +++--- demo/demo.py | 72 +++++++++++++++++++++-------- docs/algorithm.md | 2 +- test/stress/README.md | 2 +- test/stress/stress_test.py | 9 ++-- 8 files changed, 94 insertions(+), 51 deletions(-) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 4369e8e..5bc9317 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -7,8 +7,8 @@ # 2. Lint, Test & Verify (tox: unit tests + architecture + clean-code checks) # 3. 
SonarCloud analysis (coverage, quality gate) # -# Optional repository secrets: -# - SONAR_TOKEN: SonarCloud authentication token (step skipped when absent) +# Required repository secrets: +# - SONAR_TOKEN: SonarCloud authentication token # # If the private ers-spec dependency fails to resolve with the default # GITHUB_TOKEN, add a PAT as GH_TOKEN_PRIVATE_REPOS and uncomment the @@ -29,8 +29,6 @@ jobs: quality: name: Lint, Test & Verify runs-on: ubuntu-latest - env: - SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} services: redis: @@ -96,14 +94,14 @@ jobs: # ------------------------------------------------------------------ - name: Run quality checks (unit tests + architecture + clean-code) run: | - rm -f infra/.env.local + rm -f infra/.env poetry run tox -e py312,architecture,clean-code # ------------------------------------------------------------------ # SonarCloud # ------------------------------------------------------------------ - name: SonarCloud scan - if: always() && env.SONAR_TOKEN != '' + if: always() uses: SonarSource/sonarqube-scan-action@v6 env: SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 9980d99..3a6296f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,7 +65,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 **Docker & Deployment** - Multi-stage Dockerfile for production-ready containerization -- `docker-compose.yml` for full-stack setup (Redis + ERE service) +- `compose.dev.yaml` for full-stack setup (Redis + ERE service) - `.env.example` template for configuration **Documentation** diff --git a/README.md b/README.md index 81e76ca..442f396 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Its primary purpose is to interact with the Entity Resolution System (ERSys). 
It For detailed documentation, see: - [Architecture](docs/architecture.md) - description of the applied architecture - [Algorithm](docs/algorithm.md) - incremental probabilistic entity linking -- [Configuration](infra/config/README.md) - field mapping, model tuning, Splink setup +- [Configuration](config/README.md) - field mapping, model tuning, Splink setup - [ERS–ERE Technical Contract v0.2](docs/ERS-ERE-System-Technical-Contract.pdf) @@ -66,7 +66,10 @@ make install ``` To build and launch Docker-based stack (ERE + Redis): -1. (optional) Adjust connection and logging config in [.env.local](infra/.env.local). +1. (optional) Copy and adjust connection and logging config: + ```bash + cp infra/.env.example infra/.env + ``` 2. Run the following: ```bash # Build the ERE Docker image @@ -93,7 +96,7 @@ Terminate the service: make infra-down ``` -Note: In order for the demo to work, you need to either set `REDIS_HOST=localhost` in the [.env.local](infra/.env.local) file or pass it to the script as an environment variable. +Note: In order for the demo to work, you need to either set `REDIS_HOST=localhost` in [infra/.env](infra/.env.example) or pass it to the script as an environment variable. For detailed setup instructions, see `Make targets`. 
@@ -133,9 +136,13 @@ Available targets (`make help`): Infrastructure (Docker): infra-build - Build the ERE Docker image - infra-up - Start full stack (Redis + ERE) in detached mode + infra-up - Start services (docker compose up -d) infra-down - Stop and remove stack containers and networks - infra-logs - Tail ERE container logs + infra-down-volumes - Stop services and remove volumes (clean slate) + infra-rebuild - Rebuild images and start services + infra-rebuild-clean - Rebuild from scratch (no cache) and start + infra-logs - Follow service logs + infra-watch - Start services with file watching (sync src/ and config/) Utilities: clean - Remove build artifacts and caches @@ -145,10 +152,10 @@ Available targets (`make help`): ### Configuration (Resolver and Mapper) Entity resolution behaviour is configured via two YAML files: -- **Resolver configuration** ([resolver.yaml](./infra/config/resolver.yaml)): Splink comparisons, cold-start parameters, similarity thresholds -- **RDF mapping** ([rdf_mapping.yaml](./infra/config/rdf_mapping.yaml)): RDF namespace bindings, field extraction rules, entity type definitions +- **Resolver configuration** ([resolver.yaml](./config/resolver.yaml)): Splink comparisons, cold-start parameters, similarity thresholds +- **RDF mapping** ([rdf_mapping.yaml](./config/rdf_mapping.yaml)): RDF namespace bindings, field extraction rules, entity type definitions -For detailed configuration options and tuning, see the [configuration page](./infra/config/README.md). +For detailed configuration options and tuning, see the [configuration page](./config/README.md). 
### Examples @@ -203,11 +210,15 @@ docs/ ├── ERS-ERE-System-Technical-Contract.pdf └── *.md # Topic documentation +config/ +├── resolver.yaml # Splink comparisons, blocking rules, thresholds +├── rdf_mapping.yaml # RDF namespace bindings, field extraction rules +└── README.md # Configuration documentation + infra/ ├── Dockerfile # ERE service image definition -├── docker-compose.yml # Full stack (Redis + ERE) -├── config # ERE Configuration -└── .env.local # Local runtime config (git-ignored) +├── compose.dev.yaml # Docker Compose for local development +└── .env.example # Environment variable template demo/ ├── demo.py # Entity resolution demonstration script @@ -266,7 +277,7 @@ make test-integration # Code formatting and linting make format # Auto-format with Ruff -make lint-check # Lint without modifying files +make lint # Lint without modifying files make lint-fix # Lint with auto-fix ``` diff --git a/demo/README.md b/demo/README.md index aa45f79..4a1ce9f 100644 --- a/demo/README.md +++ b/demo/README.md @@ -18,7 +18,7 @@ The demo treats ERE as a black box service accessible only through Redis message ## Configuration -Configuration is loaded from `.env.local` (or environment variables): +Configuration is loaded from `infra/.env` (or environment variables): | Variable | Default | Purpose | |----------|---------|---------| @@ -44,14 +44,13 @@ The script tries the configured host first, then falls back to `localhost` if th Start the full stack including Redis and ERE: ```bash -cd /home/greg/PROJECTS/ERS/ere-basic -docker-compose -f infra/docker-compose.yml up -d +make infra-rebuild ``` Wait for services to be ready (check logs): ```bash -docker-compose -f infra/docker-compose.yml logs -f +make infra-logs ``` ### 2. Locally (development) @@ -205,7 +204,7 @@ If it returns `PONG`, Redis is running. 
If not: - **Docker**: `docker run -d -p 6379:6379 redis:latest` - **Local Redis**: `brew install redis && brew services start redis` (macOS) -- **Docker Compose**: Ensure the service is running: `docker-compose -f infra/docker-compose.yml up redis` +- **Docker Compose**: Ensure the service is running: `make infra-up` ### Timeout waiting for responses @@ -216,14 +215,14 @@ If it returns `PONG`, Redis is running. If not: **Check ERE logs:** ```bash -docker-compose -f infra/docker-compose.yml logs ere +make infra-logs ``` ### Password authentication fails **Edit Redis connection parameters:** -Option 1: Modify `.env.local`: +Option 1: Modify `infra/.env`: ```bash REDIS_PASSWORD=your_password ``` diff --git a/demo/demo.py b/demo/demo.py index 711178d..f8d33bf 100755 --- a/demo/demo.py +++ b/demo/demo.py @@ -17,7 +17,7 @@ Before running a fresh demo with different data, clear the old database: docker volume rm ere-local_ere-data - docker-compose -f infra/docker-compose.yml up -d + make infra-rebuild Failure to do so will mix old mentions with new ones, corrupting demo results. 
""" @@ -35,7 +35,9 @@ # Default data file path DEFAULT_DATA_FILE = Path(__file__).parent / "data" / "org-tiny.json" -DELAY_BETWEEN_MESSAGES = 0 # seconds to wait between sending messages (set to >0 for sequential processing) +DELAY_BETWEEN_MESSAGES = ( + 0 # seconds to wait between sending messages (set to >0 for sequential processing) +) GLOBAL_TIMEOUT = 0 # seconds to wait for responses before giving up (0 = no timeout) @@ -43,13 +45,14 @@ # Configuration # =============================================================================== + def load_env_file(env_path: str = None) -> dict: - """Load configuration from .env.local or environment variables.""" + """Load configuration from .env or environment variables.""" config = {} - # Try to load from .env.local if it exists + # Try to load from .env if it exists if env_path is None: - env_path = Path(__file__).parent.parent / "infra" / ".env.local" + env_path = Path(__file__).parent.parent / "infra" / ".env" if Path(env_path).exists(): with open(env_path) as f: @@ -60,13 +63,23 @@ def load_env_file(env_path: str = None) -> dict: key, value = line.split("=", 1) config[key.strip()] = value.strip() - # Environment variables override .env.local - config["REDIS_HOST"] = os.environ.get("REDIS_HOST", config.get("REDIS_HOST", "localhost")) - config["REDIS_PORT"] = int(os.environ.get("REDIS_PORT", config.get("REDIS_PORT", "6379"))) + # Environment variables override .env + config["REDIS_HOST"] = os.environ.get( + "REDIS_HOST", config.get("REDIS_HOST", "localhost") + ) + config["REDIS_PORT"] = int( + os.environ.get("REDIS_PORT", config.get("REDIS_PORT", "6379")) + ) config["REDIS_DB"] = int(os.environ.get("REDIS_DB", config.get("REDIS_DB", "0"))) - config["REDIS_PASSWORD"] = os.environ.get("REDIS_PASSWORD", config.get("REDIS_PASSWORD")) - config["REQUEST_QUEUE"] = os.environ.get("REQUEST_QUEUE", config.get("REQUEST_QUEUE", "ere_requests")) - config["RESPONSE_QUEUE"] = os.environ.get("RESPONSE_QUEUE", 
config.get("RESPONSE_QUEUE", "ere_responses")) + config["REDIS_PASSWORD"] = os.environ.get( + "REDIS_PASSWORD", config.get("REDIS_PASSWORD") + ) + config["REQUEST_QUEUE"] = os.environ.get( + "REQUEST_QUEUE", config.get("REQUEST_QUEUE", "ere_requests") + ) + config["RESPONSE_QUEUE"] = os.environ.get( + "RESPONSE_QUEUE", config.get("RESPONSE_QUEUE", "ere_responses") + ) return config @@ -77,6 +90,7 @@ def load_env_file(env_path: str = None) -> dict: TRACE = 5 + def setup_logging(): """Configure logging with timestamps.""" log_level_name = os.environ.get("LOG_LEVEL", "INFO").upper() @@ -105,7 +119,10 @@ def setup_logging(): # Redis Connection # =============================================================================== -def check_redis_connectivity(host: str, port: int, db: int, password: str) -> redis.Redis: + +def check_redis_connectivity( + host: str, port: int, db: int, password: str +) -> redis.Redis: """ Check Redis connectivity and return client. @@ -124,7 +141,9 @@ def check_redis_connectivity(host: str, port: int, db: int, password: str) -> re last_error = None for try_host in hosts_to_try: try: - logging.getLogger(__name__).info(f"Attempting Redis connection to {try_host}:{port}...") + logging.getLogger(__name__).info( + f"Attempting Redis connection to {try_host}:{port}..." + ) client = redis.Redis( host=try_host, port=port, @@ -147,6 +166,7 @@ def check_redis_connectivity(host: str, port: int, db: int, password: str) -> re # Request/Response Handling # =============================================================================== + def escape_turtle_string(value: str) -> str: """ Escape a string for safe inclusion in Turtle RDF format. @@ -223,7 +243,7 @@ def create_entity_mention_request( thoroughfare_safe = escape_turtle_string(thoroughfare) address_props.append(f'locn:thoroughfare "{thoroughfare_safe}"') - address_content = ' ;\n '.join(address_props) + address_content = " ;\n ".join(address_props) content = f"""@prefix org: . @prefix cccev: . 
@@ -263,6 +283,7 @@ def parse_response(response_bytes: bytes) -> dict: # Demo Data Loading # =============================================================================== + def load_demo_mentions(data_file: str | None = None) -> list[dict]: """ Load demo mentions from a JSON file. @@ -298,6 +319,7 @@ def load_demo_mentions(data_file: str | None = None) -> list[dict]: # Main Demo # =============================================================================== + def main(data_file: str | None = None): """ Run the Redis-based ERE demo. @@ -323,7 +345,9 @@ def main(data_file: str | None = None): # Load demo mentions from JSON try: demo_mentions = load_demo_mentions(data_file) - logger.info(f"Loaded {len(demo_mentions)} mentions from {data_file or DEFAULT_DATA_FILE}") + logger.info( + f"Loaded {len(demo_mentions)} mentions from {data_file or DEFAULT_DATA_FILE}" + ) except (FileNotFoundError, ValueError) as e: logger.error(f"Failed to load demo mentions: {e}") return 1 @@ -357,7 +381,7 @@ def main(data_file: str | None = None): f" \n" f" To reset the database:\n" f" 1. docker volume rm ere-local_ere-data\n" - f" 2. docker-compose -f infra/docker-compose.yml up -d\n" + f" 2. make infra-rebuild\n" ) # Send demo requests @@ -414,7 +438,9 @@ def main(data_file: str | None = None): while len(responses_received) < len(request_ids): elapsed = time.time() - start_time if GLOBAL_TIMEOUT > 0 and elapsed > GLOBAL_TIMEOUT: - logger.warning(f"Timeout after {GLOBAL_TIMEOUT}s. Received {len(responses_received)}/{len(request_ids)} responses.") + logger.warning( + f"Timeout after {GLOBAL_TIMEOUT}s. Received {len(responses_received)}/{len(request_ids)} responses." 
+ ) break # Try to get a response with short timeout @@ -425,7 +451,9 @@ def main(data_file: str | None = None): response = parse_response(response_bytes) if logger.isEnabledFor(TRACE): - logger.log(TRACE, f"Full response message:\n{json.dumps(response, indent=2)}") + logger.log( + TRACE, f"Full response message:\n{json.dumps(response, indent=2)}" + ) req_id = response["entity_mention_id"]["request_id"] responses_received[req_id] = response @@ -455,7 +483,9 @@ def main(data_file: str | None = None): ) logger.info("-" * 80) - logger.info(f"\nDemo complete. Received {len(responses_received)}/{len(request_ids)} responses.") + logger.info( + f"\nDemo complete. Received {len(responses_received)}/{len(request_ids)} responses." + ) # Build clustering summary as single block summary_lines = [] @@ -510,7 +540,9 @@ def main(data_file: str | None = None): logger.info("✓ All responses received successfully!") return 0 else: - logger.warning(f"✗ Missing {len(request_ids) - len(responses_received)} response(s).") + logger.warning( + f"✗ Missing {len(request_ids) - len(responses_received)} response(s)." + ) return 1 diff --git a/docs/algorithm.md b/docs/algorithm.md index 7ad5bb7..201dbbd 100644 --- a/docs/algorithm.md +++ b/docs/algorithm.md @@ -100,7 +100,7 @@ The algorithm processes mentions one at a time, making immediate clustering deci | **top_n** | Maximum candidate clusters returned per mention | | **blocking_rules** | Pre-filters to reduce similarity computation | -The complete list of configuration parameters together with comprehensive description is available in [Configuration](../infra/config/README.md). +The complete list of configuration parameters together with comprehensive description is available in [Configuration](../config/README.md). 
## Outputs diff --git a/test/stress/README.md b/test/stress/README.md index 0d69f7b..cd33711 100644 --- a/test/stress/README.md +++ b/test/stress/README.md @@ -45,7 +45,7 @@ poetry run python3 test/stress/stress_test.py \ ### Optional **`--config PATH`** -- Path to resolver config YAML (default: `infra/config/resolver.yaml`) +- Path to resolver config YAML (default: `config/resolver.yaml`) - Determines blocking rules, thresholds, and Splink settings **`--seed N`** diff --git a/test/stress/stress_test.py b/test/stress/stress_test.py index 4588c63..14cbb72 100644 --- a/test/stress/stress_test.py +++ b/test/stress/stress_test.py @@ -14,7 +14,7 @@ --dataset test/stress/data/org-mid.csv \ --seed 200 \ --records 500 \ - --config infra/config/resolver.yaml \ + --config config/resolver.yaml \ --output /tmp/stress_mid.json """ @@ -141,7 +141,10 @@ def create_resolver( def seed_and_train( - resolver: EntityResolver, mentions: list[Mention], n_seed: int, skip_train: bool = False + resolver: EntityResolver, + mentions: list[Mention], + n_seed: int, + skip_train: bool = False, ): """ Seed resolver with first n_seed mentions and optionally trigger training. @@ -409,7 +412,7 @@ def main(): ) parser.add_argument( "--config", - default="infra/config/resolver.yaml", + default="config/resolver.yaml", help="Path to resolver config YAML", ) parser.add_argument( From 9332da6246313727a9289a9087ed83912d4af23d Mon Sep 17 00:00:00 2001 From: Grzegorz Kostkowski Date: Fri, 27 Mar 2026 13:15:03 +0100 Subject: [PATCH 07/14] chore: update line endings in the demo script --- demo/demo.py | 1126 +++++++++++++++++++++++++------------------------- 1 file changed, 563 insertions(+), 563 deletions(-) diff --git a/demo/demo.py b/demo/demo.py index f8d33bf..1a06939 100755 --- a/demo/demo.py +++ b/demo/demo.py @@ -1,563 +1,563 @@ -#!/usr/bin/env python3 -""" -Demo: Indirect Redis client for ERE (Entity Resolution Engine). 
- -This demo connects to ERE through the Redis queue infrastructure (no direct Python API). -It demonstrates: -1. Checking Redis connectivity -2. Sending EntityMentionResolutionRequest messages to the queue -3. Listening for EntityMentionResolutionResponse messages -4. Logging all interactions - -The example uses 6 synthetic mentions from ALGORITHM.md that cluster into 2 groups: - - Cluster 1: {1, 2, 5} (organizations with high similarity) - - Cluster 2: {3, 4, 6} (different organizations, also highly similar) - -⚠️ IMPORTANT: The ERE resolver persists state in a DuckDB database volume. - Before running a fresh demo with different data, clear the old database: - - docker volume rm ere-local_ere-data - make infra-rebuild - - Failure to do so will mix old mentions with new ones, corrupting demo results. -""" - -import json -import logging -import os -import sys -import time -from datetime import datetime, timezone -from pathlib import Path - -import redis - -# Default data file path -DEFAULT_DATA_FILE = Path(__file__).parent / "data" / "org-tiny.json" - -DELAY_BETWEEN_MESSAGES = ( - 0 # seconds to wait between sending messages (set to >0 for sequential processing) -) -GLOBAL_TIMEOUT = 0 # seconds to wait for responses before giving up (0 = no timeout) - - -# =============================================================================== -# Configuration -# =============================================================================== - - -def load_env_file(env_path: str = None) -> dict: - """Load configuration from .env or environment variables.""" - config = {} - - # Try to load from .env if it exists - if env_path is None: - env_path = Path(__file__).parent.parent / "infra" / ".env" - - if Path(env_path).exists(): - with open(env_path) as f: - for line in f: - line = line.strip() - if line and not line.startswith("#"): - if "=" in line: - key, value = line.split("=", 1) - config[key.strip()] = value.strip() - - # Environment variables override .env - 
config["REDIS_HOST"] = os.environ.get( - "REDIS_HOST", config.get("REDIS_HOST", "localhost") - ) - config["REDIS_PORT"] = int( - os.environ.get("REDIS_PORT", config.get("REDIS_PORT", "6379")) - ) - config["REDIS_DB"] = int(os.environ.get("REDIS_DB", config.get("REDIS_DB", "0"))) - config["REDIS_PASSWORD"] = os.environ.get( - "REDIS_PASSWORD", config.get("REDIS_PASSWORD") - ) - config["REQUEST_QUEUE"] = os.environ.get( - "REQUEST_QUEUE", config.get("REQUEST_QUEUE", "ere_requests") - ) - config["RESPONSE_QUEUE"] = os.environ.get( - "RESPONSE_QUEUE", config.get("RESPONSE_QUEUE", "ere_responses") - ) - - return config - - -# =============================================================================== -# Logging Setup -# =============================================================================== - -TRACE = 5 - - -def setup_logging(): - """Configure logging with timestamps.""" - log_level_name = os.environ.get("LOG_LEVEL", "INFO").upper() - - # Handle custom TRACE level - if log_level_name == "TRACE": - log_level = TRACE - logging.addLevelName(TRACE, "TRACE") - else: - log_level = getattr(logging, log_level_name, logging.INFO) - - logging.basicConfig( - level=log_level, - format="%(asctime)s [%(levelname)s] %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - ) - - logger = logging.getLogger(__name__) - logger.setLevel(log_level) - logger.info(f"Logging configured at level {log_level_name}") - - return logger - - -# =============================================================================== -# Redis Connection -# =============================================================================== - - -def check_redis_connectivity( - host: str, port: int, db: int, password: str -) -> redis.Redis: - """ - Check Redis connectivity and return client. - - Attempts connection to specified host first, then fallback to localhost - if configured host is "redis" (Docker). - - Raises: - RuntimeError: If Redis is not accessible. 
- """ - hosts_to_try = [host] - - # Fallback: if configured host is "redis" (Docker), also try localhost - if host == "redis": - hosts_to_try.append("localhost") - - last_error = None - for try_host in hosts_to_try: - try: - logging.getLogger(__name__).info( - f"Attempting Redis connection to {try_host}:{port}..." - ) - client = redis.Redis( - host=try_host, - port=port, - db=db, - password=password, - decode_responses=False, - ) - client.ping() - return client - except Exception as e: - last_error = e - continue - - raise RuntimeError( - f"Redis unavailable. Tried hosts: {hosts_to_try}, port: {port}, db: {db}" - ) from last_error - - -# =============================================================================== -# Request/Response Handling -# =============================================================================== - - -def escape_turtle_string(value: str) -> str: - """ - Escape a string for safe inclusion in Turtle RDF format. - - Handles special characters: backslash, double quotes, newlines, carriage returns, tabs. - - Args: - value: String to escape - - Returns: - Escaped string safe for use in Turtle string literals - """ - if not value: - return value - - # Escape backslash first (must be done before other escapes) - value = value.replace("\\", "\\\\") - # Escape double quotes - value = value.replace('"', '\\"') - # Escape newlines - value = value.replace("\n", "\\n") - # Escape carriage returns - value = value.replace("\r", "\\r") - # Escape tabs - value = value.replace("\t", "\\t") - - return value - - -def create_entity_mention_request( - request_id: str, - source_id: str, - entity_type: str, - legal_name: str, - country_code: str, - nuts_code: str | None = None, - post_code: str | None = None, - post_name: str | None = None, - thoroughfare: str | None = None, -) -> dict: - """ - Create an EntityMentionResolutionRequest payload. - - Uses RDF/Turtle format with entity metadata including extended address fields. 
- All string values are properly escaped for Turtle compatibility. - - Args: - request_id: Unique request identifier - source_id: Source system identifier - entity_type: Entity type (e.g., ORGANISATION) - legal_name: Legal name of the entity - country_code: ISO 2-letter country code - nuts_code: Optional NUTS regional code - post_code: Optional postal code - post_name: Optional city/locality name - thoroughfare: Optional street address - """ - # Escape all string values for Turtle safety - legal_name_safe = escape_turtle_string(legal_name or "") - country_code_safe = escape_turtle_string(country_code or "") - - # Build address properties dynamically - address_props = [f'epo:hasCountryCode "{country_code_safe}"'] - if nuts_code: - nuts_code_safe = escape_turtle_string(nuts_code) - address_props.append(f'epo:hasNutsCode "{nuts_code_safe}"') - if post_code: - post_code_safe = escape_turtle_string(post_code) - address_props.append(f'locn:postCode "{post_code_safe}"') - if post_name: - post_name_safe = escape_turtle_string(post_name) - address_props.append(f'locn:postName "{post_name_safe}"') - if thoroughfare: - thoroughfare_safe = escape_turtle_string(thoroughfare) - address_props.append(f'locn:thoroughfare "{thoroughfare_safe}"') - - address_content = " ;\n ".join(address_props) - - content = f"""@prefix org: . -@prefix cccev: . -@prefix epo: . -@prefix locn: . -@prefix epd: . - -epd:ent{request_id} a org:Organization ; - epo:hasLegalName "{legal_name_safe}" ; - cccev:registeredAddress [ - {address_content} - ] . 
-""" - - return { - "type": "EntityMentionResolutionRequest", - "entity_mention": { - "identifiedBy": { - "request_id": request_id, - "source_id": source_id, - "entity_type": entity_type, - }, - "content": content.strip(), - "content_type": "text/turtle", - }, - "timestamp": datetime.now(timezone.utc).isoformat(), - "ere_request_id": f"{request_id}:01", - } - - -def parse_response(response_bytes: bytes) -> dict: - """Parse JSON response from Redis.""" - return json.loads(response_bytes.decode("utf-8")) - - -# =============================================================================== -# Demo Data Loading -# =============================================================================== - - -def load_demo_mentions(data_file: str | None = None) -> list[dict]: - """ - Load demo mentions from a JSON file. - - Args: - data_file: Path to JSON file containing mentions. If None, uses default. - - Returns: - List of mention dicts with keys: request_id, source_id, entity_type, - legal_name, country_code, description. - - Raises: - FileNotFoundError: If data file does not exist. - ValueError: If JSON is invalid or missing 'mentions' key. - """ - if data_file is None: - data_file = DEFAULT_DATA_FILE - - data_path = Path(data_file) - if not data_path.exists(): - raise FileNotFoundError(f"Data file not found: {data_path}") - - with open(data_path) as f: - data = json.load(f) - - if "mentions" not in data: - raise ValueError(f"JSON must contain 'mentions' key") - - return data["mentions"] - - -# =============================================================================== -# Main Demo -# =============================================================================== - - -def main(data_file: str | None = None): - """ - Run the Redis-based ERE demo. - - Args: - data_file: Path to JSON file containing demo mentions. - If None, uses default (mentions_mixed_countries.json). 
- """ - logger = setup_logging() - - # Load configuration - logger.info("Loading configuration...") - config = load_env_file() - logger.info( - f"Redis config: host={config['REDIS_HOST']}, " - f"port={config['REDIS_PORT']}, db={config['REDIS_DB']}" - ) - logger.info( - f"Queue names: request={config['REQUEST_QUEUE']}, " - f"response={config['RESPONSE_QUEUE']}" - ) - - # Load demo mentions from JSON - try: - demo_mentions = load_demo_mentions(data_file) - logger.info( - f"Loaded {len(demo_mentions)} mentions from {data_file or DEFAULT_DATA_FILE}" - ) - except (FileNotFoundError, ValueError) as e: - logger.error(f"Failed to load demo mentions: {e}") - return 1 - - # Check Redis connectivity - logger.info("Checking Redis connectivity...") - try: - redis_client = check_redis_connectivity( - host=config["REDIS_HOST"], - port=config["REDIS_PORT"], - db=config["REDIS_DB"], - password=config["REDIS_PASSWORD"], - ) - logger.info("✓ Redis is available") - except RuntimeError as e: - logger.error(f"✗ Redis check failed: {e}") - return 1 - - # Clear queues - logger.info("Clearing request and response queues...") - redis_client.delete(config["REQUEST_QUEUE"], config["RESPONSE_QUEUE"]) - - # ⚠️ Check if DuckDB database is non-empty (stale from prior runs) - # This guards against corrupting demo results by mixing old and new mentions - duckdb_path = Path(os.environ.get("DUCKDB_PATH", "/data/app.duckdb")) - if duckdb_path.exists() and duckdb_path.stat().st_size > 0: - logger.warning( - f"⚠️ WARNING: DuckDB database file exists and is non-empty!\n" - f" This may contain mentions from a prior run.\n" - f" This will CORRUPT demo results by mixing old and new data.\n" - f" \n" - f" To reset the database:\n" - f" 1. docker volume rm ere-local_ere-data\n" - f" 2. 
make infra-rebuild\n" - ) - - # Send demo requests - logger.info(f"Sending {len(demo_mentions)} entity mentions...") - request_ids = [] - - for mention in demo_mentions: - request = create_entity_mention_request( - request_id=mention["request_id"], - source_id=mention["source_id"], - entity_type=mention["entity_type"], - legal_name=mention["legal_name"], - country_code=mention["country_code"], - nuts_code=mention.get("nuts_code"), - post_code=mention.get("post_code"), - post_name=mention.get("post_name"), - thoroughfare=mention.get("thoroughfare"), - ) - - message_json = json.dumps(request) - if logger.isEnabledFor(TRACE): - logger.log(TRACE, f"Full request message:\n{json.dumps(request, indent=2)}") - - message_bytes = message_json.encode("utf-8") - redis_client.rpush(config["REQUEST_QUEUE"], message_bytes) - request_ids.append(mention["request_id"]) - - logger.info( - f" → Sent request {mention['request_id']}: " - f"{mention['legal_name']} ({mention['country_code']}) " - f"[{mention.get('description', '')}]" - ) - - # Wait 1 second between messages to ensure sequential processing - if DELAY_BETWEEN_MESSAGES: - time.sleep(1) - - logger.info("") - logger.info("Listening for responses...") - logger.info("-" * 80) - - # Track mentions for summary: map request_id → (legal_name, cluster_id) - mention_tracking = {} - for mention in demo_mentions: - mention_tracking[mention["request_id"]] = { - "legal_name": mention["legal_name"], - "cluster_id": None, # Will be filled in from response - } - - # Listen for responses - responses_received = {} - start_time = time.time() - - while len(responses_received) < len(request_ids): - elapsed = time.time() - start_time - if GLOBAL_TIMEOUT > 0 and elapsed > GLOBAL_TIMEOUT: - logger.warning( - f"Timeout after {GLOBAL_TIMEOUT}s. Received {len(responses_received)}/{len(request_ids)} responses." 
- ) - break - - # Try to get a response with short timeout - result = redis_client.brpop(config["RESPONSE_QUEUE"], timeout=1) - - if result is not None: - _, response_bytes = result - response = parse_response(response_bytes) - - if logger.isEnabledFor(TRACE): - logger.log( - TRACE, f"Full response message:\n{json.dumps(response, indent=2)}" - ) - - req_id = response["entity_mention_id"]["request_id"] - responses_received[req_id] = response - - logger.info(f"\n✓ Response received for {req_id}:") - logger.info(f" Type: {response['type']}") - logger.info(f" Timestamp: {response['timestamp']}") - - source_id = response["entity_mention_id"]["source_id"] - entity_type = response["entity_mention_id"]["entity_type"] - logger.info(f" Mention: ({source_id}, {req_id}, {entity_type})") - - logger.info(f" Candidates:") - - # Track the top cluster assignment (first candidate is the assignment) - if response.get("candidates"): - top_candidate = response["candidates"][0] - assigned_cluster = top_candidate["cluster_id"] - mention_tracking[req_id]["cluster_id"] = assigned_cluster - logger.info(f" → Assigned to cluster: {assigned_cluster}") - - for i, candidate in enumerate(response.get("candidates", []), 1): - logger.info( - f" {i}. Cluster {candidate['cluster_id']}: " - f"confidence={candidate['confidence_score']:.4f}, " - f"similarity={candidate['similarity_score']:.4f}" - ) - - logger.info("-" * 80) - logger.info( - f"\nDemo complete. Received {len(responses_received)}/{len(request_ids)} responses." 
- ) - - # Build clustering summary as single block - summary_lines = [] - summary_lines.append("=" * 80) - summary_lines.append("CLUSTERING SUMMARY") - summary_lines.append("=" * 80) - - # Group mentions by assigned cluster - clusters = {} - unassigned = [] - - for req_id in request_ids: - tracking = mention_tracking.get(req_id) - if tracking: - cluster_id = tracking["cluster_id"] - legal_name = tracking["legal_name"] - - if cluster_id is None: - unassigned.append((req_id, legal_name)) - else: - if cluster_id not in clusters: - clusters[cluster_id] = [] - clusters[cluster_id].append((req_id, legal_name)) - - # Build cluster output - if clusters: - for cluster_id in sorted(clusters.keys()): - members = clusters[cluster_id] - summary_lines.append("") - summary_lines.append(f"{cluster_id} ({len(members)} members):") - for req_id, legal_name in members: - summary_lines.append(f" {req_id:4s} | {legal_name}") - else: - summary_lines.append("") - summary_lines.append("(No clusters formed)") - - # Add unassigned mentions - if unassigned: - summary_lines.append("") - summary_lines.append(f"Unassigned ({len(unassigned)} mentions):") - for req_id, legal_name in unassigned: - summary_lines.append(f" {req_id:4s} | {legal_name}") - - summary_lines.append("=" * 80) - - # Print entire summary in one log call - summary_block = "\n".join(summary_lines) - logger.info(f"\n{summary_block}") - - # Summary - if len(responses_received) == len(request_ids): - logger.info("✓ All responses received successfully!") - return 0 - else: - logger.warning( - f"✗ Missing {len(request_ids) - len(responses_received)} response(s)." - ) - return 1 - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser( - description="Redis-based ERE demo with parametrized mentions data." 
- ) - parser.add_argument( - "--data", - type=str, - default=None, - help=f"Path to JSON file with demo mentions (default: {DEFAULT_DATA_FILE})", - ) - args = parser.parse_args() - - sys.exit(main(data_file=args.data)) +#!/usr/bin/env python3 +""" +Demo: Indirect Redis client for ERE (Entity Resolution Engine). + +This demo connects to ERE through the Redis queue infrastructure (no direct Python API). +It demonstrates: +1. Checking Redis connectivity +2. Sending EntityMentionResolutionRequest messages to the queue +3. Listening for EntityMentionResolutionResponse messages +4. Logging all interactions + +The example uses 6 synthetic mentions from ALGORITHM.md that cluster into 2 groups: + - Cluster 1: {1, 2, 5} (organizations with high similarity) + - Cluster 2: {3, 4, 6} (different organizations, also highly similar) + +⚠️ IMPORTANT: The ERE resolver persists state in a DuckDB database volume. + Before running a fresh demo with different data, clear the old database: + + docker volume rm ere-local_ere-data + make infra-rebuild + + Failure to do so will mix old mentions with new ones, corrupting demo results. 
+""" + +import json +import logging +import os +import sys +import time +from datetime import datetime, timezone +from pathlib import Path + +import redis + +# Default data file path +DEFAULT_DATA_FILE = Path(__file__).parent / "data" / "org-tiny.json" + +DELAY_BETWEEN_MESSAGES = ( + 0 # seconds to wait between sending messages (set to >0 for sequential processing) +) +GLOBAL_TIMEOUT = 0 # seconds to wait for responses before giving up (0 = no timeout) + + +# =============================================================================== +# Configuration +# =============================================================================== + + +def load_env_file(env_path: str = None) -> dict: + """Load configuration from .env or environment variables.""" + config = {} + + # Try to load from .env if it exists + if env_path is None: + env_path = Path(__file__).parent.parent / "infra" / ".env" + + if Path(env_path).exists(): + with open(env_path) as f: + for line in f: + line = line.strip() + if line and not line.startswith("#"): + if "=" in line: + key, value = line.split("=", 1) + config[key.strip()] = value.strip() + + # Environment variables override .env + config["REDIS_HOST"] = os.environ.get( + "REDIS_HOST", config.get("REDIS_HOST", "localhost") + ) + config["REDIS_PORT"] = int( + os.environ.get("REDIS_PORT", config.get("REDIS_PORT", "6379")) + ) + config["REDIS_DB"] = int(os.environ.get("REDIS_DB", config.get("REDIS_DB", "0"))) + config["REDIS_PASSWORD"] = os.environ.get( + "REDIS_PASSWORD", config.get("REDIS_PASSWORD") + ) + config["REQUEST_QUEUE"] = os.environ.get( + "REQUEST_QUEUE", config.get("REQUEST_QUEUE", "ere_requests") + ) + config["RESPONSE_QUEUE"] = os.environ.get( + "RESPONSE_QUEUE", config.get("RESPONSE_QUEUE", "ere_responses") + ) + + return config + + +# =============================================================================== +# Logging Setup +# =============================================================================== + +TRACE = 5 + + 
+def setup_logging(): + """Configure logging with timestamps.""" + log_level_name = os.environ.get("LOG_LEVEL", "INFO").upper() + + # Handle custom TRACE level + if log_level_name == "TRACE": + log_level = TRACE + logging.addLevelName(TRACE, "TRACE") + else: + log_level = getattr(logging, log_level_name, logging.INFO) + + logging.basicConfig( + level=log_level, + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + ) + + logger = logging.getLogger(__name__) + logger.setLevel(log_level) + logger.info(f"Logging configured at level {log_level_name}") + + return logger + + +# =============================================================================== +# Redis Connection +# =============================================================================== + + +def check_redis_connectivity( + host: str, port: int, db: int, password: str +) -> redis.Redis: + """ + Check Redis connectivity and return client. + + Attempts connection to specified host first, then fallback to localhost + if configured host is "redis" (Docker). + + Raises: + RuntimeError: If Redis is not accessible. + """ + hosts_to_try = [host] + + # Fallback: if configured host is "redis" (Docker), also try localhost + if host == "redis": + hosts_to_try.append("localhost") + + last_error = None + for try_host in hosts_to_try: + try: + logging.getLogger(__name__).info( + f"Attempting Redis connection to {try_host}:{port}..." + ) + client = redis.Redis( + host=try_host, + port=port, + db=db, + password=password, + decode_responses=False, + ) + client.ping() + return client + except Exception as e: + last_error = e + continue + + raise RuntimeError( + f"Redis unavailable. 
Tried hosts: {hosts_to_try}, port: {port}, db: {db}" + ) from last_error + + +# =============================================================================== +# Request/Response Handling +# =============================================================================== + + +def escape_turtle_string(value: str) -> str: + """ + Escape a string for safe inclusion in Turtle RDF format. + + Handles special characters: backslash, double quotes, newlines, carriage returns, tabs. + + Args: + value: String to escape + + Returns: + Escaped string safe for use in Turtle string literals + """ + if not value: + return value + + # Escape backslash first (must be done before other escapes) + value = value.replace("\\", "\\\\") + # Escape double quotes + value = value.replace('"', '\\"') + # Escape newlines + value = value.replace("\n", "\\n") + # Escape carriage returns + value = value.replace("\r", "\\r") + # Escape tabs + value = value.replace("\t", "\\t") + + return value + + +def create_entity_mention_request( + request_id: str, + source_id: str, + entity_type: str, + legal_name: str, + country_code: str, + nuts_code: str | None = None, + post_code: str | None = None, + post_name: str | None = None, + thoroughfare: str | None = None, +) -> dict: + """ + Create an EntityMentionResolutionRequest payload. + + Uses RDF/Turtle format with entity metadata including extended address fields. + All string values are properly escaped for Turtle compatibility. 
+ + Args: + request_id: Unique request identifier + source_id: Source system identifier + entity_type: Entity type (e.g., ORGANISATION) + legal_name: Legal name of the entity + country_code: ISO 2-letter country code + nuts_code: Optional NUTS regional code + post_code: Optional postal code + post_name: Optional city/locality name + thoroughfare: Optional street address + """ + # Escape all string values for Turtle safety + legal_name_safe = escape_turtle_string(legal_name or "") + country_code_safe = escape_turtle_string(country_code or "") + + # Build address properties dynamically + address_props = [f'epo:hasCountryCode "{country_code_safe}"'] + if nuts_code: + nuts_code_safe = escape_turtle_string(nuts_code) + address_props.append(f'epo:hasNutsCode "{nuts_code_safe}"') + if post_code: + post_code_safe = escape_turtle_string(post_code) + address_props.append(f'locn:postCode "{post_code_safe}"') + if post_name: + post_name_safe = escape_turtle_string(post_name) + address_props.append(f'locn:postName "{post_name_safe}"') + if thoroughfare: + thoroughfare_safe = escape_turtle_string(thoroughfare) + address_props.append(f'locn:thoroughfare "{thoroughfare_safe}"') + + address_content = " ;\n ".join(address_props) + + content = f"""@prefix org: . +@prefix cccev: . +@prefix epo: . +@prefix locn: . +@prefix epd: . + +epd:ent{request_id} a org:Organization ; + epo:hasLegalName "{legal_name_safe}" ; + cccev:registeredAddress [ + {address_content} + ] . 
+""" + + return { + "type": "EntityMentionResolutionRequest", + "entity_mention": { + "identifiedBy": { + "request_id": request_id, + "source_id": source_id, + "entity_type": entity_type, + }, + "content": content.strip(), + "content_type": "text/turtle", + }, + "timestamp": datetime.now(timezone.utc).isoformat(), + "ere_request_id": f"{request_id}:01", + } + + +def parse_response(response_bytes: bytes) -> dict: + """Parse JSON response from Redis.""" + return json.loads(response_bytes.decode("utf-8")) + + +# =============================================================================== +# Demo Data Loading +# =============================================================================== + + +def load_demo_mentions(data_file: str | None = None) -> list[dict]: + """ + Load demo mentions from a JSON file. + + Args: + data_file: Path to JSON file containing mentions. If None, uses default. + + Returns: + List of mention dicts with keys: request_id, source_id, entity_type, + legal_name, country_code, description. + + Raises: + FileNotFoundError: If data file does not exist. + ValueError: If JSON is invalid or missing 'mentions' key. + """ + if data_file is None: + data_file = DEFAULT_DATA_FILE + + data_path = Path(data_file) + if not data_path.exists(): + raise FileNotFoundError(f"Data file not found: {data_path}") + + with open(data_path) as f: + data = json.load(f) + + if "mentions" not in data: + raise ValueError(f"JSON must contain 'mentions' key") + + return data["mentions"] + + +# =============================================================================== +# Main Demo +# =============================================================================== + + +def main(data_file: str | None = None): + """ + Run the Redis-based ERE demo. + + Args: + data_file: Path to JSON file containing demo mentions. + If None, uses default (mentions_mixed_countries.json). 
+ """ + logger = setup_logging() + + # Load configuration + logger.info("Loading configuration...") + config = load_env_file() + logger.info( + f"Redis config: host={config['REDIS_HOST']}, " + f"port={config['REDIS_PORT']}, db={config['REDIS_DB']}" + ) + logger.info( + f"Queue names: request={config['REQUEST_QUEUE']}, " + f"response={config['RESPONSE_QUEUE']}" + ) + + # Load demo mentions from JSON + try: + demo_mentions = load_demo_mentions(data_file) + logger.info( + f"Loaded {len(demo_mentions)} mentions from {data_file or DEFAULT_DATA_FILE}" + ) + except (FileNotFoundError, ValueError) as e: + logger.error(f"Failed to load demo mentions: {e}") + return 1 + + # Check Redis connectivity + logger.info("Checking Redis connectivity...") + try: + redis_client = check_redis_connectivity( + host=config["REDIS_HOST"], + port=config["REDIS_PORT"], + db=config["REDIS_DB"], + password=config["REDIS_PASSWORD"], + ) + logger.info("✓ Redis is available") + except RuntimeError as e: + logger.error(f"✗ Redis check failed: {e}") + return 1 + + # Clear queues + logger.info("Clearing request and response queues...") + redis_client.delete(config["REQUEST_QUEUE"], config["RESPONSE_QUEUE"]) + + # ⚠️ Check if DuckDB database is non-empty (stale from prior runs) + # This guards against corrupting demo results by mixing old and new mentions + duckdb_path = Path(os.environ.get("DUCKDB_PATH", "/data/app.duckdb")) + if duckdb_path.exists() and duckdb_path.stat().st_size > 0: + logger.warning( + f"⚠️ WARNING: DuckDB database file exists and is non-empty!\n" + f" This may contain mentions from a prior run.\n" + f" This will CORRUPT demo results by mixing old and new data.\n" + f" \n" + f" To reset the database:\n" + f" 1. docker volume rm ere-local_ere-data\n" + f" 2. 
make infra-rebuild\n" + ) + + # Send demo requests + logger.info(f"Sending {len(demo_mentions)} entity mentions...") + request_ids = [] + + for mention in demo_mentions: + request = create_entity_mention_request( + request_id=mention["request_id"], + source_id=mention["source_id"], + entity_type=mention["entity_type"], + legal_name=mention["legal_name"], + country_code=mention["country_code"], + nuts_code=mention.get("nuts_code"), + post_code=mention.get("post_code"), + post_name=mention.get("post_name"), + thoroughfare=mention.get("thoroughfare"), + ) + + message_json = json.dumps(request) + if logger.isEnabledFor(TRACE): + logger.log(TRACE, f"Full request message:\n{json.dumps(request, indent=2)}") + + message_bytes = message_json.encode("utf-8") + redis_client.rpush(config["REQUEST_QUEUE"], message_bytes) + request_ids.append(mention["request_id"]) + + logger.info( + f" → Sent request {mention['request_id']}: " + f"{mention['legal_name']} ({mention['country_code']}) " + f"[{mention.get('description', '')}]" + ) + + # Wait 1 second between messages to ensure sequential processing + if DELAY_BETWEEN_MESSAGES: + time.sleep(1) + + logger.info("") + logger.info("Listening for responses...") + logger.info("-" * 80) + + # Track mentions for summary: map request_id → (legal_name, cluster_id) + mention_tracking = {} + for mention in demo_mentions: + mention_tracking[mention["request_id"]] = { + "legal_name": mention["legal_name"], + "cluster_id": None, # Will be filled in from response + } + + # Listen for responses + responses_received = {} + start_time = time.time() + + while len(responses_received) < len(request_ids): + elapsed = time.time() - start_time + if GLOBAL_TIMEOUT > 0 and elapsed > GLOBAL_TIMEOUT: + logger.warning( + f"Timeout after {GLOBAL_TIMEOUT}s. Received {len(responses_received)}/{len(request_ids)} responses." 
+ ) + break + + # Try to get a response with short timeout + result = redis_client.brpop(config["RESPONSE_QUEUE"], timeout=1) + + if result is not None: + _, response_bytes = result + response = parse_response(response_bytes) + + if logger.isEnabledFor(TRACE): + logger.log( + TRACE, f"Full response message:\n{json.dumps(response, indent=2)}" + ) + + req_id = response["entity_mention_id"]["request_id"] + responses_received[req_id] = response + + logger.info(f"\n✓ Response received for {req_id}:") + logger.info(f" Type: {response['type']}") + logger.info(f" Timestamp: {response['timestamp']}") + + source_id = response["entity_mention_id"]["source_id"] + entity_type = response["entity_mention_id"]["entity_type"] + logger.info(f" Mention: ({source_id}, {req_id}, {entity_type})") + + logger.info(f" Candidates:") + + # Track the top cluster assignment (first candidate is the assignment) + if response.get("candidates"): + top_candidate = response["candidates"][0] + assigned_cluster = top_candidate["cluster_id"] + mention_tracking[req_id]["cluster_id"] = assigned_cluster + logger.info(f" → Assigned to cluster: {assigned_cluster}") + + for i, candidate in enumerate(response.get("candidates", []), 1): + logger.info( + f" {i}. Cluster {candidate['cluster_id']}: " + f"confidence={candidate['confidence_score']:.4f}, " + f"similarity={candidate['similarity_score']:.4f}" + ) + + logger.info("-" * 80) + logger.info( + f"\nDemo complete. Received {len(responses_received)}/{len(request_ids)} responses." 
+ ) + + # Build clustering summary as single block + summary_lines = [] + summary_lines.append("=" * 80) + summary_lines.append("CLUSTERING SUMMARY") + summary_lines.append("=" * 80) + + # Group mentions by assigned cluster + clusters = {} + unassigned = [] + + for req_id in request_ids: + tracking = mention_tracking.get(req_id) + if tracking: + cluster_id = tracking["cluster_id"] + legal_name = tracking["legal_name"] + + if cluster_id is None: + unassigned.append((req_id, legal_name)) + else: + if cluster_id not in clusters: + clusters[cluster_id] = [] + clusters[cluster_id].append((req_id, legal_name)) + + # Build cluster output + if clusters: + for cluster_id in sorted(clusters.keys()): + members = clusters[cluster_id] + summary_lines.append("") + summary_lines.append(f"{cluster_id} ({len(members)} members):") + for req_id, legal_name in members: + summary_lines.append(f" {req_id:4s} | {legal_name}") + else: + summary_lines.append("") + summary_lines.append("(No clusters formed)") + + # Add unassigned mentions + if unassigned: + summary_lines.append("") + summary_lines.append(f"Unassigned ({len(unassigned)} mentions):") + for req_id, legal_name in unassigned: + summary_lines.append(f" {req_id:4s} | {legal_name}") + + summary_lines.append("=" * 80) + + # Print entire summary in one log call + summary_block = "\n".join(summary_lines) + logger.info(f"\n{summary_block}") + + # Summary + if len(responses_received) == len(request_ids): + logger.info("✓ All responses received successfully!") + return 0 + else: + logger.warning( + f"✗ Missing {len(request_ids) - len(responses_received)} response(s)." + ) + return 1 + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Redis-based ERE demo with parametrized mentions data." 
+ ) + parser.add_argument( + "--data", + type=str, + default=None, + help=f"Path to JSON file with demo mentions (default: {DEFAULT_DATA_FILE})", + ) + args = parser.parse_args() + + sys.exit(main(data_file=args.data)) From def5ab0253afe5553a11bbbe5c4af9aef6988a65 Mon Sep 17 00:00:00 2001 From: Eugeniu Costetchi Date: Thu, 2 Apr 2026 16:42:10 +0200 Subject: [PATCH 08/14] using develop branch of ers-spec for now --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7520dbf..e4e5579 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ pandas = ">=2.0,<3.0" splink = ">=4.0,<5.0" # TODO: should we have a registry? -ers-spec = { git = "https://github.com/OP-TED/entity-resolution-spec.git", branch = "0.3.0-rc.1" } +ers-spec = { git = "https://github.com/OP-TED/entity-resolution-spec.git", branch = "develop" } [tool.pytest.ini_options] From c0607a699741b32149a4de5f6234874f70fc2cc7 Mon Sep 17 00:00:00 2001 From: Eugeniu Costetchi Date: Thu, 2 Apr 2026 17:59:25 +0200 Subject: [PATCH 09/14] test: add unit tests to reach 85% coverage threshold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the SonarQube quality gate failure from PR#21 (69% coverage on new code). 
New test modules: - test/unit/adapters/test_utils.py: message parsing (get_request/response_from_message) - test/unit/adapters/test_adapter_factories.py: build_rdf_mapper factory - test/unit/entrypoints/test_queue_worker.py: RedisQueueWorker with mocked Redis - test/unit/utils/test_logging.py: configure_logging and TRACE level - test/unit/services/test_services_factories.py: build_entity_resolver (in-memory + persistent DuckDB) - test/unit/test_models.py: MentionLink/ResolutionResult edge cases + app.main() failure paths Extended test files: - stubs.py: add StubRDFMapper and find_by_id to InMemoryMentionRepository - test_entity_resolution_service.py: EntityResolutionService process_request paths - test_duckdb_adapters.py: load_all and save_all([]) coverage Coverage: 61% → 85% (unit + BDD combined) --- test/unit/adapters/stubs.py | 31 +++++ test/unit/adapters/test_adapter_factories.py | 18 +++ test/unit/adapters/test_duckdb_adapters.py | 26 ++++ test/unit/adapters/test_utils.py | 74 +++++++++++ test/unit/entrypoints/__init__.py | 0 test/unit/entrypoints/test_queue_worker.py | 124 ++++++++++++++++++ .../test_entity_resolution_service.py | 119 ++++++++++++++++- test/unit/services/test_services_factories.py | 67 ++++++++++ test/unit/test_models.py | 111 ++++++++++++++++ test/unit/utils/__init__.py | 0 test/unit/utils/test_logging.py | 54 ++++++++ 11 files changed, 622 insertions(+), 2 deletions(-) create mode 100644 test/unit/adapters/test_adapter_factories.py create mode 100644 test/unit/adapters/test_utils.py create mode 100644 test/unit/entrypoints/__init__.py create mode 100644 test/unit/entrypoints/test_queue_worker.py create mode 100644 test/unit/services/test_services_factories.py create mode 100644 test/unit/test_models.py create mode 100644 test/unit/utils/__init__.py create mode 100644 test/unit/utils/test_logging.py diff --git a/test/unit/adapters/stubs.py b/test/unit/adapters/stubs.py index 2b7741b..d2054bc 100644 --- a/test/unit/adapters/stubs.py +++ 
b/test/unit/adapters/stubs.py @@ -2,6 +2,9 @@ from typing import Protocol, runtime_checkable +from erspec.models.core import EntityMention + +from ere.adapters.rdf_mapper_port import RDFMapper from ere.models.resolver import ( ClusterId, ClusterMembership, @@ -78,6 +81,9 @@ def save(self, mention: Mention) -> None: def load_all(self) -> list[Mention]: return list(self._mentions.values()) + def find_by_id(self, mention_id: MentionId) -> Mention | None: + return self._mentions.get(mention_id) + def count(self) -> int: return len(self._mentions) @@ -193,3 +199,28 @@ def register_mention(self, mention: Mention) -> None: def train(self) -> None: """No-op for fixed linker (scores are pre-configured).""" pass + + +class StubRDFMapper(RDFMapper): + """ + RDFMapper stub for unit testing. + + Returns a pre-configured Mention without performing any RDF parsing. + Optionally raises a configured exception to test error paths. + """ + + def __init__( + self, + mention_to_return: Mention = None, + error: Exception = None, + ): + self._mention = mention_to_return or Mention( + id=MentionId(value="stub-mention-id"), + attributes={"legal_name": "Stub Corp", "country_code": "US"}, + ) + self._error = error + + def map_entity_mention_to_domain(self, entity_mention: EntityMention) -> Mention: + if self._error is not None: + raise self._error + return self._mention diff --git a/test/unit/adapters/test_adapter_factories.py b/test/unit/adapters/test_adapter_factories.py new file mode 100644 index 0000000..e339df4 --- /dev/null +++ b/test/unit/adapters/test_adapter_factories.py @@ -0,0 +1,18 @@ +"""Unit tests for adapters.factories: RDFMapper construction.""" + +from pathlib import Path + +from ere.adapters.factories import build_rdf_mapper +from ere.adapters.rdf_mapper_port import RDFMapper + +TEST_RDF_MAPPING = Path(__file__).parent.parent.parent / "resources" / "rdf_mapping.yaml" + + +def test_build_rdf_mapper_with_explicit_path_returns_mapper(): + mapper = 
build_rdf_mapper(rdf_mapping_path=TEST_RDF_MAPPING) + assert isinstance(mapper, RDFMapper) + + +def test_build_rdf_mapper_without_path_uses_default(): + mapper = build_rdf_mapper() + assert isinstance(mapper, RDFMapper) diff --git a/test/unit/adapters/test_duckdb_adapters.py b/test/unit/adapters/test_duckdb_adapters.py index 03f5b79..fa6cb4b 100644 --- a/test/unit/adapters/test_duckdb_adapters.py +++ b/test/unit/adapters/test_duckdb_adapters.py @@ -244,3 +244,29 @@ def test_cluster_membership_mapping(service, con): assert len(memberships[cluster_id]) == 2 assert MentionId(value="m1") in memberships[cluster_id] assert MentionId(value="m2") in memberships[cluster_id] + + +def test_mention_repository_load_all_returns_persisted_mentions(con, entity_fields): + """load_all should return all mentions previously saved.""" + repo = DuckDBMentionRepository(con, entity_fields) + m1 = Mention(id=MentionId(value="la1"), attributes={"legal_name": "Alpha", "country_code": "DE"}) + m2 = Mention(id=MentionId(value="la2"), attributes={"legal_name": "Beta", "country_code": "FR"}) + + repo.save(m1) + repo.save(m2) + + loaded = repo.load_all() + + assert len(loaded) == 2 + ids = {m.id.value for m in loaded} + assert ids == {"la1", "la2"} + + +def test_similarity_repository_save_all_empty_is_noop(con): + """save_all with an empty list should not raise and not write any rows.""" + repo = DuckDBSimilarityRepository(con) + + repo.save_all([]) # must not raise + + count = con.execute("SELECT COUNT(*) FROM similarities").fetchone()[0] + assert count == 0 diff --git a/test/unit/adapters/test_utils.py b/test/unit/adapters/test_utils.py new file mode 100644 index 0000000..80fb431 --- /dev/null +++ b/test/unit/adapters/test_utils.py @@ -0,0 +1,74 @@ +"""Unit tests for adapters.utils: message parsing utilities.""" + +import json +from datetime import datetime, timezone + +import pytest +from erspec.models.core import EntityMention, EntityMentionIdentifier +from erspec.models.ere import ( + 
EREErrorResponse, + EntityMentionResolutionRequest, + EntityMentionResolutionResponse, +) +from linkml_runtime.dumpers import JSONDumper + +from ere.adapters.utils import ( + get_message_object, + get_request_from_message, + get_response_from_message, +) + +_dumper = JSONDumper() + + +def _make_request(request_id: str = "utils-test-001") -> EntityMentionResolutionRequest: + return EntityMentionResolutionRequest( + entity_mention=EntityMention( + identifiedBy=EntityMentionIdentifier( + request_id=request_id, + source_id="utils-test-src", + entity_type="http://test.org/Org", + ), + content_type="text/turtle", + content="<>", + ), + ere_request_id=request_id, + timestamp=datetime.now(timezone.utc).isoformat(), + ) + + +def _serialise(obj) -> bytes: + return _dumper.dumps(obj).encode("utf-8") + + +def test_get_request_from_message_returns_request(): + raw = _serialise(_make_request("req-parse-01")) + result = get_request_from_message(raw) + assert isinstance(result, EntityMentionResolutionRequest) + assert result.ere_request_id == "req-parse-01" + + +def test_get_response_from_message_returns_error_response(): + response = EREErrorResponse( + ere_request_id="resp-parse-01", + error_type="TestError", + error_title="Test", + error_detail="detail", + timestamp=datetime.now(timezone.utc).isoformat(), + ) + raw = _serialise(response) + result = get_response_from_message(raw) + assert isinstance(result, EREErrorResponse) + assert result.ere_request_id == "resp-parse-01" + + +def test_get_message_object_raises_on_missing_type(): + raw = json.dumps({"ere_request_id": "no-type"}).encode("utf-8") + with pytest.raises(ValueError, match="message without 'type' field"): + get_message_object(raw, {}) + + +def test_get_message_object_raises_on_unsupported_type(): + raw = json.dumps({"type": "UnknownClass", "ere_request_id": "x"}).encode("utf-8") + with pytest.raises(ValueError, match='unsupported message class: "UnknownClass"'): + get_message_object(raw, {}) diff --git 
a/test/unit/entrypoints/__init__.py b/test/unit/entrypoints/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/unit/entrypoints/test_queue_worker.py b/test/unit/entrypoints/test_queue_worker.py new file mode 100644 index 0000000..b55f143 --- /dev/null +++ b/test/unit/entrypoints/test_queue_worker.py @@ -0,0 +1,124 @@ +"""Unit tests for RedisQueueWorker entrypoint (mocked Redis and service).""" + +import json +from datetime import datetime, timezone +from unittest.mock import MagicMock + +import pytest +from erspec.models.core import EntityMention, EntityMentionIdentifier +from erspec.models.ere import ( + EREErrorResponse, + EntityMentionResolutionRequest, + EntityMentionResolutionResponse, +) +from linkml_runtime.dumpers import JSONDumper + +from ere.entrypoints.queue_worker import RedisQueueWorker + +_dumper = JSONDumper() + + +def _make_request(request_id: str = "qw-test-001") -> EntityMentionResolutionRequest: + return EntityMentionResolutionRequest( + entity_mention=EntityMention( + identifiedBy=EntityMentionIdentifier( + request_id=request_id, + source_id="qw-src", + entity_type="http://test.org/Org", + ), + content_type="text/turtle", + content="<>", + ), + ere_request_id=request_id, + timestamp=datetime.now(timezone.utc).isoformat(), + ) + + +def _make_response(request_id: str = "qw-test-001") -> EntityMentionResolutionResponse: + return EntityMentionResolutionResponse( + entity_mention_id=EntityMentionIdentifier( + request_id=request_id, + source_id="qw-src", + entity_type="http://test.org/Org", + ), + candidates=[], + ere_request_id=request_id, + timestamp=datetime.now(timezone.utc).isoformat(), + ) + + +@pytest.fixture +def mock_redis(): + return MagicMock() + + +@pytest.fixture +def mock_service(): + return MagicMock() + + +@pytest.fixture +def worker(mock_redis, mock_service) -> RedisQueueWorker: + return RedisQueueWorker( + redis_client=mock_redis, + entity_resolution_service=mock_service, + request_queue="ere_requests", + 
response_queue="ere_responses", + queue_timeout=1, + ) + + +def test_process_single_message_returns_false_on_timeout(worker, mock_redis): + mock_redis.brpop.return_value = None + + result = worker.process_single_message() + + assert result is False + + +def test_process_single_message_returns_true_on_success(worker, mock_redis, mock_service): + request = _make_request("qw-happy") + raw_msg = _dumper.dumps(request).encode("utf-8") + mock_redis.brpop.return_value = ("ere_requests", raw_msg) + mock_service.process_request.return_value = _make_response("qw-happy") + + result = worker.process_single_message() + + assert result is True + mock_service.process_request.assert_called_once() + mock_redis.lpush.assert_called_once() + + +def test_process_single_message_sends_error_response_on_parse_failure( + worker, mock_redis, mock_service +): + mock_redis.brpop.return_value = ("ere_requests", b"not valid json at all") + + result = worker.process_single_message() + + assert result is True + mock_redis.lpush.assert_called_once() + pushed_payload = mock_redis.lpush.call_args[0][1] + pushed_json = json.loads(pushed_payload) + assert pushed_json.get("error_type") == "ProcessingError" + + +def test_send_response_logs_error_on_redis_failure(worker, mock_redis): + mock_redis.lpush.side_effect = ConnectionError("redis down") + response = EREErrorResponse( + ere_request_id="err-resp", + error_type="TestError", + error_title="Test", + error_detail="detail", + timestamp=datetime.now(timezone.utc).isoformat(), + ) + worker._send_response(response) # must not raise + + +def test_build_error_response_returns_ere_error_response(): + response = RedisQueueWorker._build_error_response("something broke", "req-err") + + assert isinstance(response, EREErrorResponse) + assert response.ere_request_id == "req-err" + assert response.error_type == "ProcessingError" + assert "something broke" in response.error_detail diff --git a/test/unit/services/test_entity_resolution_service.py 
b/test/unit/services/test_entity_resolution_service.py index 0948f06..617cd42 100644 --- a/test/unit/services/test_entity_resolution_service.py +++ b/test/unit/services/test_entity_resolution_service.py @@ -1,6 +1,14 @@ -"""Unit tests for EntityResolver (no DuckDB, no Splink).""" +"""Unit tests for EntityResolver and EntityResolutionService (no DuckDB, no Splink).""" import pytest +from datetime import datetime, timezone + +from erspec.models.core import EntityMention, EntityMentionIdentifier +from erspec.models.ere import ( + EREErrorResponse, + EntityMentionResolutionRequest, + EntityMentionResolutionResponse, +) from ere.models.resolver import ( ClusterId, @@ -8,13 +16,18 @@ MentionId, MentionLink, ) -from ere.services.entity_resolution_service import EntityResolver +from ere.services.entity_resolution_service import ( + EntityResolutionService, + EntityResolver, + resolve_entity_mention, +) from ere.services.resolver_config import DuckDBConfig, ResolverConfig from test.unit.adapters.stubs import ( FixedSimilarityLinker, InMemoryClusterRepository, InMemoryMentionRepository, InMemorySimilarityRepository, + StubRDFMapper, ) @@ -484,3 +497,105 @@ def test_multiple_independent_clusters(service): state = service.state() assert state.cluster_count == 3 assert state.mention_count == 3 + + +# =============================================================================== +# resolve_entity_mention guard tests +# =============================================================================== + + +def test_resolve_entity_mention_raises_when_resolver_is_none(): + mention = EntityMention( + identifiedBy=EntityMentionIdentifier( + request_id="m1", + source_id="src", + entity_type="http://test.org/Org", + ), + content_type="text/turtle", + content="<>", + ) + with pytest.raises(ValueError, match="resolver must be provided"): + resolve_entity_mention(mention, resolver=None, mapper=StubRDFMapper()) + + +def test_resolve_entity_mention_raises_when_mapper_is_none(service): + 
mention = EntityMention( + identifiedBy=EntityMentionIdentifier( + request_id="m1", + source_id="src", + entity_type="http://test.org/Org", + ), + content_type="text/turtle", + content="<>", + ) + with pytest.raises(ValueError, match="mapper must be provided"): + resolve_entity_mention(mention, resolver=service, mapper=None) + + +# =============================================================================== +# EntityResolutionService tests +# =============================================================================== + + +@pytest.fixture +def stub_mapper() -> StubRDFMapper: + return StubRDFMapper() + + +@pytest.fixture +def resolution_service(service: EntityResolver, stub_mapper: StubRDFMapper) -> EntityResolutionService: + return EntityResolutionService(resolver=service, mapper=stub_mapper) + + +def _make_request(request_id: str = "req-001") -> EntityMentionResolutionRequest: + return EntityMentionResolutionRequest( + entity_mention=EntityMention( + identifiedBy=EntityMentionIdentifier( + request_id=request_id, + source_id="test-src", + entity_type="http://test.org/Org", + ), + content_type="text/turtle", + content="<>", + ), + ere_request_id=request_id, + timestamp=datetime.now(timezone.utc).isoformat(), + ) + + +def test_process_request_unsupported_type_returns_error_response(resolution_service): + class UnknownRequest: + ere_request_id = "unknown-001" + + response = resolution_service.process_request(UnknownRequest()) + + assert isinstance(response, EREErrorResponse) + assert response.error_type == "UnsupportedRequestType" + + +def test_process_request_happy_path_returns_resolution_response(resolution_service): + request = _make_request("req-happy") + + response = resolution_service.process_request(request) + + assert isinstance(response, EntityMentionResolutionResponse) + assert response.ere_request_id == "req-happy" + assert len(response.candidates) >= 1 + + +def test_process_request_mapper_error_returns_error_response(service: EntityResolver): + 
failing_mapper = StubRDFMapper(error=ValueError("RDF parse failure")) + svc = EntityResolutionService(resolver=service, mapper=failing_mapper) + + response = svc.process_request(_make_request("req-fail")) + + assert isinstance(response, EREErrorResponse) + assert response.error_type == "ValueError" + assert "RDF parse failure" in response.error_detail + + +def test_call_delegates_to_process_request(resolution_service): + request = _make_request("req-call") + response = resolution_service(request) + assert isinstance(response, EntityMentionResolutionResponse) + assert response.ere_request_id == "req-call" diff --git a/test/unit/services/test_services_factories.py b/test/unit/services/test_services_factories.py new file mode 100644 index 0000000..46398d8 --- /dev/null +++ b/test/unit/services/test_services_factories.py @@ -0,0 +1,67 @@ +"""Unit tests for services.factories: construction of resolver and service.""" + +from pathlib import Path + +import pytest +import yaml + +from ere.services.entity_resolution_service import EntityResolutionService, EntityResolver +from ere.services.factories import build_entity_resolution_service, build_entity_resolver +from test.unit.adapters.stubs import StubRDFMapper + +TEST_RESOLVER_CONFIG = Path(__file__).parent.parent.parent / "resources" / "resolver.yaml" + + +def test_build_entity_resolver_returns_entity_resolver(): + resolver = build_entity_resolver(resolver_config_path=TEST_RESOLVER_CONFIG) + assert isinstance(resolver, EntityResolver) + + +def test_build_entity_resolver_uses_default_config_when_no_path_given(): + resolver = build_entity_resolver() + assert isinstance(resolver, EntityResolver) + + +def test_build_entity_resolver_with_explicit_entity_fields(): + resolver = build_entity_resolver( + entity_fields=["legal_name"], + resolver_config_path=TEST_RESOLVER_CONFIG, + ) + assert isinstance(resolver, EntityResolver) + + +def test_build_entity_resolver_with_persistent_duckdb(tmp_path): + db_file = str(tmp_path / 
"test.duckdb") + with open(TEST_RESOLVER_CONFIG, encoding="utf-8") as f: + raw = yaml.safe_load(f) + raw["duckdb"] = {"type": "persistent", "path": db_file} + config = tmp_path / "persistent.yaml" + config.write_text(yaml.dump(raw), encoding="utf-8") + + resolver = build_entity_resolver(resolver_config_path=config, duckdb_path=db_file) + assert isinstance(resolver, EntityResolver) + + +def test_build_entity_resolver_raises_on_invalid_duckdb_type(tmp_path): + bad_config = tmp_path / "bad.yaml" + bad_config.write_text( + "threshold: 0.8\n" + "match_weight_threshold: -10\n" + "top_n: 10\n" + "entity_fields: [legal_name]\n" + "duckdb:\n" + " type: invalid_type\n" + " path: ':memory:'\n", + encoding="utf-8", + ) + with pytest.raises(ValueError, match="Invalid duckdb type"): + build_entity_resolver(resolver_config_path=bad_config) + + +def test_build_entity_resolution_service_returns_service(): + resolver = build_entity_resolver(resolver_config_path=TEST_RESOLVER_CONFIG) + mapper = StubRDFMapper() + + service = build_entity_resolution_service(resolver, mapper) + + assert isinstance(service, EntityResolutionService) diff --git a/test/unit/test_models.py b/test/unit/test_models.py new file mode 100644 index 0000000..d984596 --- /dev/null +++ b/test/unit/test_models.py @@ -0,0 +1,111 @@ +"""Unit tests for domain model edge cases (error paths and utility methods).""" + +import pytest +from unittest.mock import MagicMock, patch + +from ere.models.resolver import ClusterId, MentionId +from ere.models.resolver.cluster import CandidateCluster, ResolutionResult +from ere.models.resolver.similarity import MentionLink + + +# ============================================================================ +# MentionLink +# ============================================================================ + + +def test_mention_link_rejects_same_left_and_right_id(): + m = MentionId(value="x") + with pytest.raises(ValueError, match="left_id and right_id must differ"): + MentionLink(left_id=m, 
right_id=m, score=0.9) + + +def test_mention_link_other_returns_right_when_from_is_left(): + left = MentionId(value="a") + right = MentionId(value="b") + link = MentionLink(left_id=left, right_id=right, score=0.5) + assert link.other(left) == right + + +def test_mention_link_other_returns_left_when_from_is_right(): + left = MentionId(value="a") + right = MentionId(value="b") + link = MentionLink(left_id=left, right_id=right, score=0.5) + assert link.other(right) == left + + +def test_mention_link_other_raises_when_id_not_in_link(): + left = MentionId(value="a") + right = MentionId(value="b") + unknown = MentionId(value="z") + link = MentionLink(left_id=left, right_id=right, score=0.5) + with pytest.raises(ValueError): + link.other(unknown) + + +# ============================================================================ +# ResolutionResult / CandidateCluster +# ============================================================================ + + +def test_resolution_result_rejects_empty_candidates(): + with pytest.raises(ValueError, match="must be non-empty"): + ResolutionResult(candidates=()) + + +def test_candidate_cluster_as_tuple_returns_id_and_score(): + c = CandidateCluster(cluster_id=ClusterId(value="c1"), score=0.75) + assert c.as_tuple() == ("c1", 0.75) + + +def test_resolution_result_as_tuples_returns_list(): + candidates = ( + CandidateCluster(cluster_id=ClusterId(value="c1"), score=0.9), + CandidateCluster(cluster_id=ClusterId(value="c2"), score=0.6), + ) + result = ResolutionResult(candidates=candidates) + assert result.as_tuples() == [("c1", 0.9), ("c2", 0.6)] + + +# ============================================================================ +# app.main() failure paths +# ============================================================================ + + +def test_main_exits_when_redis_connection_fails(monkeypatch): + monkeypatch.setattr("sys.argv", ["ere"]) + with patch("redis.Redis") as mock_redis_cls, \ + patch("ere.entrypoints.app.configure_logging"): 
+ mock_redis_cls.return_value.ping.side_effect = ConnectionError("no redis") + with pytest.raises(SystemExit) as exc: + from ere.entrypoints.app import main + main() + assert exc.value.code == 1 + + +def test_main_exits_when_service_build_fails(monkeypatch): + monkeypatch.setattr("sys.argv", ["ere"]) + with patch("redis.Redis") as mock_redis_cls, \ + patch("ere.entrypoints.app.configure_logging"), \ + patch("ere.entrypoints.app.build_entity_resolver", side_effect=RuntimeError("build fail")): + mock_redis_cls.return_value.ping.return_value = True + with pytest.raises(SystemExit) as exc: + from ere.entrypoints.app import main + main() + assert exc.value.code == 1 + + +def test_main_runs_loop_until_keyboard_interrupt(monkeypatch): + monkeypatch.setattr("sys.argv", ["ere"]) + mock_resolver = MagicMock() + mock_resolver._mention_repo._con = MagicMock() + + with patch("redis.Redis") as mock_redis_cls, \ + patch("ere.entrypoints.app.configure_logging"), \ + patch("ere.entrypoints.app.build_entity_resolver", return_value=mock_resolver), \ + patch("ere.entrypoints.app.build_rdf_mapper", return_value=MagicMock()), \ + patch("ere.entrypoints.app.build_entity_resolution_service", return_value=MagicMock()), \ + patch("ere.entrypoints.app.RedisQueueWorker") as mock_worker_cls: + mock_redis_cls.return_value.ping.return_value = True + mock_worker_cls.return_value.process_single_message.side_effect = KeyboardInterrupt() + from ere.entrypoints.app import main + main() # must return cleanly (KeyboardInterrupt caught internally) diff --git a/test/unit/utils/__init__.py b/test/unit/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/unit/utils/test_logging.py b/test/unit/utils/test_logging.py new file mode 100644 index 0000000..455d1bd --- /dev/null +++ b/test/unit/utils/test_logging.py @@ -0,0 +1,54 @@ +"""Unit tests for utils.logging: log-level setup and TRACE level.""" + +import logging +from unittest.mock import call, patch + +import pytest + +from 
ere.utils.logging import TRACE_LEVEL_NUM, configure_logging + + +def test_configure_logging_passes_warning_level_to_basicconfig(): + with patch("logging.basicConfig") as mock_bc: + configure_logging("WARNING") + mock_bc.assert_called_once() + assert mock_bc.call_args[1]["level"] == logging.WARNING + + +def test_configure_logging_passes_trace_level_to_basicconfig(): + with patch("logging.basicConfig") as mock_bc: + configure_logging("TRACE") + mock_bc.assert_called_once() + assert mock_bc.call_args[1]["level"] == TRACE_LEVEL_NUM + + +def test_configure_logging_reads_env_var(monkeypatch): + monkeypatch.setenv("LOG_LEVEL", "ERROR") + with patch("logging.basicConfig") as mock_bc: + configure_logging() + assert mock_bc.call_args[1]["level"] == logging.ERROR + + +def test_configure_logging_defaults_to_info(monkeypatch): + monkeypatch.delenv("LOG_LEVEL", raising=False) + with patch("logging.basicConfig") as mock_bc: + configure_logging() + assert mock_bc.call_args[1]["level"] == logging.INFO + + +def test_trace_method_exists_on_logger(): + log = logging.getLogger("test.trace") + assert callable(getattr(log, "trace", None)) + + +def test_trace_method_logs_when_enabled(caplog): + log = logging.getLogger("test.trace.enabled") + with caplog.at_level(TRACE_LEVEL_NUM, logger="test.trace.enabled"): + log.trace("trace message sent") + assert "trace message sent" in caplog.text + + +def test_trace_method_does_not_log_when_disabled(): + log = logging.getLogger("test.trace.silent") + log.setLevel(logging.INFO) + log.trace("this should not explode") From c63a43134ee372e68a7aa895066fa52955802213 Mon Sep 17 00:00:00 2001 From: Eugeniu Costetchi Date: Thu, 2 Apr 2026 18:02:06 +0200 Subject: [PATCH 10/14] updated project setup --- .claude/skills/gitnexus/gitnexus-cli/SKILL.md | 82 ++++++++++++ .../gitnexus/gitnexus-debugging/SKILL.md | 89 +++++++++++++ .../gitnexus/gitnexus-exploring/SKILL.md | 78 +++++++++++ .../skills/gitnexus/gitnexus-guide/SKILL.md | 64 +++++++++ 
.../gitnexus-impact-analysis/SKILL.md | 97 ++++++++++++++ .../gitnexus/gitnexus-refactoring/SKILL.md | 121 ++++++++++++++++++ .gitignore | 2 +- 7 files changed, 532 insertions(+), 1 deletion(-) create mode 100644 .claude/skills/gitnexus/gitnexus-cli/SKILL.md create mode 100644 .claude/skills/gitnexus/gitnexus-debugging/SKILL.md create mode 100644 .claude/skills/gitnexus/gitnexus-exploring/SKILL.md create mode 100644 .claude/skills/gitnexus/gitnexus-guide/SKILL.md create mode 100644 .claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md create mode 100644 .claude/skills/gitnexus/gitnexus-refactoring/SKILL.md diff --git a/.claude/skills/gitnexus/gitnexus-cli/SKILL.md b/.claude/skills/gitnexus/gitnexus-cli/SKILL.md new file mode 100644 index 0000000..c9e0af3 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-cli/SKILL.md @@ -0,0 +1,82 @@ +--- +name: gitnexus-cli +description: "Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: \"Index this repo\", \"Reanalyze the codebase\", \"Generate a wiki\"" +--- + +# GitNexus CLI Commands + +All commands work via `npx` — no global install required. + +## Commands + +### analyze — Build or refresh the index + +```bash +npx gitnexus analyze +``` + +Run from the project root. This parses all source files, builds the knowledge graph, writes it to `.gitnexus/`, and generates CLAUDE.md / AGENTS.md context files. + +| Flag | Effect | +| -------------- | ---------------------------------------------------------------- | +| `--force` | Force full re-index even if up to date | +| `--embeddings` | Enable embedding generation for semantic search (off by default) | + +**When to run:** First time in a project, after major code changes, or when `gitnexus://repo/{name}/context` reports the index is stale. 
In Claude Code, a PostToolUse hook runs `analyze` automatically after `git commit` and `git merge`, preserving embeddings if previously generated. + +### status — Check index freshness + +```bash +npx gitnexus status +``` + +Shows whether the current repo has a GitNexus index, when it was last updated, and symbol/relationship counts. Use this to check if re-indexing is needed. + +### clean — Delete the index + +```bash +npx gitnexus clean +``` + +Deletes the `.gitnexus/` directory and unregisters the repo from the global registry. Use before re-indexing if the index is corrupt or after removing GitNexus from a project. + +| Flag | Effect | +| --------- | ------------------------------------------------- | +| `--force` | Skip confirmation prompt | +| `--all` | Clean all indexed repos, not just the current one | + +### wiki — Generate documentation from the graph + +```bash +npx gitnexus wiki +``` + +Generates repository documentation from the knowledge graph using an LLM. Requires an API key (saved to `~/.gitnexus/config.json` on first use). + +| Flag | Effect | +| ------------------- | ----------------------------------------- | +| `--force` | Force full regeneration | +| `--model ` | LLM model (default: minimax/minimax-m2.5) | +| `--base-url ` | LLM API base URL | +| `--api-key ` | LLM API key | +| `--concurrency ` | Parallel LLM calls (default: 3) | +| `--gist` | Publish wiki as a public GitHub Gist | + +### list — Show all indexed repos + +```bash +npx gitnexus list +``` + +Lists all repositories registered in `~/.gitnexus/registry.json`. The MCP `list_repos` tool provides the same information. + +## After Indexing + +1. **Read `gitnexus://repo/{name}/context`** to verify the index loaded +2. 
Use the other GitNexus skills (`exploring`, `debugging`, `impact-analysis`, `refactoring`) for your task + +## Troubleshooting + +- **"Not inside a git repository"**: Run from a directory inside a git repo +- **Index is stale after re-analyzing**: Restart Claude Code to reload the MCP server +- **Embeddings slow**: Omit `--embeddings` (it's off by default) or set `OPENAI_API_KEY` for faster API-based embedding diff --git a/.claude/skills/gitnexus/gitnexus-debugging/SKILL.md b/.claude/skills/gitnexus/gitnexus-debugging/SKILL.md new file mode 100644 index 0000000..9510b97 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-debugging/SKILL.md @@ -0,0 +1,89 @@ +--- +name: gitnexus-debugging +description: "Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: \"Why is X failing?\", \"Where does this error come from?\", \"Trace this bug\"" +--- + +# Debugging with GitNexus + +## When to Use + +- "Why is this function failing?" +- "Trace where this error comes from" +- "Who calls this method?" +- "This endpoint returns 500" +- Investigating bugs, errors, or unexpected behavior + +## Workflow + +``` +1. gitnexus_query({query: ""}) → Find related execution flows +2. gitnexus_context({name: ""}) → See callers/callees/processes +3. READ gitnexus://repo/{name}/process/{name} → Trace execution flow +4. gitnexus_cypher({query: "MATCH path..."}) → Custom traces if needed +``` + +> If "Index is stale" → run `npx gitnexus analyze` in terminal. 
+ +## Checklist + +``` +- [ ] Understand the symptom (error message, unexpected behavior) +- [ ] gitnexus_query for error text or related code +- [ ] Identify the suspect function from returned processes +- [ ] gitnexus_context to see callers and callees +- [ ] Trace execution flow via process resource if applicable +- [ ] gitnexus_cypher for custom call chain traces if needed +- [ ] Read source files to confirm root cause +``` + +## Debugging Patterns + +| Symptom | GitNexus Approach | +| -------------------- | ---------------------------------------------------------- | +| Error message | `gitnexus_query` for error text → `context` on throw sites | +| Wrong return value | `context` on the function → trace callees for data flow | +| Intermittent failure | `context` → look for external calls, async deps | +| Performance issue | `context` → find symbols with many callers (hot paths) | +| Recent regression | `detect_changes` to see what your changes affect | + +## Tools + +**gitnexus_query** — find code related to error: + +``` +gitnexus_query({query: "payment validation error"}) +→ Processes: CheckoutFlow, ErrorHandling +→ Symbols: validatePayment, handlePaymentError, PaymentException +``` + +**gitnexus_context** — full context for a suspect: + +``` +gitnexus_context({name: "validatePayment"}) +→ Incoming calls: processCheckout, webhookHandler +→ Outgoing calls: verifyCard, fetchRates (external API!) +→ Processes: CheckoutFlow (step 3/7) +``` + +**gitnexus_cypher** — custom call chain traces: + +```cypher +MATCH path = (a)-[:CodeRelation {type: 'CALLS'}*1..2]->(b:Function {name: "validatePayment"}) +RETURN [n IN nodes(path) | n.name] AS chain +``` + +## Example: "Payment endpoint returns 500 intermittently" + +``` +1. gitnexus_query({query: "payment error handling"}) + → Processes: CheckoutFlow, ErrorHandling + → Symbols: validatePayment, handlePaymentError + +2. gitnexus_context({name: "validatePayment"}) + → Outgoing calls: verifyCard, fetchRates (external API!) 
+ +3. READ gitnexus://repo/my-app/process/CheckoutFlow + → Step 3: validatePayment → calls fetchRates (external) + +4. Root cause: fetchRates calls external API without proper timeout +``` diff --git a/.claude/skills/gitnexus/gitnexus-exploring/SKILL.md b/.claude/skills/gitnexus/gitnexus-exploring/SKILL.md new file mode 100644 index 0000000..927a4e4 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-exploring/SKILL.md @@ -0,0 +1,78 @@ +--- +name: gitnexus-exploring +description: "Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: \"How does X work?\", \"What calls this function?\", \"Show me the auth flow\"" +--- + +# Exploring Codebases with GitNexus + +## When to Use + +- "How does authentication work?" +- "What's the project structure?" +- "Show me the main components" +- "Where is the database logic?" +- Understanding code you haven't seen before + +## Workflow + +``` +1. READ gitnexus://repos → Discover indexed repos +2. READ gitnexus://repo/{name}/context → Codebase overview, check staleness +3. gitnexus_query({query: "<concept>"}) → Find related execution flows +4. gitnexus_context({name: "<symbol>"}) → Deep dive on specific symbol +5. READ gitnexus://repo/{name}/process/{name} → Trace full execution flow +``` + +> If step 2 says "Index is stale" → run `npx gitnexus analyze` in terminal. 
+ +## Checklist + +``` +- [ ] READ gitnexus://repo/{name}/context +- [ ] gitnexus_query for the concept you want to understand +- [ ] Review returned processes (execution flows) +- [ ] gitnexus_context on key symbols for callers/callees +- [ ] READ process resource for full execution traces +- [ ] Read source files for implementation details +``` + +## Resources + +| Resource | What you get | +| --------------------------------------- | ------------------------------------------------------- | +| `gitnexus://repo/{name}/context` | Stats, staleness warning (~150 tokens) | +| `gitnexus://repo/{name}/clusters` | All functional areas with cohesion scores (~300 tokens) | +| `gitnexus://repo/{name}/cluster/{name}` | Area members with file paths (~500 tokens) | +| `gitnexus://repo/{name}/process/{name}` | Step-by-step execution trace (~200 tokens) | + +## Tools + +**gitnexus_query** — find execution flows related to a concept: + +``` +gitnexus_query({query: "payment processing"}) +→ Processes: CheckoutFlow, RefundFlow, WebhookHandler +→ Symbols grouped by flow with file locations +``` + +**gitnexus_context** — 360-degree view of a symbol: + +``` +gitnexus_context({name: "validateUser"}) +→ Incoming calls: loginHandler, apiMiddleware +→ Outgoing calls: checkToken, getUserById +→ Processes: LoginFlow (step 2/5), TokenRefresh (step 1/3) +``` + +## Example: "How does payment processing work?" + +``` +1. READ gitnexus://repo/my-app/context → 918 symbols, 45 processes +2. gitnexus_query({query: "payment processing"}) + → CheckoutFlow: processPayment → validateCard → chargeStripe + → RefundFlow: initiateRefund → calculateRefund → processRefund +3. gitnexus_context({name: "processPayment"}) + → Incoming: checkoutHandler, webhookHandler + → Outgoing: validateCard, chargeStripe, saveTransaction +4. 
Read src/payments/processor.ts for implementation details +``` diff --git a/.claude/skills/gitnexus/gitnexus-guide/SKILL.md b/.claude/skills/gitnexus/gitnexus-guide/SKILL.md new file mode 100644 index 0000000..937ac73 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-guide/SKILL.md @@ -0,0 +1,64 @@ +--- +name: gitnexus-guide +description: "Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: \"What GitNexus tools are available?\", \"How do I use GitNexus?\"" +--- + +# GitNexus Guide + +Quick reference for all GitNexus MCP tools, resources, and the knowledge graph schema. + +## Always Start Here + +For any task involving code understanding, debugging, impact analysis, or refactoring: + +1. **Read `gitnexus://repo/{name}/context`** — codebase overview + check index freshness +2. **Match your task to a skill below** and **read that skill file** +3. **Follow the skill's workflow and checklist** + +> If step 1 warns the index is stale, run `npx gitnexus analyze` in the terminal first. + +## Skills + +| Task | Skill to read | +| -------------------------------------------- | ------------------- | +| Understand architecture / "How does X work?" | `gitnexus-exploring` | +| Blast radius / "What breaks if I change X?" | `gitnexus-impact-analysis` | +| Trace bugs / "Why is X failing?" 
| `gitnexus-debugging` | +| Rename / extract / split / refactor | `gitnexus-refactoring` | +| Tools, resources, schema reference | `gitnexus-guide` (this file) | +| Index, status, clean, wiki CLI commands | `gitnexus-cli` | + +## Tools Reference + +| Tool | What it gives you | +| ---------------- | ------------------------------------------------------------------------ | +| `query` | Process-grouped code intelligence — execution flows related to a concept | +| `context` | 360-degree symbol view — categorized refs, processes it participates in | +| `impact` | Symbol blast radius — what breaks at depth 1/2/3 with confidence | +| `detect_changes` | Git-diff impact — what do your current changes affect | +| `rename` | Multi-file coordinated rename with confidence-tagged edits | +| `cypher` | Raw graph queries (read `gitnexus://repo/{name}/schema` first) | +| `list_repos` | Discover indexed repos | + +## Resources Reference + +Lightweight reads (~100-500 tokens) for navigation: + +| Resource | Content | +| ---------------------------------------------- | ----------------------------------------- | +| `gitnexus://repo/{name}/context` | Stats, staleness check | +| `gitnexus://repo/{name}/clusters` | All functional areas with cohesion scores | +| `gitnexus://repo/{name}/cluster/{clusterName}` | Area members | +| `gitnexus://repo/{name}/processes` | All execution flows | +| `gitnexus://repo/{name}/process/{processName}` | Step-by-step trace | +| `gitnexus://repo/{name}/schema` | Graph schema for Cypher | + +## Graph Schema + +**Nodes:** File, Function, Class, Interface, Method, Community, Process +**Edges (via CodeRelation.type):** CALLS, IMPORTS, EXTENDS, IMPLEMENTS, DEFINES, MEMBER_OF, STEP_IN_PROCESS + +```cypher +MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(f:Function {name: "myFunc"}) +RETURN caller.name, caller.filePath +``` diff --git a/.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md b/.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md new file 
mode 100644 index 0000000..e19af28 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md @@ -0,0 +1,97 @@ +--- +name: gitnexus-impact-analysis +description: "Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: \"Is it safe to change X?\", \"What depends on this?\", \"What will break?\"" +--- + +# Impact Analysis with GitNexus + +## When to Use + +- "Is it safe to change this function?" +- "What will break if I modify X?" +- "Show me the blast radius" +- "Who uses this code?" +- Before making non-trivial code changes +- Before committing — to understand what your changes affect + +## Workflow + +``` +1. gitnexus_impact({target: "X", direction: "upstream"}) → What depends on this +2. READ gitnexus://repo/{name}/processes → Check affected execution flows +3. gitnexus_detect_changes() → Map current git changes to affected flows +4. Assess risk and report to user +``` + +> If "Index is stale" → run `npx gitnexus analyze` in terminal. 
+ +## Checklist + +``` +- [ ] gitnexus_impact({target, direction: "upstream"}) to find dependents +- [ ] Review d=1 items first (these WILL BREAK) +- [ ] Check high-confidence (>0.8) dependencies +- [ ] READ processes to check affected execution flows +- [ ] gitnexus_detect_changes() for pre-commit check +- [ ] Assess risk level and report to user +``` + +## Understanding Output + +| Depth | Risk Level | Meaning | +| ----- | ---------------- | ------------------------ | +| d=1 | **WILL BREAK** | Direct callers/importers | +| d=2 | LIKELY AFFECTED | Indirect dependencies | +| d=3 | MAY NEED TESTING | Transitive effects | + +## Risk Assessment + +| Affected | Risk | +| ------------------------------ | -------- | +| <5 symbols, few processes | LOW | +| 5-15 symbols, 2-5 processes | MEDIUM | +| >15 symbols or many processes | HIGH | +| Critical path (auth, payments) | CRITICAL | + +## Tools + +**gitnexus_impact** — the primary tool for symbol blast radius: + +``` +gitnexus_impact({ + target: "validateUser", + direction: "upstream", + minConfidence: 0.8, + maxDepth: 3 +}) + +→ d=1 (WILL BREAK): + - loginHandler (src/auth/login.ts:42) [CALLS, 100%] + - apiMiddleware (src/api/middleware.ts:15) [CALLS, 100%] + +→ d=2 (LIKELY AFFECTED): + - authRouter (src/routes/auth.ts:22) [CALLS, 95%] +``` + +**gitnexus_detect_changes** — git-diff based impact analysis: + +``` +gitnexus_detect_changes({scope: "staged"}) + +→ Changed: 5 symbols in 3 files +→ Affected: LoginFlow, TokenRefresh, APIMiddlewarePipeline +→ Risk: MEDIUM +``` + +## Example: "What breaks if I change validateUser?" + +``` +1. gitnexus_impact({target: "validateUser", direction: "upstream"}) + → d=1: loginHandler, apiMiddleware (WILL BREAK) + → d=2: authRouter, sessionManager (LIKELY AFFECTED) + +2. READ gitnexus://repo/my-app/processes + → LoginFlow and TokenRefresh touch validateUser + +3. 
Risk: 2 direct callers, 2 processes = MEDIUM +``` diff --git a/.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md b/.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md new file mode 100644 index 0000000..f48cc01 --- /dev/null +++ b/.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md @@ -0,0 +1,121 @@ +--- +name: gitnexus-refactoring +description: "Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: \"Rename this function\", \"Extract this into a module\", \"Refactor this class\", \"Move this to a separate file\"" +--- + +# Refactoring with GitNexus + +## When to Use + +- "Rename this function safely" +- "Extract this into a module" +- "Split this service" +- "Move this to a new file" +- Any task involving renaming, extracting, splitting, or restructuring code + +## Workflow + +``` +1. gitnexus_impact({target: "X", direction: "upstream"}) → Map all dependents +2. gitnexus_query({query: "X"}) → Find execution flows involving X +3. gitnexus_context({name: "X"}) → See all incoming/outgoing refs +4. Plan update order: interfaces → implementations → callers → tests +``` + +> If "Index is stale" → run `npx gitnexus analyze` in terminal. 
+ +## Checklists + +### Rename Symbol + +``` +- [ ] gitnexus_rename({symbol_name: "oldName", new_name: "newName", dry_run: true}) — preview all edits +- [ ] Review graph edits (high confidence) and ast_search edits (review carefully) +- [ ] If satisfied: gitnexus_rename({..., dry_run: false}) — apply edits +- [ ] gitnexus_detect_changes() — verify only expected files changed +- [ ] Run tests for affected processes +``` + +### Extract Module + +``` +- [ ] gitnexus_context({name: target}) — see all incoming/outgoing refs +- [ ] gitnexus_impact({target, direction: "upstream"}) — find all external callers +- [ ] Define new module interface +- [ ] Extract code, update imports +- [ ] gitnexus_detect_changes() — verify affected scope +- [ ] Run tests for affected processes +``` + +### Split Function/Service + +``` +- [ ] gitnexus_context({name: target}) — understand all callees +- [ ] Group callees by responsibility +- [ ] gitnexus_impact({target, direction: "upstream"}) — map callers to update +- [ ] Create new functions/services +- [ ] Update callers +- [ ] gitnexus_detect_changes() — verify affected scope +- [ ] Run tests for affected processes +``` + +## Tools + +**gitnexus_rename** — automated multi-file rename: + +``` +gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: true}) +→ 12 edits across 8 files +→ 10 graph edits (high confidence), 2 ast_search edits (review) +→ Changes: [{file_path, edits: [{line, old_text, new_text, confidence}]}] +``` + +**gitnexus_impact** — map all dependents first: + +``` +gitnexus_impact({target: "validateUser", direction: "upstream"}) +→ d=1: loginHandler, apiMiddleware, testUtils +→ Affected Processes: LoginFlow, TokenRefresh +``` + +**gitnexus_detect_changes** — verify your changes after refactoring: + +``` +gitnexus_detect_changes({scope: "all"}) +→ Changed: 8 files, 12 symbols +→ Affected processes: LoginFlow, TokenRefresh +→ Risk: MEDIUM +``` + +**gitnexus_cypher** — custom reference queries: + 
+```cypher +MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(f:Function {name: "validateUser"}) +RETURN caller.name, caller.filePath ORDER BY caller.filePath +``` + +## Risk Rules + +| Risk Factor | Mitigation | +| ------------------- | ----------------------------------------- | +| Many callers (>5) | Use gitnexus_rename for automated updates | +| Cross-area refs | Use detect_changes after to verify scope | +| String/dynamic refs | gitnexus_query to find them | +| External/public API | Version and deprecate properly | + +## Example: Rename `validateUser` to `authenticateUser` + +``` +1. gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: true}) + → 12 edits: 10 graph (safe), 2 ast_search (review) + → Files: validator.ts, login.ts, middleware.ts, config.json... + +2. Review ast_search edits (config.json: dynamic reference!) + +3. gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: false}) + → Applied 12 edits across 8 files + +4. gitnexus_detect_changes({scope: "all"}) + → Affected: LoginFlow, TokenRefresh + → Risk: MEDIUM — run tests for these flows +``` diff --git a/.gitignore b/.gitignore index 65f3ffe..6379a8e 100644 --- a/.gitignore +++ b/.gitignore @@ -216,4 +216,4 @@ poetry.toml .vscode .import_linter_cache .pycharm_plugin - +.idea From d5dd2460c422accce43994910d5a285f0e7f10e3 Mon Sep 17 00:00:00 2001 From: Eugeniu Costetchi Date: Thu, 2 Apr 2026 18:12:49 +0200 Subject: [PATCH 11/14] chore(infra): align env vars with ERSys naming convention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renames LOG_LEVEL → ERE_LOG_LEVEL to match the ERSys unified .env.example, allowing integration tests to run against the shared ERSys infrastructure without any compose-level variable mapping. 
Changes: - src/ere/utils/logging.py: read ERE_LOG_LEVEL instead of LOG_LEVEL - src/ere/entrypoints/app.py: update env var name in docstring - demo/demo.py: read ERE_LOG_LEVEL instead of LOG_LEVEL - test/unit/utils/test_logging.py: update env var references - infra/.env.example: new file, ERE-relevant subset of ERSys .env.example - infra/compose.dev.yaml: remove LOG_LEVEL mapping (no longer needed) --- demo/demo.py | 2 +- infra/.env.example | 19 ++++++++++++------- src/ere/entrypoints/app.py | 2 +- src/ere/utils/logging.py | 4 ++-- test/unit/utils/test_logging.py | 4 ++-- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/demo/demo.py b/demo/demo.py index 1a06939..6bf2570 100755 --- a/demo/demo.py +++ b/demo/demo.py @@ -93,7 +93,7 @@ def load_env_file(env_path: str = None) -> dict: def setup_logging(): """Configure logging with timestamps.""" - log_level_name = os.environ.get("LOG_LEVEL", "INFO").upper() + log_level_name = os.environ.get("ERE_LOG_LEVEL", "INFO").upper() # Handle custom TRACE level if log_level_name == "TRACE": diff --git a/infra/.env.example b/infra/.env.example index 0057f84..80795f5 100644 --- a/infra/.env.example +++ b/infra/.env.example @@ -1,18 +1,23 @@ -# Copy this file to .env and customize as needed: -# cp infra/.env.example infra/.env +# ERE local development environment +# Copy to infra/.env and customise: cp infra/.env.example infra/.env +# +# Compatible with the ERSys unified environment (infra/.env.example). +# When running ERE standalone, use this file with infra/compose.dev.yaml. +# When running inside the full ERSys stack, the parent project's .env covers these. 
-# Redis +# --- Redis --- REDIS_HOST=redis REDIS_PORT=6379 REDIS_DB=0 REDIS_PASSWORD=changeme -# Queue names +# --- Queues --- REQUEST_QUEUE=ere_requests RESPONSE_QUEUE=ere_responses -# DuckDB (path inside container, volume-mounted) +# --- Storage --- DUCKDB_PATH=/data/app.duckdb -# Logging -LOG_LEVEL=INFO +# --- Logging --- +# ERSys uses ERE_LOG_LEVEL; compose.dev.yaml maps it to LOG_LEVEL internally. +ERE_LOG_LEVEL=INFO diff --git a/src/ere/entrypoints/app.py b/src/ere/entrypoints/app.py index e4077db..6a6fbd7 100644 --- a/src/ere/entrypoints/app.py +++ b/src/ere/entrypoints/app.py @@ -12,7 +12,7 @@ REDIS_HOST Redis hostname (default: localhost) REDIS_PORT Redis port (default: 6379) REDIS_DB Redis DB index (default: 0) - LOG_LEVEL Python log level name (default: INFO) — supports TRACE + ERE_LOG_LEVEL Python log level name (default: INFO) — supports TRACE RDF_MAPPING_PATH Path to rdf_mapping.yaml config file RESOLVER_CONFIG_PATH Path to resolver.yaml config file DUCKDB_PATH Path to persistent DuckDB file (overrides resolver.yaml) diff --git a/src/ere/utils/logging.py b/src/ere/utils/logging.py index 9100a1b..70e36a3 100644 --- a/src/ere/utils/logging.py +++ b/src/ere/utils/logging.py @@ -26,10 +26,10 @@ def configure_logging(log_level: str = None) -> None: Args: log_level: Log level name (e.g., 'DEBUG', 'INFO', 'TRACE'). - If None, reads from LOG_LEVEL environment variable (default: INFO). + If None, reads from ERE_LOG_LEVEL environment variable (default: INFO). 
""" if log_level is None: - log_level = os.environ.get("LOG_LEVEL", "INFO").upper() + log_level = os.environ.get("ERE_LOG_LEVEL", "INFO").upper() else: log_level = log_level.upper() diff --git a/test/unit/utils/test_logging.py b/test/unit/utils/test_logging.py index 455d1bd..b285a0d 100644 --- a/test/unit/utils/test_logging.py +++ b/test/unit/utils/test_logging.py @@ -23,14 +23,14 @@ def test_configure_logging_passes_trace_level_to_basicconfig(): def test_configure_logging_reads_env_var(monkeypatch): - monkeypatch.setenv("LOG_LEVEL", "ERROR") + monkeypatch.setenv("ERE_LOG_LEVEL", "ERROR") with patch("logging.basicConfig") as mock_bc: configure_logging() assert mock_bc.call_args[1]["level"] == logging.ERROR def test_configure_logging_defaults_to_info(monkeypatch): - monkeypatch.delenv("LOG_LEVEL", raising=False) + monkeypatch.delenv("ERE_LOG_LEVEL", raising=False) with patch("logging.basicConfig") as mock_bc: configure_logging() assert mock_bc.call_args[1]["level"] == logging.INFO From 19e787c1817d2cb74f443287a8f9872a3937651b Mon Sep 17 00:00:00 2001 From: Eugeniu Costetchi Date: Thu, 2 Apr 2026 22:17:21 +0200 Subject: [PATCH 12/14] docs(agents): add ERE-specific agent operating instructions AGENTS.md (and its CLAUDE.md mirror) now contains ERE-specific guidance: commits/PR rules, dev workflow, make targets reference, architecture rules, memory conventions, and gotchas. Replaces GitNexus-only boilerplate. 
Also aligns dev tooling: - Makefile: test-integration depends on check-env; test target sources .env - infra/.env.example: REDIS_HOST defaults to localhost for standalone dev - test/e2e/test_app.py: replace walrus operator with explicit env default --- AGENTS.md | 219 +++++++++++++++++++++++++++++++++++++++++++ CLAUDE.md | 219 +++++++++++++++++++++++++++++++++++++++++++ Makefile | 8 +- infra/.env.example | 2 +- test/e2e/test_app.py | 3 +- 5 files changed, 444 insertions(+), 7 deletions(-) create mode 100644 AGENTS.md create mode 100644 CLAUDE.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..26b1f0f --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,219 @@ +# ERE — Agent Operating Instructions + +This file governs how AI agents operate in this repository. +It complements `CLAUDE.md` (which governs Claude Code specifically) and `.claude/CLAUDE.md` (project instructions). + +--- + +## Commits and PRs + +- **Never auto-commit** unless the user explicitly asks. +- **Never force-push** to `main` or `develop`. +- **Never add co-author lines**, tool names, or agent names to commit messages. +- Commit format: `type(scope): concise description` — e.g. `feat(adapters): add splink resolver factory`. +- Stage only files you modified: `git add `, never `git add -A` blindly. +- Before committing, run `make lint` and `make test-unit` to verify nothing is broken. +- PRs target `develop` (not `main`) unless told otherwise. +- When creating a PR, include a short summary and a test-plan checklist. + +--- + +## Working Methodology + +### Before touching code + +1. Read `WORKING.md` — it points to the active task file. +2. Read the referenced `docs/tasks/yyyy-mm-dd-*.md` fully. +3. Understand the current branch state: `git log --oneline -10`. + +### Running the stack for integration tests + +Integration tests require Redis to be running. 
Start it first: + +```bash +make infra-up # starts Redis + RedisInsight via Docker Compose +make test-integration # then run integration tests +make infra-down # tear down when done +``` + +Unit tests do **not** require any infrastructure: + +```bash +make test-unit # fast, self-contained, uses your venv +``` + +### Typical development loop + +```bash +make install # first time or after pyproject.toml changes +make test-unit # red → green → refactor +make lint # quick style check +make check-architecture # verify import-linter contracts +make all-quality-checks # before opening a PR +``` + +--- + +## Tooling Reference + +| Target | What it does | +|--------|-------------| +| `make install` | Install deps via Poetry | +| `make test-unit` | pytest unit suite + coverage report | +| `make test-integration` | integration tests (Redis must be up) | +| `make test-coverage` | HTML coverage report → `htmlcov/index.html` | +| `make lint` | pylint (fast, your venv) | +| `make format` | Ruff formatter | +| `make lint-fix` | Ruff auto-fix | +| `make check-clean-code` | pylint + radon + xenon (tox isolated) | +| `make check-architecture` | import-linter contracts (tox isolated) | +| `make all-quality-checks` | lint + clean-code + architecture | +| `make ci` | full tox pipeline (py312 + architecture + clean-code) | +| `make infra-up` | Start Redis stack (Docker Compose) | +| `make infra-down` | Stop Redis stack | +| `make infra-watch` | Live-reload mode (syncs `src/` and `config/`) | + +--- + +## Architecture Rules (enforced by import-linter) + +Dependency direction must never be violated: + +``` +entrypoints → services → models + ↘ + adapters → models +``` + +- `models/` — no I/O, no framework imports, no side effects. +- `adapters/` — infrastructure only; never calls `services/`. +- `services/` — orchestrates domain and adapters; never imports from `entrypoints/`. +- `entrypoints/` — parses input, calls services, formats output; no business logic. + +Violations block CI. 
Check with `make check-architecture` before opening a PR. + +--- + +## Memory Conventions + +Save to memory only what is non-obvious and persists across conversations: + +- Architectural decisions that aren't evident from the code (e.g. resolver factory registry pattern, DuckDB threading model). +- Design constraints explained by the user that aren't in comments or docs. +- User preferences about how to collaborate (e.g. "never suggest walrus operators", "prefer explicit factory injection"). + +Do **not** save to memory: +- Current task state (use the task file in `docs/tasks/`). +- Git history or recent changes (readable via `git log`). +- File paths or code structure (readable from the repo). + +--- + +## Gotchas + +- **`logging.basicConfig` is a no-op** when handlers already exist (conftest sets them up via `dictConfig`). Mock it with `patch("logging.basicConfig")` in logging tests. +- **DuckDB in tests**: use in-memory mode (`:memory:`) or a temp file via `tmp_path`; never a fixed path that leaks between tests. +- **Integration tests are marked** with `@pytest.mark.integration` — `make test-unit` skips them automatically. +- **`infra/.env`** is required for `make infra-*` targets. Copy from `infra/.env.example` on first use. +- **Config files** live in `config/` (repo root), not `infra/config/` — the `1cf319c` refactor moved them. +- **erspec models** are LinkML-generated with snake_case fields (e.g. `legal_name`, not `legalName`). Do not edit generated files — update the schema and regenerate. +- **`ERE_LOG_LEVEL`** is the canonical env var for log level in this service (not `LOG_LEVEL`). + +--- + + +# GitNexus — Code Intelligence + +This project is indexed by GitNexus as **entity-resolution-engine-basic** (528 symbols, 1372 relationships, 36 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. + +> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. 
+ +## Always Do + +- **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user. +- **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows. +- **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits. +- When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance. +- When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`. + +## When Debugging + +1. `gitnexus_query({query: ""})` — find execution flows related to the issue +2. `gitnexus_context({name: ""})` — see all callers, callees, and process participation +3. `READ gitnexus://repo/entity-resolution-engine-basic/process/{processName}` — trace the full execution flow step by step +4. For regressions: `gitnexus_detect_changes({scope: "compare", base_ref: "main"})` — see what your branch changed + +## When Refactoring + +- **Renaming**: MUST use `gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})` first. Review the preview — graph edits are safe, text_search edits need manual review. Then run with `dry_run: false`. +- **Extracting/Splitting**: MUST run `gitnexus_context({name: "target"})` to see all incoming/outgoing refs, then `gitnexus_impact({target: "target", direction: "upstream"})` to find all external callers before moving code. +- After any refactor: run `gitnexus_detect_changes({scope: "all"})` to verify only expected files changed. + +## Never Do + +- NEVER edit a function, class, or method without first running `gitnexus_impact` on it. 
+- NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. +- NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph. +- NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope. + +## Tools Quick Reference + +| Tool | When to use | Command | +|------|-------------|---------| +| `query` | Find code by concept | `gitnexus_query({query: "auth validation"})` | +| `context` | 360-degree view of one symbol | `gitnexus_context({name: "validateUser"})` | +| `impact` | Blast radius before editing | `gitnexus_impact({target: "X", direction: "upstream"})` | +| `detect_changes` | Pre-commit scope check | `gitnexus_detect_changes({scope: "staged"})` | +| `rename` | Safe multi-file rename | `gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})` | +| `cypher` | Custom graph queries | `gitnexus_cypher({query: "MATCH ..."})` | + +## Impact Risk Levels + +| Depth | Meaning | Action | +|-------|---------|--------| +| d=1 | WILL BREAK — direct callers/importers | MUST update these | +| d=2 | LIKELY AFFECTED — indirect deps | Should test | +| d=3 | MAY NEED TESTING — transitive | Test if critical path | + +## Resources + +| Resource | Use for | +|----------|---------| +| `gitnexus://repo/entity-resolution-engine-basic/context` | Codebase overview, check index freshness | +| `gitnexus://repo/entity-resolution-engine-basic/clusters` | All functional areas | +| `gitnexus://repo/entity-resolution-engine-basic/processes` | All execution flows | +| `gitnexus://repo/entity-resolution-engine-basic/process/{name}` | Step-by-step execution trace | + +## Self-Check Before Finishing + +Before completing any code modification task, verify: +1. `gitnexus_impact` was run for all modified symbols +2. No HIGH/CRITICAL risk warnings were ignored +3. `gitnexus_detect_changes()` confirms changes match expected scope +4. 
All d=1 (WILL BREAK) dependents were updated + +## Keeping the Index Fresh + +After committing code changes, the GitNexus index becomes stale. Re-run analyze to update it: + +```bash +npx gitnexus analyze +``` + +If the index previously included embeddings, preserve them by adding `--embeddings`: + +```bash +npx gitnexus analyze --embeddings +``` + +To check whether embeddings exist, inspect `.gitnexus/meta.json` — the `stats.embeddings` field shows the count (0 means no embeddings). **Running analyze without `--embeddings` will delete any previously generated embeddings.** + +> Claude Code users: A PostToolUse hook handles this automatically after `git commit` and `git merge`. + +## CLI + +- Re-index: `npx gitnexus analyze` +- Check freshness: `npx gitnexus status` +- Generate docs: `npx gitnexus wiki` + + diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..26b1f0f --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,219 @@ +# ERE — Agent Operating Instructions + +This file governs how AI agents operate in this repository. +It complements `CLAUDE.md` (which governs Claude Code specifically) and `.claude/CLAUDE.md` (project instructions). + +--- + +## Commits and PRs + +- **Never auto-commit** unless the user explicitly asks. +- **Never force-push** to `main` or `develop`. +- **Never add co-author lines**, tool names, or agent names to commit messages. +- Commit format: `type(scope): concise description` — e.g. `feat(adapters): add splink resolver factory`. +- Stage only files you modified: `git add `, never `git add -A` blindly. +- Before committing, run `make lint` and `make test-unit` to verify nothing is broken. +- PRs target `develop` (not `main`) unless told otherwise. +- When creating a PR, include a short summary and a test-plan checklist. + +--- + +## Working Methodology + +### Before touching code + +1. Read `WORKING.md` — it points to the active task file. +2. Read the referenced `docs/tasks/yyyy-mm-dd-*.md` fully. +3. 
Understand the current branch state: `git log --oneline -10`. + +### Running the stack for integration tests + +Integration tests require Redis to be running. Start it first: + +```bash +make infra-up # starts Redis + RedisInsight via Docker Compose +make test-integration # then run integration tests +make infra-down # tear down when done +``` + +Unit tests do **not** require any infrastructure: + +```bash +make test-unit # fast, self-contained, uses your venv +``` + +### Typical development loop + +```bash +make install # first time or after pyproject.toml changes +make test-unit # red → green → refactor +make lint # quick style check +make check-architecture # verify import-linter contracts +make all-quality-checks # before opening a PR +``` + +--- + +## Tooling Reference + +| Target | What it does | +|--------|-------------| +| `make install` | Install deps via Poetry | +| `make test-unit` | pytest unit suite + coverage report | +| `make test-integration` | integration tests (Redis must be up) | +| `make test-coverage` | HTML coverage report → `htmlcov/index.html` | +| `make lint` | pylint (fast, your venv) | +| `make format` | Ruff formatter | +| `make lint-fix` | Ruff auto-fix | +| `make check-clean-code` | pylint + radon + xenon (tox isolated) | +| `make check-architecture` | import-linter contracts (tox isolated) | +| `make all-quality-checks` | lint + clean-code + architecture | +| `make ci` | full tox pipeline (py312 + architecture + clean-code) | +| `make infra-up` | Start Redis stack (Docker Compose) | +| `make infra-down` | Stop Redis stack | +| `make infra-watch` | Live-reload mode (syncs `src/` and `config/`) | + +--- + +## Architecture Rules (enforced by import-linter) + +Dependency direction must never be violated: + +``` +entrypoints → services → models + ↘ + adapters → models +``` + +- `models/` — no I/O, no framework imports, no side effects. +- `adapters/` — infrastructure only; never calls `services/`. 
+- `services/` — orchestrates domain and adapters; never imports from `entrypoints/`. +- `entrypoints/` — parses input, calls services, formats output; no business logic. + +Violations block CI. Check with `make check-architecture` before opening a PR. + +--- + +## Memory Conventions + +Save to memory only what is non-obvious and persists across conversations: + +- Architectural decisions that aren't evident from the code (e.g. resolver factory registry pattern, DuckDB threading model). +- Design constraints explained by the user that aren't in comments or docs. +- User preferences about how to collaborate (e.g. "never suggest walrus operators", "prefer explicit factory injection"). + +Do **not** save to memory: +- Current task state (use the task file in `docs/tasks/`). +- Git history or recent changes (readable via `git log`). +- File paths or code structure (readable from the repo). + +--- + +## Gotchas + +- **`logging.basicConfig` is a no-op** when handlers already exist (conftest sets them up via `dictConfig`). Mock it with `patch("logging.basicConfig")` in logging tests. +- **DuckDB in tests**: use in-memory mode (`:memory:`) or a temp file via `tmp_path`; never a fixed path that leaks between tests. +- **Integration tests are marked** with `@pytest.mark.integration` — `make test-unit` skips them automatically. +- **`infra/.env`** is required for `make infra-*` targets. Copy from `infra/.env.example` on first use. +- **Config files** live in `config/` (repo root), not `infra/config/` — the `1cf319c` refactor moved them. +- **erspec models** are LinkML-generated with snake_case fields (e.g. `legal_name`, not `legalName`). Do not edit generated files — update the schema and regenerate. +- **`ERE_LOG_LEVEL`** is the canonical env var for log level in this service (not `LOG_LEVEL`). + +--- + + +# GitNexus — Code Intelligence + +This project is indexed by GitNexus as **entity-resolution-engine-basic** (528 symbols, 1372 relationships, 36 execution flows). 
Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
+
+> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
+
+## Always Do
+
+- **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user.
+- **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows.
+- **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits.
+- When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance.
+- When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`.
+
+## When Debugging
+
+1. `gitnexus_query({query: "<symptom or concept>"})` — find execution flows related to the issue
+2. `gitnexus_context({name: "<suspect symbol>"})` — see all callers, callees, and process participation
+3. `READ gitnexus://repo/entity-resolution-engine-basic/process/{processName}` — trace the full execution flow step by step
+4. For regressions: `gitnexus_detect_changes({scope: "compare", base_ref: "main"})` — see what your branch changed
+
+## When Refactoring
+
+- **Renaming**: MUST use `gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})` first. Review the preview — graph edits are safe, text_search edits need manual review. Then run with `dry_run: false`.
+- **Extracting/Splitting**: MUST run `gitnexus_context({name: "target"})` to see all incoming/outgoing refs, then `gitnexus_impact({target: "target", direction: "upstream"})` to find all external callers before moving code. 
+- After any refactor: run `gitnexus_detect_changes({scope: "all"})` to verify only expected files changed. + +## Never Do + +- NEVER edit a function, class, or method without first running `gitnexus_impact` on it. +- NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. +- NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph. +- NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope. + +## Tools Quick Reference + +| Tool | When to use | Command | +|------|-------------|---------| +| `query` | Find code by concept | `gitnexus_query({query: "auth validation"})` | +| `context` | 360-degree view of one symbol | `gitnexus_context({name: "validateUser"})` | +| `impact` | Blast radius before editing | `gitnexus_impact({target: "X", direction: "upstream"})` | +| `detect_changes` | Pre-commit scope check | `gitnexus_detect_changes({scope: "staged"})` | +| `rename` | Safe multi-file rename | `gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})` | +| `cypher` | Custom graph queries | `gitnexus_cypher({query: "MATCH ..."})` | + +## Impact Risk Levels + +| Depth | Meaning | Action | +|-------|---------|--------| +| d=1 | WILL BREAK — direct callers/importers | MUST update these | +| d=2 | LIKELY AFFECTED — indirect deps | Should test | +| d=3 | MAY NEED TESTING — transitive | Test if critical path | + +## Resources + +| Resource | Use for | +|----------|---------| +| `gitnexus://repo/entity-resolution-engine-basic/context` | Codebase overview, check index freshness | +| `gitnexus://repo/entity-resolution-engine-basic/clusters` | All functional areas | +| `gitnexus://repo/entity-resolution-engine-basic/processes` | All execution flows | +| `gitnexus://repo/entity-resolution-engine-basic/process/{name}` | Step-by-step execution trace | + +## Self-Check Before Finishing + +Before completing any code modification task, verify: +1. 
`gitnexus_impact` was run for all modified symbols +2. No HIGH/CRITICAL risk warnings were ignored +3. `gitnexus_detect_changes()` confirms changes match expected scope +4. All d=1 (WILL BREAK) dependents were updated + +## Keeping the Index Fresh + +After committing code changes, the GitNexus index becomes stale. Re-run analyze to update it: + +```bash +npx gitnexus analyze +``` + +If the index previously included embeddings, preserve them by adding `--embeddings`: + +```bash +npx gitnexus analyze --embeddings +``` + +To check whether embeddings exist, inspect `.gitnexus/meta.json` — the `stats.embeddings` field shows the count (0 means no embeddings). **Running analyze without `--embeddings` will delete any previously generated embeddings.** + +> Claude Code users: A PostToolUse hook handles this automatically after `git commit` and `git merge`. + +## CLI + +- Re-index: `npx gitnexus analyze` +- Check freshness: `npx gitnexus status` +- Generate docs: `npx gitnexus wiki` + + diff --git a/Makefile b/Makefile index 933f068..d62d251 100644 --- a/Makefile +++ b/Makefile @@ -103,7 +103,7 @@ build: ## Build the package distribution .PHONY: test test-unit test-integration test-coverage test: ## Run all tests @ echo -e "$(BUILD_PRINT)$(ICON_PROGRESS) Running all tests$(END_BUILD_PRINT)" - @ poetry run pytest $(TEST_PATH) + @ set -a && . 
$(ENV_FILE) && set +a && poetry run pytest $(TEST_PATH) @ echo -e "$(BUILD_PRINT)$(ICON_DONE) All tests passed$(END_BUILD_PRINT)" test-unit: ## Run unit tests with coverage (fast, uses your venv) @@ -112,14 +112,14 @@ test-unit: ## Run unit tests with coverage (fast, uses your venv) --cov=src --cov-report=term-missing --cov-report=html @ echo -e "$(BUILD_PRINT)$(ICON_DONE) Unit tests passed (coverage: htmlcov/index.html)$(END_BUILD_PRINT)" -test-integration: ## Run integration tests only +test-integration: check-env ## Run integration tests only (requires Redis — run make infra-up first) @ echo -e "$(BUILD_PRINT)$(ICON_PROGRESS) Running integration tests$(END_BUILD_PRINT)" - @ poetry run pytest $(TEST_PATH) -m "integration" + @ set -a && . $(ENV_FILE) && set +a && poetry run pytest $(TEST_PATH) -m "integration" @ echo -e "$(BUILD_PRINT)$(ICON_DONE) Integration tests passed$(END_BUILD_PRINT)" test-coverage: ## Generate detailed HTML coverage report @ echo -e "$(BUILD_PRINT)$(ICON_PROGRESS) Generating coverage report$(END_BUILD_PRINT)" - @ poetry run pytest $(TEST_PATH) -m "not integration" \ + @ set -a && . $(ENV_FILE) && set +a && poetry run pytest $(TEST_PATH) -m "not integration" \ --cov=src --cov-report=html --cov-report=term-missing @ echo -e "$(BUILD_PRINT)$(ICON_DONE) Coverage report: htmlcov/index.html$(END_BUILD_PRINT)" diff --git a/infra/.env.example b/infra/.env.example index 80795f5..e6d0f0c 100644 --- a/infra/.env.example +++ b/infra/.env.example @@ -6,7 +6,7 @@ # When running inside the full ERSys stack, the parent project's .env covers these. 
# --- Redis --- -REDIS_HOST=redis +REDIS_HOST=localhost REDIS_PORT=6379 REDIS_DB=0 REDIS_PASSWORD=changeme diff --git a/test/e2e/test_app.py b/test/e2e/test_app.py index b15194e..f13ef18 100644 --- a/test/e2e/test_app.py +++ b/test/e2e/test_app.py @@ -42,8 +42,7 @@ def test_app_main_processes_single_request( monkeypatch.setenv("REDIS_HOST", os.environ.get("REDIS_HOST", "localhost")) monkeypatch.setenv("REDIS_PORT", os.environ.get("REDIS_PORT", "6379")) monkeypatch.setenv("REDIS_DB", os.environ.get("REDIS_DB", "0")) - if redis_password := os.environ.get("REDIS_PASSWORD"): - monkeypatch.setenv("REDIS_PASSWORD", redis_password) + monkeypatch.setenv("REDIS_PASSWORD", os.environ.get("REDIS_PASSWORD", "changeme")) monkeypatch.setenv("REQUEST_QUEUE", req_queue) monkeypatch.setenv("RESPONSE_QUEUE", resp_queue) monkeypatch.setenv("RESOLVER_CONFIG_PATH", str(resolver_config_path)) From 0daca1dc6136fb0f86dc5f23c7bf83e8a40baff4 Mon Sep 17 00:00:00 2001 From: Eugeniu Costetchi Date: Thu, 2 Apr 2026 22:17:33 +0200 Subject: [PATCH 13/14] updated project setup --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e4e5579..adc2487 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "ere" +name = "ere-basic" version = "0.1.0" description = "A basic implementation of the Entity Resolution Engine (ERE)." authors = [ From e5b1440cfbda2e4fc6f8ccf3fded7e40d98a2b0b Mon Sep 17 00:00:00 2001 From: Eugeniu Costetchi Date: Thu, 2 Apr 2026 22:18:19 +0200 Subject: [PATCH 14/14] updated project setup --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index adc2487..15c2bc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "ere-basic" -version = "0.1.0" +version = "0.4.0" description = "A basic implementation of the Entity Resolution Engine (ERE)." 
authors = [ {name = "Meaningfy",email = "hi@meaningfy.ws"}