From 595b35c61750d86c6e569dfee52403015a939dbc Mon Sep 17 00:00:00 2001 From: Lucas Ricarte Date: Tue, 7 Apr 2026 23:13:43 -0700 Subject: [PATCH 1/2] fix: Use atomic writing when saving files to avoid corrupting them --- abstra_json_sql/persistence/json.py | 73 ++++++++++++++++++----------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/abstra_json_sql/persistence/json.py b/abstra_json_sql/persistence/json.py index 1bc7c6c..6972a1b 100644 --- a/abstra_json_sql/persistence/json.py +++ b/abstra_json_sql/persistence/json.py @@ -1,10 +1,29 @@ import json +import os +import tempfile from pathlib import Path from typing import List, Optional from ..tables import Column, ColumnType, ITablesSnapshot, Table +def _atomic_write_text(path: Path, content: str) -> None: + """Write content to a file atomically using write-to-temp + os.replace.""" + fd, tmp_path = tempfile.mkstemp(dir=path.parent, suffix=".tmp") + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(content) + f.flush() + os.fsync(f.fileno()) + os.replace(tmp_path, path) + except BaseException: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + + class FileSystemJsonTables(ITablesSnapshot): workdir: Path @@ -16,7 +35,7 @@ def _ensure_metadata_table(self): """Ensure the metadata table exists""" metadata_path = self.workdir / "__schema__.json" if not metadata_path.exists(): - metadata_path.write_text(json.dumps({})) + _atomic_write_text(metadata_path, json.dumps({})) def _get_table_metadata_by_name( self, table_name: str @@ -62,7 +81,7 @@ def _save_table_metadata( column_dicts.append(col_dict) metadata[table_id] = {"table_name": table_name, "columns": column_dicts} - metadata_path.write_text(json.dumps(metadata, indent=2)) + _atomic_write_text(metadata_path, json.dumps(metadata, indent=2)) def _remove_table_metadata(self, table_id: str): """Remove table metadata from the __schema__.json file""" @@ -70,7 +89,7 @@ def _remove_table_metadata(self, table_id: str): metadata = json.loads(metadata_path.read_text()) if table_id in metadata: del metadata[table_id] - metadata_path.write_text(json.dumps(metadata, indent=2)) + _atomic_write_text(metadata_path, json.dumps(metadata, indent=2)) def get_table(self, name: str) -> Optional[Table]: table_id, columns = self._get_table_metadata_by_name(name) @@ -123,7 +142,7 @@ def add_table(self, table: Table): row_with_ids = table.convert_row_to_column_ids(row) data_with_ids.append(row_with_ids) - table_path.write_text(json.dumps(data_with_ids, indent=2)) + _atomic_write_text(table_path, json.dumps(data_with_ids, indent=2)) # Save columns metadata self._save_table_metadata(table.table_id, table.name, table.columns) @@ -165,14 +184,14 @@ def _insert(self, table_name: str, row: dict): temp_table = Table(name=table_name, columns=columns, data=[], table_id=table_id) rows = json.loads(table_path.read_text()) - assert isinstance(rows, list), ( - f"File {table_path} does not contain a list of rows" - ) + assert isinstance( + rows, list + ), f"File {table_path} does not contain a list of rows" # Convert row to column ID format row_with_ids = temp_table.convert_row_to_column_ids(row) rows.append(row_with_ids) - table_path.write_text(json.dumps(rows, indent=2)) + _atomic_write_text(table_path, json.dumps(rows, indent=2)) def add_column(self, table_name: str, column: Column): table_id, existing_columns = self._get_table_metadata_by_name(table_name) @@ -184,9 +203,9 @@ def add_column(self, table_name: str, column: Column): raise FileNotFoundError(f"File {table_path} does not exist") rows = json.loads(table_path.read_text()) - assert isinstance(rows, list), ( - f"File {table_path} does not contain a list of rows" - ) + assert isinstance( + rows, list + ), f"File {table_path} does not contain a list of rows" # Check if column already exists if any(col.name == column.name for col in existing_columns): @@ -197,7 +216,7 @@ def add_column(self, table_name: str, column: Column): # Add column to data using column ID for row in rows: row[column.column_id] = column.default - table_path.write_text(json.dumps(rows, indent=2)) + _atomic_write_text(table_path, json.dumps(rows, indent=2)) # Update metadata existing_columns.append(column) @@ -213,9 +232,9 @@ def remove_column(self, table_name: str, column_name: str): raise FileNotFoundError(f"File {table_path} does not exist") rows = json.loads(table_path.read_text()) - assert isinstance(rows, list), ( - f"File {table_path} does not contain a list of rows" - ) + assert isinstance( + rows, list + ), f"File {table_path} does not contain a list of rows" # Remove column from data using column ID column_to_remove = None @@ -228,7 +247,7 @@ def remove_column(self, table_name: str, column_name: str): for row in rows: if column_to_remove.column_id in row: del row[column_to_remove.column_id] - table_path.write_text(json.dumps(rows, indent=2)) + _atomic_write_text(table_path, json.dumps(rows, indent=2)) # Update metadata columns = [col for col in columns if col.name != column_name] @@ -244,9 +263,9 @@ def rename_column(self, table_name: str, old_name: str, new_name: str): raise FileNotFoundError(f"File {table_path} does not exist") rows = json.loads(table_path.read_text()) - assert isinstance(rows, list), ( - f"File {table_path} does not contain a list of rows" - ) + assert isinstance( + rows, list + ), f"File {table_path} does not contain a list of rows" # Data doesn't need to change for rename_column since we use column IDs # Only metadata needs to be updated @@ -284,16 +303,16 @@ def _update(self, table_name: str, idx: int, changes: dict): temp_table = Table(name=table_name, columns=columns, data=[], table_id=table_id) rows = json.loads(table_path.read_text()) - assert isinstance(rows, list), ( - f"File {table_path} does not contain a list of rows" - ) + assert isinstance( + rows, list + ), f"File {table_path} does not contain a list of rows" if idx < 0 or idx >= len(rows): raise IndexError(f"Index {idx} out of range for table {table_name}") # Convert changes to column ID format changes_with_ids = temp_table.convert_row_to_column_ids(changes) rows[idx].update(changes_with_ids) - table_path.write_text(json.dumps(rows, indent=2)) + _atomic_write_text(table_path, json.dumps(rows, indent=2)) def _delete(self, table_name: str, idxs: List[int]): table_id, _ = self._get_table_metadata_by_name(table_name) @@ -305,13 +324,13 @@ def _delete(self, table_name: str, idxs: List[int]): raise FileNotFoundError(f"File {table_path} does not exist") rows = json.loads(table_path.read_text()) - assert isinstance(rows, list), ( - f"File {table_path} does not contain a list of rows" - ) + assert isinstance( + rows, list + ), f"File {table_path} does not contain a list of rows" # Sort indices in descending order to avoid index shifting for idx in sorted(idxs, reverse=True): if idx < 0 or idx >= len(rows): raise IndexError(f"Index {idx} out of range for table {table_name}") del rows[idx] - table_path.write_text(json.dumps(rows, indent=2)) + _atomic_write_text(table_path, json.dumps(rows, indent=2)) From a2b4a6a64af7e66a8b12a9e8364e03e5de2658a3 Mon Sep 17 00:00:00 2001 From: Lucas Ricarte Date: Tue, 7 Apr 2026 23:25:48 -0700 Subject: [PATCH 2/2] Ruff --- abstra_json_sql/persistence/json.py | 36 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/abstra_json_sql/persistence/json.py b/abstra_json_sql/persistence/json.py index 6972a1b..d103f28 100644 --- a/abstra_json_sql/persistence/json.py +++ b/abstra_json_sql/persistence/json.py @@ -184,9 +184,9 @@ def _insert(self, table_name: str, row: dict): temp_table = Table(name=table_name, columns=columns, data=[], table_id=table_id) rows = json.loads(table_path.read_text()) - assert isinstance( - rows, list - ), f"File {table_path} does not contain a list of rows" + assert isinstance(rows, list), ( + f"File {table_path} does not contain a list of rows" + ) # Convert row to column ID format row_with_ids = temp_table.convert_row_to_column_ids(row) @@ -203,9 +203,9 @@ def add_column(self, table_name: str, column: Column): raise FileNotFoundError(f"File {table_path} does not exist") rows = json.loads(table_path.read_text()) - assert isinstance( - rows, list - ), f"File {table_path} does not contain a list of rows" + assert isinstance(rows, list), ( + f"File {table_path} does not contain a list of rows" + ) # Check if column already exists if any(col.name == column.name for col in existing_columns): @@ -232,9 +232,9 @@ def remove_column(self, table_name: str, column_name: str): raise FileNotFoundError(f"File {table_path} does not exist") rows = json.loads(table_path.read_text()) - assert isinstance( - rows, list - ), f"File {table_path} does not contain a list of rows" + assert isinstance(rows, list), ( + f"File {table_path} does not contain a list of rows" + ) # Remove column from data using column ID column_to_remove = None @@ -263,9 +263,9 @@ def rename_column(self, table_name: str, old_name: str, new_name: str): raise FileNotFoundError(f"File {table_path} does not exist") rows = json.loads(table_path.read_text()) - assert isinstance( - rows, list - ), f"File {table_path} does not contain a list of rows" + assert isinstance(rows, list), ( + f"File {table_path} does not contain a list of rows" + ) # Data doesn't need to change for rename_column since we use column IDs # Only metadata needs to be updated @@ -303,9 +303,9 @@ def _update(self, table_name: str, idx: int, changes: dict): temp_table = Table(name=table_name, columns=columns, data=[], table_id=table_id) rows = json.loads(table_path.read_text()) - assert isinstance( - rows, list - ), f"File {table_path} does not contain a list of rows" + assert isinstance(rows, list), ( + f"File {table_path} does not contain a list of rows" + ) if idx < 0 or idx >= len(rows): raise IndexError(f"Index {idx} out of range for table {table_name}") @@ -324,9 +324,9 @@ def _delete(self, table_name: str, idxs: List[int]): raise FileNotFoundError(f"File {table_path} does not exist") rows = json.loads(table_path.read_text()) - assert isinstance( - rows, list - ), f"File {table_path} does not contain a list of rows" + assert isinstance(rows, list), ( + f"File {table_path} does not contain a list of rows" + ) # Sort indices in descending order to avoid index shifting for idx in sorted(idxs, reverse=True):