From f85eeff48423097e0d2b01a5c9bbbd6792f2c107 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 10 Apr 2026 11:48:42 -0700 Subject: [PATCH 1/2] docs: formalize locator refresh state contract (Fixes #387) --- README.md | 2 ++ crates/pet-core/src/lib.rs | 26 ++++++++++---- crates/pet/src/jsonrpc.rs | 71 ++++++++++++++++++++++++++++++++++++++ docs/LOCATOR_STATE.md | 46 ++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 6 deletions(-) create mode 100644 docs/LOCATOR_STATE.md diff --git a/README.md b/README.md index 64ca2e3e..c3dba078 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ This project will be consumed by the [Python extension](https://marketplace.visu Our approach prioritizes performance and efficiency by leveraging Rust. We minimize I/O operations by collecting all necessary environment information at once, which reduces repeated I/O and the need to spawn additional processes, significantly enhancing overall performance. +Locator refresh-state contracts are documented in [docs/LOCATOR_STATE.md](docs/LOCATOR_STATE.md). + ## Contributing This project welcomes contributions and suggestions. Most contributions require you to agree to a diff --git a/crates/pet-core/src/lib.rs b/crates/pet-core/src/lib.rs index 9829131f..fe8f4018 100644 --- a/crates/pet-core/src/lib.rs +++ b/crates/pet-core/src/lib.rs @@ -63,13 +63,24 @@ pub enum LocatorKind { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum RefreshStatePersistence { - /// The locator keeps no mutable state across requests. + /// The locator keeps no mutable state that survives a request. Stateless, - /// The locator keeps configured inputs only; refresh must not copy them back. + /// The locator keeps configured inputs only. + /// + /// Refresh creates and configures transient locator instances for one request. A + /// locator in this category must get its configuration from that request's + /// configuration snapshot, not by copying anything back from the transient + /// locator into the long-lived shared locator. ConfiguredOnly, - /// The locator keeps cache-like state, but later requests can repopulate it on demand. + /// The locator keeps cache-like state that later requests can repopulate on demand. + /// + /// Refresh may populate this state on a transient locator, but correctness must + /// not depend on syncing it back into the long-lived shared locator. SelfHydratingCache, - /// The locator keeps refresh-discovered state that later requests depend on for correctness. + /// The locator keeps refresh-discovered state that later requests depend on. + /// + /// Locators in this category must override `sync_refresh_state_from()` and copy + /// only correctness-critical discovery state for the provided sync scope. SyncedDiscoveryState, } @@ -121,8 +132,11 @@ pub trait Locator: Any + Send + Sync { } /// Describes what mutable state, if any, must survive a refresh boundary. /// - /// Refresh runs execute against transient locator graphs and then invoke - /// `sync_refresh_state_from()` on the long-lived shared locators. + /// Refresh requests run against transient locator graphs. After a refresh + /// completes, the server invokes `sync_refresh_state_from()` on the long-lived + /// shared locator graph while the starting configuration generation is still + /// current. The returned classification is the contract the locator makes with + /// that sync step. fn refresh_state(&self) -> RefreshStatePersistence { RefreshStatePersistence::Stateless } diff --git a/crates/pet/src/jsonrpc.rs b/crates/pet/src/jsonrpc.rs index 4efe3a00..4313c67a 100644 --- a/crates/pet/src/jsonrpc.rs +++ b/crates/pet/src/jsonrpc.rs @@ -1223,6 +1223,7 @@ mod tests { use pet_conda::manager::CondaManager; use pet_core::manager::EnvManager; use pet_core::manager::EnvManagerType; + use pet_core::LocatorKind; use pet_core::RefreshStatePersistence; use std::path::PathBuf; use std::sync::mpsc; @@ -1699,6 +1700,76 @@ mod tests { assert!(!synced); } + #[test] + fn test_locator_graph_refresh_state_contracts_are_explicit() { + let environment = EnvironmentApi::new(); + let conda_locator = Arc::new(Conda::from(&environment)); + let poetry_locator = Arc::new(Poetry::from(&environment)); + let locators = create_locators(conda_locator, poetry_locator, &environment); + + let actual = locators + .iter() + .map(|locator| (locator.get_kind(), locator.refresh_state())) + .collect::>(); + + let expected = vec![ + #[cfg(windows)] + ( + LocatorKind::WindowsStore, + RefreshStatePersistence::SyncedDiscoveryState, + ), + #[cfg(windows)] + ( + LocatorKind::WindowsRegistry, + RefreshStatePersistence::SyncedDiscoveryState, + ), + #[cfg(windows)] + (LocatorKind::WinPython, RefreshStatePersistence::Stateless), + ( + LocatorKind::PyEnv, + RefreshStatePersistence::SelfHydratingCache, + ), + (LocatorKind::Pixi, RefreshStatePersistence::Stateless), + ( + LocatorKind::Conda, + RefreshStatePersistence::SyncedDiscoveryState, + ), + (LocatorKind::Uv, RefreshStatePersistence::ConfiguredOnly), + ( + LocatorKind::Poetry, + RefreshStatePersistence::SyncedDiscoveryState, + ), + (LocatorKind::PipEnv, RefreshStatePersistence::ConfiguredOnly), + ( + LocatorKind::VirtualEnvWrapper, + RefreshStatePersistence::Stateless, + ), + (LocatorKind::Venv, RefreshStatePersistence::Stateless), + (LocatorKind::VirtualEnv, RefreshStatePersistence::Stateless), + #[cfg(unix)] + (LocatorKind::Homebrew, RefreshStatePersistence::Stateless), + #[cfg(target_os = "macos")] + (LocatorKind::MacXCode, RefreshStatePersistence::Stateless), + #[cfg(target_os = "macos")] + ( + LocatorKind::MacCommandLineTools, + RefreshStatePersistence::Stateless, + ), + #[cfg(target_os = "macos")] + ( + LocatorKind::MacPythonOrg, + RefreshStatePersistence::Stateless, + ), + #[cfg(all(unix, not(target_os = "macos")))] + ( + LocatorKind::LinuxGlobal, + RefreshStatePersistence::SelfHydratingCache, + ), + ]; + + assert_eq!(actual, expected); + } + #[test] fn test_refresh_coordinator_does_not_join_different_generations() { let coordinator = RefreshCoordinator::default(); diff --git a/docs/LOCATOR_STATE.md b/docs/LOCATOR_STATE.md new file mode 100644 index 00000000..eac34e48 --- /dev/null +++ b/docs/LOCATOR_STATE.md @@ -0,0 +1,46 @@ +# Locator Refresh State + +Refresh requests run on a transient locator graph. The server configures that graph from a single configuration snapshot, runs discovery, and then syncs selected refresh-discovered state back into the long-lived shared locator graph only if that configuration generation is still current. + +The `Locator::refresh_state()` classification is the contract for that boundary. It keeps configured inputs, self-hydrating caches, and correctness-critical discovery state distinct. + +## Classifications + +| Classification | Meaning | Sync behavior | +| --- | --- | --- | +| `Stateless` | The locator keeps no mutable state that survives a request. | Nothing is copied back. | +| `ConfiguredOnly` | The locator stores configured inputs such as executable paths or workspace directories. | Refresh must use the transient locator's request snapshot and must not copy this state back. | +| `SelfHydratingCache` | The locator stores a cache that later requests can rebuild on demand. | Refresh may fill a transient cache, but correctness must not rely on syncing it. | +| `SyncedDiscoveryState` | The locator stores refresh-discovered state that later requests need for correctness or fidelity. | The locator must override `sync_refresh_state_from()` and copy only state appropriate for the `RefreshStateSyncScope`. | + +## Current Locator Inventory + +| Locator | Mutable state | Classification | Notes | +| --- | --- | --- | --- | +| WindowsStore | Discovered Store environments | `SyncedDiscoveryState` | Full and matching global-kind refreshes replace the cache; workspace refreshes leave it alone. | +| WindowsRegistry | Discovered registry managers and environments | `SyncedDiscoveryState` | Full and matching global-kind refreshes replace the cache; workspace refreshes leave it alone. | +| WinPython | None | `Stateless` | Windows-only locator. | +| PyEnv | Manager and versions-directory cache | `SelfHydratingCache` | `find()` clears the cache, and `try_from()` can rebuild it from the environment. | +| Pixi | None | `Stateless` | Identification is derived from filesystem markers. | +| Conda | Environment, manager, and mamba-manager discovery caches; configured executable | `SyncedDiscoveryState` | Discovery caches are synced. The configured executable remains configuration state and is not copied from refresh locators. | +| Uv | Configured workspace directories; immutable uv install directory | `ConfiguredOnly` | Workspace directories come from the request configuration snapshot. | +| Poetry | Configured workspace directories and executable; discovered search result | `SyncedDiscoveryState` | Search results are synced or merged by scope. Configured inputs are not copied back. | +| PipEnv | Configured pipenv executable | `ConfiguredOnly` | The executable comes from the configuration snapshot. | +| VirtualEnvWrapper | Environment variables captured at construction | `Stateless` | No refresh-discovered mutable state. | +| Venv | None | `Stateless` | Identification is derived from `pyvenv.cfg` and filesystem layout. | +| VirtualEnv | None | `Stateless` | Identification is derived from virtualenv markers. | +| Homebrew | Environment variables captured at construction | `Stateless` | No refresh-discovered mutable state. | +| MacXCode | None | `Stateless` | macOS-only locator. | +| MacCommandLineTools | None | `Stateless` | macOS-only locator. | +| MacPythonOrg | None | `Stateless` | macOS-only locator. | +| LinuxGlobalPython | Reported executable cache | `SelfHydratingCache` | `try_from()` can repopulate the cache by scanning known global bin directories. | + +## Updating The Contract + +When adding mutable state to a locator, classify it before relying on it across refreshes: + +1. If it is configured input, keep it under `ConfiguredOnly` and source it from `Configuration`. +2. If it is only a performance cache, use `SelfHydratingCache` and make later requests able to rebuild it. +3. If later requests need refresh-discovered state, use `SyncedDiscoveryState`, implement `sync_refresh_state_from()`, and cover full, workspace, and kind-filtered scopes with tests. + +The locator graph has a regression test in `crates/pet/src/jsonrpc.rs` that pins the current classification of each locator created by `create_locators()`. \ No newline at end of file From 5cb4e8aa77db031b399b54128b35b8f4357ad110 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 10 Apr 2026 11:55:50 -0700 Subject: [PATCH 2/2] docs: address locator state review feedback --- README.md | 12 +++++------ docs/LOCATOR_STATE.md | 50 +++++++++++++++++++++---------------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index c3dba078..684f014b 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # Python environment tools for Visual Studio Code -Performant Python environment tooling and support, such as locating all global Python installs and virtual environments. +Performant Python environment tooling and support, such as locating all global Python installs and virtual environments. This project will be consumed by the [Python extension](https://marketplace.visualstudio.com/items?itemName=ms-python.python) directly. You can find the code to consume `pet` in the Python extension [source code](https://github.com/microsoft/vscode-python/blob/main/src/client/pythonEnvironments/base/locators/common/nativePythonFinder.ts). For more information on JSNORPC requests/notifications for this tool, please reference [/docs/JSONRPC.md](https://github.com/microsoft/python-environment-tools/blob/main/docs/JSONRPC.md). -## Environment Types Supported +## Environment Types Supported - python.org - Windows Store @@ -22,7 +22,7 @@ This project will be consumed by the [Python extension](https://marketplace.visu - VirtualEnv - Python on your PATH -## Features +## Features - Discovery of all global Python installs - Discovery of all Python virtual environments @@ -35,7 +35,7 @@ Locator refresh-state contracts are documented in [docs/LOCATOR_STATE.md](docs/L ## Contributing -This project welcomes contributions and suggestions. Most contributions require you to agree to a +This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. @@ -49,8 +49,8 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio ## Trademarks -This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft -trademarks or logos is subject to and must follow +This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft +trademarks or logos is subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. Any use of third-party trademarks or logos are subject to those third-party's policies. diff --git a/docs/LOCATOR_STATE.md b/docs/LOCATOR_STATE.md index eac34e48..2a8c08e0 100644 --- a/docs/LOCATOR_STATE.md +++ b/docs/LOCATOR_STATE.md @@ -6,34 +6,34 @@ The `Locator::refresh_state()` classification is the contract for that boundary. ## Classifications -| Classification | Meaning | Sync behavior | -| --- | --- | --- | -| `Stateless` | The locator keeps no mutable state that survives a request. | Nothing is copied back. | -| `ConfiguredOnly` | The locator stores configured inputs such as executable paths or workspace directories. | Refresh must use the transient locator's request snapshot and must not copy this state back. | -| `SelfHydratingCache` | The locator stores a cache that later requests can rebuild on demand. | Refresh may fill a transient cache, but correctness must not rely on syncing it. | +| Classification | Meaning | Sync behavior | +| ---------------------- | ------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------- | +| `Stateless` | The locator keeps no mutable state that survives a request. | Nothing is copied back. | +| `ConfiguredOnly` | The locator stores configured inputs such as executable paths or workspace directories. | Refresh must use the transient locator's request snapshot and must not copy this state back. | +| `SelfHydratingCache` | The locator stores a cache that later requests can rebuild on demand. | Refresh may fill a transient cache, but correctness must not rely on syncing it. | | `SyncedDiscoveryState` | The locator stores refresh-discovered state that later requests need for correctness or fidelity. | The locator must override `sync_refresh_state_from()` and copy only state appropriate for the `RefreshStateSyncScope`. | ## Current Locator Inventory -| Locator | Mutable state | Classification | Notes | -| --- | --- | --- | --- | -| WindowsStore | Discovered Store environments | `SyncedDiscoveryState` | Full and matching global-kind refreshes replace the cache; workspace refreshes leave it alone. | -| WindowsRegistry | Discovered registry managers and environments | `SyncedDiscoveryState` | Full and matching global-kind refreshes replace the cache; workspace refreshes leave it alone. | -| WinPython | None | `Stateless` | Windows-only locator. | -| PyEnv | Manager and versions-directory cache | `SelfHydratingCache` | `find()` clears the cache, and `try_from()` can rebuild it from the environment. | -| Pixi | None | `Stateless` | Identification is derived from filesystem markers. | -| Conda | Environment, manager, and mamba-manager discovery caches; configured executable | `SyncedDiscoveryState` | Discovery caches are synced. The configured executable remains configuration state and is not copied from refresh locators. | -| Uv | Configured workspace directories; immutable uv install directory | `ConfiguredOnly` | Workspace directories come from the request configuration snapshot. | -| Poetry | Configured workspace directories and executable; discovered search result | `SyncedDiscoveryState` | Search results are synced or merged by scope. Configured inputs are not copied back. | -| PipEnv | Configured pipenv executable | `ConfiguredOnly` | The executable comes from the configuration snapshot. | -| VirtualEnvWrapper | Environment variables captured at construction | `Stateless` | No refresh-discovered mutable state. | -| Venv | None | `Stateless` | Identification is derived from `pyvenv.cfg` and filesystem layout. | -| VirtualEnv | None | `Stateless` | Identification is derived from virtualenv markers. | -| Homebrew | Environment variables captured at construction | `Stateless` | No refresh-discovered mutable state. | -| MacXCode | None | `Stateless` | macOS-only locator. | -| MacCommandLineTools | None | `Stateless` | macOS-only locator. | -| MacPythonOrg | None | `Stateless` | macOS-only locator. | -| LinuxGlobalPython | Reported executable cache | `SelfHydratingCache` | `try_from()` can repopulate the cache by scanning known global bin directories. | +| Locator | Mutable state | Classification | Notes | +| ------------------- | ------------------------------------------------------------------------------- | ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| WindowsStore | Discovered Store environments | `SyncedDiscoveryState` | Full and matching global-kind refreshes replace the cache; workspace refreshes leave it alone. | +| WindowsRegistry | Discovered registry managers and environments | `SyncedDiscoveryState` | Full and matching global-kind refreshes replace the cache; workspace refreshes leave it alone. | +| WinPython | None | `Stateless` | Windows-only locator. | +| PyEnv | Manager and versions-directory cache | `SelfHydratingCache` | `find()` clears the cache, and `try_from()` can rebuild it from the environment. | +| Pixi | None | `Stateless` | Identification is derived from filesystem markers. | +| Conda | Environment, manager, and mamba-manager discovery caches; configured executable | `SyncedDiscoveryState` | Discovery caches are synced. The configured executable remains configuration state and is not copied from refresh locators. | +| Uv | Configured workspace directories; immutable uv install directory | `ConfiguredOnly` | Workspace directories come from the request configuration snapshot. | +| Poetry | Configured workspace directories and executable; discovered search result | `SyncedDiscoveryState` | Search results are synced or merged by scope. Configured inputs are not copied back. | +| PipEnv | Configured pipenv executable | `ConfiguredOnly` | The executable comes from the configuration snapshot. | +| VirtualEnvWrapper | Environment variables captured at construction | `Stateless` | No refresh-discovered mutable state. | +| Venv | None | `Stateless` | Identification is derived from `pyvenv.cfg` and filesystem layout. | +| VirtualEnv | None | `Stateless` | Identification is derived from virtualenv markers. | +| Homebrew | Environment variables captured at construction | `Stateless` | No refresh-discovered mutable state. | +| MacXCode | None | `Stateless` | macOS-only locator. | +| MacCommandLineTools | None | `Stateless` | macOS-only locator. | +| MacPythonOrg | None | `Stateless` | macOS-only locator. | +| LinuxGlobal | Reported executable cache | `SelfHydratingCache` | `try_from()` can repopulate the cache by scanning known global bin directories. | ## Updating The Contract @@ -43,4 +43,4 @@ When adding mutable state to a locator, classify it before relying on it across 2. If it is only a performance cache, use `SelfHydratingCache` and make later requests able to rebuild it. 3. If later requests need refresh-discovered state, use `SyncedDiscoveryState`, implement `sync_refresh_state_from()`, and cover full, workspace, and kind-filtered scopes with tests. -The locator graph has a regression test in `crates/pet/src/jsonrpc.rs` that pins the current classification of each locator created by `create_locators()`. \ No newline at end of file +The locator graph has a regression test in `crates/pet/src/jsonrpc.rs` that pins the current classification of each locator created by `create_locators()`.