Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
724956a
SDK refactoring
ritaagarwala-sf Apr 7, 2026
626ccf6
SDK refactoring
ritaagarwala-sf Apr 7, 2026
a7138c5
@W-21605883
ritaagarwala-sf Apr 16, 2026
009603c
Merge remote-tracking branch 'origin/main' into SDK_refactoring
ritaagarwala-sf Apr 16, 2026
9867549
Removing reference to proxy for cleanup
ritaagarwala-sf Apr 16, 2026
7076dfa
Restoring changes done to script client
ritaagarwala-sf Apr 17, 2026
be4148b
Using pydantic for llm_gateway models
ritaagarwala-sf Apr 17, 2026
ea5891a
Updating potery.lock
ritaagarwala-sf Apr 17, 2026
661f400
Some fix
ritaagarwala-sf Apr 17, 2026
217ff14
Updating poetery.lock
ritaagarwala-sf Apr 20, 2026
caf53a6
Fixing lint error
ritaagarwala-sf Apr 20, 2026
33b6e71
Adding explict src to get rid of lint error : src/datacustomcode/io/r…
ritaagarwala-sf Apr 20, 2026
0e11fe7
Adding explict src to get rid of lint error : src/datacustomcode/io/r…
ritaagarwala-sf Apr 20, 2026
731f9ec
Trying to fix lint error
ritaagarwala-sf Apr 20, 2026
b8349e9
Fixing lint errors
ritaagarwala-sf Apr 20, 2026
d576453
make test fix
ritaagarwala-sf Apr 20, 2026
3959bac
Adding mandatory model name to llm gateway call
ritaagarwala-sf Apr 20, 2026
e24ae3e
Adding unit test
ritaagarwala-sf Apr 20, 2026
aa87f55
Merging from main and adding testcase
ritaagarwala-sf Apr 20, 2026
7bc5fa3
Merging from main
ritaagarwala-sf Apr 20, 2026
a68b42c
Review comments
ritaagarwala-sf Apr 21, 2026
59fe203
Creating runtime package under function module. And adding systm_type…
ritaagarwala-sf Apr 21, 2026
128d252
Renaming system_types to feature_types
ritaagarwala-sf Apr 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ repos:
exclude: \.py$

- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
rev: v2.13.0
rev: v2.14.0
hooks:
- id: pretty-format-toml
args: [--autofix]
Expand All @@ -69,7 +69,7 @@ repos:
hooks:
- id: mypy
name: mypy
entry: mypy src/datacustomcode
entry: mypy --explicit-package-bases src/datacustomcode
language: system
pass_filenames: false
types: [python]
Expand Down
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
# Changelog

## 3.0.0

### Breaking Changes

- **Added `runtime: datacustomcode.function.runtime.Runtime` to the function contract for codeType `function`.**

Function now mandates runtime as arguments.

**Why:** `runTime` provides access to resources (llm_gateway / file) available during function execution.

**Migration:** use `function(request: dict, runTime: Runtime)` instead of `function(request: dict)`

```python
# Before
def function(request: dict):
pass

# After
def function(request: dict, runTime: Runtime):
pass
```


## 2.0.0

### Breaking Changes
Expand Down
2,121 changes: 1,071 additions & 1,050 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ use_parentheses = true

[tool.mypy]
check_untyped_defs = false
explicit_package_bases = true
ignore_missing_imports = true
mypy_path = "src"
no_implicit_optional = true
plugins = [
'pydantic.mypy'
Expand All @@ -99,7 +101,7 @@ click = "^8.1.8"
loguru = "^0.7.3"
numpy = "*"
pandas = "*"
pydantic = "^1.8.2 || ^2.0.0"
pydantic = "2.13.1"
pyspark = "3.5.1"
python = ">=3.10,<3.12"
pyyaml = "^6.0"
Expand Down
Empty file added src/__init__.py
Empty file.
24 changes: 6 additions & 18 deletions src/datacustomcode/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,6 @@ def __new__(
spark_provider: Optional["BaseSparkSessionProvider"] = None,
code_type: str = "script",
) -> Client:
if "function" in code_type:
return cls._new_function_client()

if cls._instance is None:
cls._instance = super().__new__(cls)
Expand Down Expand Up @@ -175,16 +173,6 @@ def __new__(
raise ValueError("Cannot set reader or writer after client is initialized")
return cls._instance

@classmethod
def _new_function_client(cls) -> Client:
cls._instance = super().__new__(cls)
cls._instance._proxy = (
config.proxy_config.to_object() # type: ignore
if config.proxy_config is not None
else None
)
return cls._instance

def read_dlo(self, name: str) -> PySparkDataFrame:
"""Read a DLO from Data Cloud.

Expand All @@ -195,7 +183,7 @@ def read_dlo(self, name: str) -> PySparkDataFrame:
A PySpark DataFrame containing the DLO data.
"""
self._record_dlo_access(name)
return self._reader.read_dlo(name)
return self._reader.read_dlo(name) # type: ignore[no-any-return]

def read_dmo(self, name: str) -> PySparkDataFrame:
"""Read a DMO from Data Cloud.
Expand All @@ -207,7 +195,7 @@ def read_dmo(self, name: str) -> PySparkDataFrame:
A PySpark DataFrame containing the DMO data.
"""
self._record_dmo_access(name)
return self._reader.read_dmo(name)
return self._reader.read_dmo(name) # type: ignore[no-any-return]

def write_to_dlo(
self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode, **kwargs
Expand All @@ -220,7 +208,7 @@ def write_to_dlo(
write_mode: The write mode to use for writing to the DLO.
"""
self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DMO)
return self._writer.write_to_dlo(name, dataframe, write_mode, **kwargs)
return self._writer.write_to_dlo(name, dataframe, write_mode, **kwargs) # type: ignore[no-any-return]

def write_to_dmo(
self, name: str, dataframe: PySparkDataFrame, write_mode: WriteMode, **kwargs
Expand All @@ -233,17 +221,17 @@ def write_to_dmo(
write_mode: The write mode to use for writing to the DMO.
"""
self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DLO)
return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs)
return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs) # type: ignore[no-any-return]

def call_llm_gateway(self, LLM_MODEL_ID: str, prompt: str, maxTokens: int) -> str:
if self._proxy is None:
raise ValueError("No proxy configured; set proxy or proxy_config")
return self._proxy.call_llm_gateway(LLM_MODEL_ID, prompt, maxTokens)
return self._proxy.call_llm_gateway(LLM_MODEL_ID, prompt, maxTokens) # type: ignore[no-any-return]

def find_file_path(self, file_name: str) -> Path:
"""Return a file path"""

return self._file.find_file_path(file_name)
return self._file.find_file_path(file_name) # type: ignore[no-any-return]

def _validate_data_layer_history_does_not_contain(
self, data_cloud_object_type: DataCloudObjectType
Expand Down
6 changes: 3 additions & 3 deletions src/datacustomcode/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@
# This lets all readers and writers to be findable via config
from datacustomcode.io import * # noqa: F403
from datacustomcode.io.base import BaseDataAccessLayer
from datacustomcode.io.reader.base import BaseDataCloudReader # noqa: TCH001
from datacustomcode.io.writer.base import BaseDataCloudWriter # noqa: TCH001
from datacustomcode.io.reader.base import BaseDataCloudReader # noqa: TCH002
from datacustomcode.io.writer.base import BaseDataCloudWriter # noqa: TCH002
from datacustomcode.proxy.base import BaseProxyAccessLayer
from datacustomcode.proxy.client.base import BaseProxyClient # noqa: TCH001
from datacustomcode.proxy.client.base import BaseProxyClient # noqa: TCH002
from datacustomcode.spark.base import BaseSparkSessionProvider

DEFAULT_CONFIG_NAME = "config.yaml"
Expand Down
20 changes: 20 additions & 0 deletions src/datacustomcode/function/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Function runtime for Data Cloud Custom Code."""

from datacustomcode.function.runtime import Runtime

__all__ = ["Runtime"]
18 changes: 18 additions & 0 deletions src/datacustomcode/function/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class BaseRuntime:
    """Base class for datacustomcode runtime implementations.

    Concrete runtimes (e.g. the function runtime) subclass this to expose
    resources to user code; the base itself carries no state or behavior.
    """
89 changes: 89 additions & 0 deletions src/datacustomcode/function/features_types/chunking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Pydantic models for byoc-function-proto (uds_chunking.proto)
Auto-generated - validation rules from buf.validate
"""

from typing import (
Any,
Dict,
List,
Literal,
)

from pydantic import BaseModel, Field


class DocElement(BaseModel):
    """Document element to be chunked."""

    # Raw text that the chunking process will split.
    text: str = Field(..., description="Text content to be chunked")
    # Arbitrary metadata carried over from the source document; values may be
    # of any JSON-serializable type.
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Source document metadata"
    )


class ChunkOutput(BaseModel):
    """Output chunk produced by the chunking process."""

    # Unique identifier (UUID) assigned to this chunk.
    chunk_id: str = Field(..., description="UUID for this chunk")
    # NOTE(review): description says only 'text' is produced — confirm whether
    # other chunk types exist in the proto.
    chunk_type: str = Field(..., description="Type: 'text'")
    # The chunk's text content.
    text: str = Field(..., description="Chunk text content")
    # Position of this chunk within its source document, starting at 1.
    seq_no: int = Field(..., description="Sequential chunk number (1-based)")
    # String-valued metadata copied from the source (DMO fields); note this is
    # Dict[str, str] unlike DocElement.metadata which allows any value type.
    metadata: Dict[str, str] = Field(
        default_factory=dict, description="Metadata from source (DMO fields)"
    )
    # Free-form additional tags attached to the chunk.
    tag_metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Additional tags"
    )
    # Citation information linking the chunk back to its source.
    citations: Dict[str, Any] = Field(
        default_factory=dict, description="Citation information"
    )


class StatusResponse(BaseModel):
    """Status response for an operation."""

    # Outcome discriminator: expected values are 'success' or 'error'.
    status_type: str = Field(..., description="'success' or 'error'")
    # Human-readable explanation of the status.
    status_message: str = Field(..., description="Human-readable status")


class UdsChunkingV1BatchRequest(BaseModel):
    """Batch request for UDS chunking.

    Mirrors ``uds_chunking.proto``; the ``version`` discriminator is pinned
    to ``"v1"``.
    """

    # API version discriminator; only 'v1' validates.
    version: Literal["v1"] = Field(
        default="v1", description="API version, must be 'v1'"
    )
    # At least one document is required per batch (min_length=1).
    input: List[DocElement] = Field(
        ..., min_length=1, description="List of documents (min 1)"
    )
    # NOTE(review): this field is required (Field(...), no default), so the
    # previous description's "(default: 100)" was contradictory. Confirm
    # against uds_chunking.proto whether a server-side default of 100 applies;
    # if so, consider `Field(default=100, ...)` instead.
    max_characters: int = Field(..., description="Max chars per chunk")
    # Reserved for forward-compatible parameters not yet modeled.
    additional_params: Dict[str, Any] = Field(
        default_factory=dict, description="Future extension point"
    )


class UdsChunkingV1BatchResponse(BaseModel):
    """Batch response for UDS chunking."""

    # API version discriminator; only 'v1' validates.
    version: Literal["v1"] = Field(
        default="v1", description="API version, must be 'v1'"
    )
    # Chunks from all input documents, flattened into one list; empty on error.
    output: List[ChunkOutput] = Field(
        default_factory=list, description="Flat list of chunks from all docs"
    )
    # Overall success/error status for the whole batch.
    status: StatusResponse = Field(..., description="Overall operation status")
77 changes: 77 additions & 0 deletions src/datacustomcode/function/runtime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import threading
from typing import Optional

from datacustomcode.file.path.default import DefaultFindFilePath
from datacustomcode.function.base import BaseRuntime
from datacustomcode.llm_gateway.default import DefaultLLMGateway


class Runtime(BaseRuntime):
    """Runtime provided to user functions for the ``function`` code type.

    NOTE: Do not instantiate this class directly. The SDK constructs the
    singleton and passes it to your function::

        def function(request: dict, runtime: Runtime) -> dict:
            response = {...}
            return response
    """

    # Singleton bookkeeping: the SDK creates exactly one Runtime per process.
    _instance: Optional["Runtime"] = None
    _lock = threading.Lock()

    def __new__(cls):
        """Create the singleton instance (thread-safe).

        Raises:
            RuntimeError: if a Runtime instance already exists.
        """
        # Both the existence check and the assignment happen under the lock so
        # two threads cannot each observe _instance is None and create one.
        with cls._lock:
            if cls._instance is not None:
                raise RuntimeError(
                    "Runtime can only be instantiated once by the SDK.\n\n"
                    "Do not instantiate it yourself. Accept it as a parameter:\n\n"
                    "    from datacustomcode.function.runtime import Runtime\n"
                    "    \n"
                    "    def function(request: dict, runtime: Runtime) -> dict:\n"
                    "        response = {...}\n"
                    "        return response"
                )
            cls._instance = super().__new__(cls)
            return cls._instance

    def __init__(self) -> None:
        # Prevent re-initialization if __init__ runs again on the singleton.
        if hasattr(self, "_initialized"):
            return

        self._initialized = True

        super().__init__()

        # Resources made available to user functions during execution.
        self._llm_gateway = DefaultLLMGateway()
        self._file = DefaultFindFilePath()

    @property
    def llm_gateway(self) -> DefaultLLMGateway:
        """Access LLM gateway operations."""
        return self._llm_gateway

    @property
    def file(self) -> DefaultFindFilePath:
        """Access file lookup operations."""
        return self._file
4 changes: 2 additions & 2 deletions src/datacustomcode/io/reader/query_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def read_dlo(
self, "_sf_cli_reader", None
)
if sf_cli_reader is not None:
return sf_cli_reader.read_dlo(name, schema)
return sf_cli_reader.read_dlo(name, schema) # type: ignore[no-any-return]

query = self._build_query(name)

Expand Down Expand Up @@ -230,7 +230,7 @@ def read_dmo(
self, "_sf_cli_reader", None
)
if sf_cli_reader is not None:
return sf_cli_reader.read_dmo(name, schema)
return sf_cli_reader.read_dmo(name, schema) # type: ignore[no-any-return]

query = self._build_query(name)

Expand Down
14 changes: 14 additions & 0 deletions src/datacustomcode/llm_gateway/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading
Loading