diff --git a/code_agent/utils/llm_clients/llm_client.py b/code_agent/utils/llm_clients/llm_client.py
index e70bb5e..1bf630e 100644
--- a/code_agent/utils/llm_clients/llm_client.py
+++ b/code_agent/utils/llm_clients/llm_client.py
@@ -80,9 +80,12 @@ def __init__(self, model_config: ModelConfig) -> None:
 
                 self.client = OpenRouterClient(model_config)
             case LLMProvider.OLLAMA:
-                from .ollama_client import OllamaClient
+                # Use Ollama's OpenAI-compatible /v1/chat/completions endpoint so that:
+                # - tool calling follows the same code path as other compatible providers
+                # - requests are visible in Ollama server logs (HTTP)
+                from .openai_compat_client import OpenAICompatClient
 
-                self.client = OllamaClient(model_config)
+                self.client = OpenAICompatClient(model_config, self.provider.value)
             case (
                 LLMProvider.DOUBAO
                 | LLMProvider.DEEPSEEK
diff --git a/code_agent/utils/llm_clients/ollama_client.py b/code_agent/utils/llm_clients/ollama_client.py
index 2283696..b6f3fb0 100644
--- a/code_agent/utils/llm_clients/ollama_client.py
+++ b/code_agent/utils/llm_clients/ollama_client.py
@@ -7,6 +7,7 @@
 """
 
 import json
+import os
 import uuid
 try:
     from typing import override
@@ -15,7 +16,7 @@ def override(func):
         return func
 
 import openai
-from ollama import chat as ollama_chat  # pyright: ignore[reportUnknownVariableType]
+from ollama import Client as OllamaSDKClient  # pyright: ignore[reportUnknownVariableType]
 from openai.types.responses import (
     FunctionToolParam,
     ResponseFunctionToolCallParam,
@@ -34,13 +35,24 @@ class OllamaClient(BaseLLMClient):
     def __init__(self, model_config: ModelConfig):
         super().__init__(model_config)
 
+        configured_base_url = (
+            model_config.model_provider.base_url
+            or os.getenv("OLLAMA_HOST")
+            or "http://localhost:11434"
+        )
+        # The OpenAI-compatible endpoint often uses /v1, while ollama.Client
+        # expects the service host root.
+        # Strip trailing slashes first so ".../v1/" is recognised as well as ".../v1".
+        configured_base_url = configured_base_url.rstrip("/")
+        if configured_base_url.endswith("/v1"):
+            configured_base_url = configured_base_url[:-3]
+
         self.client: openai.OpenAI = openai.OpenAI(
             # by default ollama doesn't require any api key. It should set to be "ollama".
             api_key=self.api_key,
-            base_url=model_config.model_provider.base_url
-            if model_config.model_provider.base_url
-            else "http://localhost:11434/v1",
+            base_url=(configured_base_url.rstrip("/") + "/v1"),
         )
+        self.ollama_client = OllamaSDKClient(host=configured_base_url)
 
         self.message_history: ResponseInputParam = []
 
@@ -80,7 +92,7 @@ def _create_ollama_response(
                 }
                 for tool in tool_schemas
             ]
-        return ollama_chat(
+        return self.ollama_client.chat(
             messages=self.message_history,
             model=model_config.model,
             tools=tools_param,
diff --git a/code_agent/utils/llm_clients/retry_utils.py b/code_agent/utils/llm_clients/retry_utils.py
index e259d78..bb40331 100644
--- a/code_agent/utils/llm_clients/retry_utils.py
+++ b/code_agent/utils/llm_clients/retry_utils.py
@@ -21,6 +21,8 @@ def retry_with(
     max_retries: int = 3,
     cancel_event: threading.Event | None = None,
     should_retry: Callable[[Exception], bool] | None = None,
+    min_sleep: float = 3.0,
+    max_sleep: float = 30.0,
 ) -> Callable[..., T]:
     """Retry logic with randomized backoff.
 
@@ -54,9 +56,13 @@ def wrapper(*args: Any, **kwargs: Any) -> T:
                 if should_retry is not None and not should_retry(e):
                     raise
 
-                sleep_time = random.randint(3, 30)
+                lo = float(min_sleep)
+                hi = float(max_sleep)
+                if hi < lo:
+                    lo, hi = hi, lo
+                sleep_time = random.uniform(lo, hi)
                 logger.warning(
-                    "%s API call failed: %s. Will sleep for %d seconds and retry.\n%s",
+                    "%s API call failed: %s. Will sleep for %.2f seconds and retry.\n%s",
                     provider_name, e, sleep_time, traceback.format_exc(),
                 )
                 deadline = time.monotonic() + sleep_time