diff --git a/code_agent/utils/llm_clients/llm_client.py b/code_agent/utils/llm_clients/llm_client.py
index e70bb5e..1bf630e 100644
--- a/code_agent/utils/llm_clients/llm_client.py
+++ b/code_agent/utils/llm_clients/llm_client.py
@@ -80,9 +80,12 @@ def __init__(self, model_config: ModelConfig) -> None:
 
                 self.client = OpenRouterClient(model_config)
             case LLMProvider.OLLAMA:
-                from .ollama_client import OllamaClient
+                # Use Ollama's OpenAI-compatible /v1/chat/completions endpoint so that:
+                # - tool calling follows the same code path as other compatible providers
+                # - requests are visible in Ollama server logs (HTTP)
+                from .openai_compat_client import OpenAICompatClient
 
-                self.client = OllamaClient(model_config)
+                self.client = OpenAICompatClient(model_config, self.provider.value)
             case (
                 LLMProvider.DOUBAO
                 | LLMProvider.DEEPSEEK
diff --git a/code_agent/utils/llm_clients/ollama_client.py b/code_agent/utils/llm_clients/ollama_client.py
index 2283696..b6f3fb0 100644
--- a/code_agent/utils/llm_clients/ollama_client.py
+++ b/code_agent/utils/llm_clients/ollama_client.py
@@ -7,6 +7,7 @@
 """
 
 import json
+import os
 import uuid
 try:
     from typing import override
@@ -15,7 +16,7 @@ def override(func):
         return func
 
 import openai
-from ollama import chat as ollama_chat  # pyright: ignore[reportUnknownVariableType]
+from ollama import Client as OllamaSDKClient  # pyright: ignore[reportUnknownVariableType]
 from openai.types.responses import (
     FunctionToolParam,
     ResponseFunctionToolCallParam,
@@ -34,13 +35,22 @@ class OllamaClient(BaseLLMClient):
     def __init__(self, model_config: ModelConfig):
         super().__init__(model_config)
 
+        configured_base_url = (
+            model_config.model_provider.base_url
+            or os.getenv("OLLAMA_HOST")
+            or "http://localhost:11434"
+        ).rstrip("/")  # so ".../v1/" is recognised below and "/v1" is never doubled
+        # The OpenAI-compatible endpoint often uses /v1, while ollama.Client
+        # expects the service host root.
+        if configured_base_url.endswith("/v1"):
+            configured_base_url = configured_base_url[:-3]
+
         self.client: openai.OpenAI = openai.OpenAI(
             # by default ollama doesn't require any api key. It should set to be "ollama".
             api_key=self.api_key,
-            base_url=model_config.model_provider.base_url
-            if model_config.model_provider.base_url
-            else "http://localhost:11434/v1",
+            base_url=(configured_base_url.rstrip("/") + "/v1"),
         )
+        self.ollama_client = OllamaSDKClient(host=configured_base_url)
 
         self.message_history: ResponseInputParam = []
 
@@ -80,7 +90,7 @@ def _create_ollama_response(
                 }
                 for tool in tool_schemas
             ]
-        return ollama_chat(
+        return self.ollama_client.chat(
             messages=self.message_history,
             model=model_config.model,
             tools=tools_param,
diff --git a/code_agent/utils/llm_clients/retry_utils.py b/code_agent/utils/llm_clients/retry_utils.py
index e259d78..bb40331 100644
--- a/code_agent/utils/llm_clients/retry_utils.py
+++ b/code_agent/utils/llm_clients/retry_utils.py
@@ -21,6 +21,8 @@ def retry_with(
     max_retries: int = 3,
     cancel_event: threading.Event | None = None,
     should_retry: Callable[[Exception], bool] | None = None,
+    min_sleep: float = 3.0,
+    max_sleep: float = 30.0,
 ) -> Callable[..., T]:
     """Retry logic with randomized backoff.
 
@@ -54,9 +56,13 @@ def wrapper(*args: Any, **kwargs: Any) -> T:
                 if should_retry is not None and not should_retry(e):
                     raise
 
-                sleep_time = random.randint(3, 30)
+                lo = float(min_sleep)
+                hi = float(max_sleep)
+                if hi < lo:
+                    lo, hi = hi, lo
+                sleep_time = random.uniform(lo, hi)
                 logger.warning(
-                    "%s API call failed: %s. Will sleep for %d seconds and retry.\n%s",
+                    "%s API call failed: %s. Will sleep for %.2f seconds and retry.\n%s",
                     provider_name, e, sleep_time, traceback.format_exc(),
                 )
                 deadline = time.monotonic() + sleep_time