From 2c35b393c82dd740c60db8f2f74777f487847974 Mon Sep 17 00:00:00 2001
From: Tai An
Date: Mon, 20 Apr 2026 17:00:15 -0700
Subject: [PATCH] fix: improve error message when model loading fails

When llama_model_load_from_file returns None, the error was a generic
'Failed to load model from file'. With verbose=False (the default), the
llama.cpp log is suppressed, leaving users with no actionable information.

The error message now:
- includes the file size in GB to help diagnose OOM
- lists common causes (insufficient RAM/VRAM, unsupported format, corrupt file)
- suggests verbose=True to see the full llama.cpp error log
- suggests n_gpu_layers=-1 for GPU offloading

Fixes #2145
---
 llama_cpp/_internals.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py
index cde52c8c8..9ffabdf46 100644
--- a/llama_cpp/_internals.py
+++ b/llama_cpp/_internals.py
@@ -56,7 +56,19 @@ def __init__(
         )

         if model is None:
-            raise ValueError(f"Failed to load model from file: {path_model}")
+            try:
+                size_hint = f" (file size: {os.path.getsize(path_model) / (1024**3):.1f} GB)"
+            except OSError:
+                size_hint = ""
+            raise ValueError(
+                f"Failed to load model from file: {path_model}{size_hint}.\n"
+                "Common causes: insufficient RAM or VRAM for the model size, "
+                "unsupported quantization format, or corrupt file.\n"
+                "Tip: set verbose=True to see the full llama.cpp log, "
+                "or use n_gpu_layers=-1 to offload layers to GPU."
+            )

         vocab = llama_cpp.llama_model_get_vocab(model)