diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py
index cde52c8c8..9ffabdf46 100644
--- a/llama_cpp/_internals.py
+++ b/llama_cpp/_internals.py
@@ -56,7 +56,19 @@ def __init__(
         )
 
         if model is None:
-            raise ValueError(f"Failed to load model from file: {path_model}")
+            try:
+                size_hint = f" (file size: {os.path.getsize(path_model) / (1024**3):.1f} GB)"
+            except OSError:
+                size_hint = ""
+            raise ValueError(
+                f"Failed to load model from file: {path_model}{size_hint}.\n"
+                "Common causes: insufficient RAM or VRAM for the model size, "
+                "unsupported quantization format, or corrupt file.\n"
+                "Tip: set verbose=True to see the full llama.cpp log, "
+                "or use n_gpu_layers=-1 to offload layers to GPU."
+            )
 
         vocab = llama_cpp.llama_model_get_vocab(model)