Latest version of Unsloth Studio — Linux, Fedora 43
Attached Video for easier understanding of the issue.
Context can't be pushed further than 8k, even though nvidia-smi reports only 14/16 GB usage and system RAM spill is acknowledged; additionally, the logs show it's running with --fit off.
Screencast.From.2026-04-11.09-49-27.mp4
Server Logs start to end:
{"timestamp": "2026-04-11T06:15:59.751543Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/auth/login", "status_code": 401, "process_time_ms": 7.59}
{"timestamp": "2026-04-11T06:16:04.165769Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/auth/login", "status_code": 200, "process_time_ms": 38.72}
{"timestamp": "2026-04-11T06:16:04.171886Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.44}
{"timestamp": "2026-04-11T06:16:04.173385Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.34}
{"timestamp": "2026-04-11T06:16:04.178944Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/auth/refresh", "status_code": 200, "process_time_ms": 0.72}
{"timestamp": "2026-04-11T06:16:04.185375Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.56}
{"timestamp": "2026-04-11T06:16:06.663939Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/studio", "status_code": 200, "process_time_ms": 0.47}
{"timestamp": "2026-04-11T06:16:06.686881Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/health", "status_code": 200, "process_time_ms": 0.25}
{"timestamp": "2026-04-11T06:16:06.697734Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.56}
{"timestamp": "2026-04-11T06:16:07.068395Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 4.04}
{"timestamp": "2026-04-11T06:16:07.069207Z", "level": "info", "event": "TrainingBackend initialized (subprocess mode)"}
{"timestamp": "2026-04-11T06:16:07.083851Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.53}
{"timestamp": "2026-04-11T06:16:12.253230Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.63}
{"timestamp": "2026-04-11T06:16:12.275022Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.39}
{"timestamp": "2026-04-11T06:16:12.335652Z", "level": "info", "event": "InferenceOrchestrator initialized (subprocess mode)"}
{"timestamp": "2026-04-11T06:16:12.621841Z", "level": "info", "event": "Top GGUF models: ['unsloth/Qwen3.5-35B-A3B-GGUF', 'unsloth/gemma-4-26B-A4B-it-GGUF', 'unsloth/Qwen3.5-9B-GGUF', 'unsloth/gemma-4-31B-it-GGUF', 'unsloth/gemma-4-E4B-it-GGUF', 'unsloth/Qwen3.5-27B-GGUF', 'unsloth/Qwen3.5-4B-GGUF', 'unsloth/gemma-4-E2B-it-GGUF', 'unsloth/Qwen3.5-122B-A10B-GGUF', 'unsloth/Qwen3.5-2B-GGUF', 'unsloth/Qwen3.5-0.8B-GGUF', 'unsloth/LTX-2.3-GGUF', 'unsloth/Qwen3-Coder-Next-GGUF', 'unsloth/gpt-oss-20b-GGUF', 'unsloth/Nemotron-3-Nano-30B-A3B-GGUF', 'unsloth/gpt-oss-120b-GGUF', 'unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF', 'unsloth/GLM-4.7-Flash-GGUF', 'unsloth/gemma-3-27b-it-GGUF', 'unsloth/gemma-3-12b-it-GGUF', 'unsloth/NVIDIA-Nemotron-3-Super-120B-A12B-GGUF', 'unsloth/Qwen-Image-Edit-2511-GGUF', 'unsloth/Qwen3-VL-4B-Instruct-GGUF', 'unsloth/Qwen3.5-397B-A17B-GGUF', 'unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF', 'unsloth/Qwen2.5-VL-7B-Instruct-GGUF', 'unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF', 'unsloth/gemma-3-4b-it-GGUF', 'unsloth/Llama-3.2-1B-Instruct-GGUF', 'unsloth/gemma-3-270m-it-GGUF', 'unsloth/MiniMax-M2.5-GGUF', 'unsloth/Qwen3-VL-8B-Instruct-GGUF', 'unsloth/Qwen3-4B-Instruct-2507-GGUF']"}
{"timestamp": "2026-04-11T06:16:12.621894Z", "level": "info", "event": "Top hub models: ['unsloth/mistral-7b-v0.3-bnb-4bit', 'unsloth/Llama-3.1-8B-Instruct', 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit', 'unsloth/Qwen3-0.6B', 'unsloth/Qwen3-0.6B-unsloth-bnb-4bit', 'unsloth/GLM-4.7-Flash', 'unsloth/DeepSeek-OCR-2', 'unsloth/Qwen2.5-7B-Instruct', 'unsloth/Qwen3.5-9B', 'unsloth/gpt-oss-20b-unsloth-bnb-4bit', 'unsloth/Qwen2.5-7B-Instruct-bnb-4bit', 'unsloth/Qwen3-14B-unsloth-bnb-4bit', 'unsloth/Qwen3.5-4B', 'unsloth/Mistral-Small-3.2-24B-Instruct-2506-bnb-4bit', 'unsloth/Meta-Llama-3.1-8B-Instruct', 'unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit', 'unsloth/GLM-4.7-Flash-FP8-Dynamic', 'unsloth/Qwen2.5-7B', 'unsloth/Qwen3.5-2B', 'unsloth/Qwen3-1.7B-unsloth-bnb-4bit', 'unsloth/Llama-3.2-1B-Instruct', 'unsloth/gpt-oss-20b', 'unsloth/Llama-3.2-3B-Instruct', 'unsloth/Qwen3-VL-4B-Instruct', 'unsloth/Qwen2.5-3B-Instruct-unsloth-bnb-4bit', 'unsloth/Qwen3-8B-unsloth-bnb-4bit', 'unsloth/gpt-oss-120b-BF16', 'unsloth/Qwen3.5-0.8B', 'unsloth/Qwen2-7B', 'unsloth/Llama-3.2-1B-Instruct-unsloth-bnb-4bit', 'unsloth/Qwen2.5-7B-Instruct-unsloth-bnb-4bit', 'unsloth/Qwen2.5-0.5B-unsloth-bnb-4bit', 'unsloth/gpt-oss-20b-BF16', 'unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit', 'unsloth/Llama-3.2-1B', 'unsloth/Qwen3-4B-bnb-4bit', 'unsloth/Qwen3-VL-2B-Instruct-unsloth-bnb-4bit', 'unsloth/llama-3-8b-bnb-4bit', 'unsloth/Qwen3-1.7B', 'unsloth/gemma-4-31B-it-unsloth-bnb-4bit']"}
{"timestamp": "2026-04-11T06:16:12.624416Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:16:12.624468Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:16:12.624627Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 289.74}
{"timestamp": "2026-04-11T06:16:12.624729Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 289.81}
{"timestamp": "2026-04-11T06:16:12.626276Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 2.39}
{"timestamp": "2026-04-11T06:16:12.626439Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.88}
{"timestamp": "2026-04-11T06:16:17.675784Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 14.65}
{"timestamp": "2026-04-11T06:16:17.682411Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 19.18}
{"timestamp": "2026-04-11T06:16:17.682533Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/cached-gguf", "status_code": 200, "process_time_ms": 19.27}
{"timestamp": "2026-04-11T06:16:17.682719Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/cached-models", "status_code": 200, "process_time_ms": 6.97}
{"timestamp": "2026-04-11T06:16:17.687398Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.29}
{"timestamp": "2026-04-11T06:16:17.687588Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.7}
{"timestamp": "2026-04-11T06:16:45.404505Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.11}
{"timestamp": "2026-04-11T06:16:45.404678Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.64}
{"timestamp": "2026-04-11T06:16:49.187256Z", "level": "info", "event": "Scan folder added: /mnt/Data/lmstudio"}
{"timestamp": "2026-04-11T06:16:49.187407Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/models/scan-folders", "status_code": 201, "process_time_ms": 37.6}
{"timestamp": "2026-04-11T06:16:49.191744Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.27}
{"timestamp": "2026-04-11T06:16:49.191911Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.64}
{"timestamp": "2026-04-11T06:16:49.194391Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.4}
{"timestamp": "2026-04-11T06:16:54.248245Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/chat", "status_code": 200, "process_time_ms": 0.58}
{"timestamp": "2026-04-11T06:16:54.262840Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/health", "status_code": 200, "process_time_ms": 0.24}
{"timestamp": "2026-04-11T06:16:54.272677Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.48}
{"timestamp": "2026-04-11T06:16:54.593967Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:16:54.594032Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:16:54.594160Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 2.67}
{"timestamp": "2026-04-11T06:16:54.594267Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 2.74}
{"timestamp": "2026-04-11T06:16:54.596579Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 3.05}
{"timestamp": "2026-04-11T06:16:54.596781Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.64}
{"timestamp": "2026-04-11T06:16:55.484133Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 33.06}
{"timestamp": "2026-04-11T06:16:55.489699Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 12.07}
{"timestamp": "2026-04-11T06:16:55.489813Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/cached-gguf", "status_code": 200, "process_time_ms": 12.15}
{"timestamp": "2026-04-11T06:16:55.490011Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/cached-models", "status_code": 200, "process_time_ms": 6.03}
{"timestamp": "2026-04-11T06:16:55.494807Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.69}
{"timestamp": "2026-04-11T06:16:55.495000Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.75}
{"timestamp": "2026-04-11T06:17:07.686963Z", "level": "info", "event": "Scan folder removed: id=1"}
{"timestamp": "2026-04-11T06:17:07.687118Z", "level": "info", "event": "request_completed", "method": "DELETE", "path": "/api/models/scan-folders/1", "status_code": 200, "process_time_ms": 34.32}
{"timestamp": "2026-04-11T06:17:07.690429Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.87}
{"timestamp": "2026-04-11T06:17:07.692327Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.93}
{"timestamp": "2026-04-11T06:17:07.694257Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.61}
{"timestamp": "2026-04-11T06:17:17.808649Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.99}
{"timestamp": "2026-04-11T06:17:17.808774Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 2.09}
{"timestamp": "2026-04-11T06:17:22.892741Z", "level": "info", "event": "Scan folder added: /mnt/Data/lmstudio/models"}
{"timestamp": "2026-04-11T06:17:22.892893Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/models/scan-folders", "status_code": 201, "process_time_ms": 36.13}
{"timestamp": "2026-04-11T06:17:22.897465Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.23}
{"timestamp": "2026-04-11T06:17:22.898101Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.66}
{"timestamp": "2026-04-11T06:17:22.900574Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.82}
{"timestamp": "2026-04-11T06:17:25.659418Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/gguf-variants", "status_code": 200, "process_time_ms": 1.56}
{"timestamp": "2026-04-11T06:17:27.120652Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:17:27.120813Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.89}
{"timestamp": "2026-04-11T06:17:27.129673Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:17:27.192127Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:17:27.192164Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:17:27.192177Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:17:27.192197Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:17:27.213913Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 56.7 GB, context: 262144, GPUs free: [(0, 15838)], selected: None, fit: True"}
{"timestamp": "2026-04-11T06:17:27.213978Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:17:27.214000Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 56991 -c 262144 --parallel 1 --flash-attn on --fit on --jinja --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:17:33.303649Z", "level": "info", "event": "llama-server ready on port 56991 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:17:33.303788Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:17:33.310313Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:17:33.311843Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 6183.29}
{"timestamp": "2026-04-11T06:17:33.317161Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:17:33.318260Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 3.78}
{"timestamp": "2026-04-11T06:17:33.318624Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:17:33.318658Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:17:33.318789Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 3.0}
{"timestamp": "2026-04-11T06:17:33.318913Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 0.68}
{"timestamp": "2026-04-11T06:17:46.136237Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:17:46.136386Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.72}
{"timestamp": "2026-04-11T06:17:46.306021Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:17:46.306076Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:17:46.306219Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/unload", "status_code": 200, "process_time_ms": 164.33}
{"timestamp": "2026-04-11T06:17:46.308688Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:17:46.375421Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:17:46.375453Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:17:46.375464Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:17:46.375481Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:17:46.387124Z", "level": "info", "event": "Context auto-reduced: 262144 -> 8960 (model: 12.5 GB, est. KV cache: 1.4 GB)"}
{"timestamp": "2026-04-11T06:17:46.387175Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 1.4 GB, context: 8960, GPUs free: [(0, 15838)], selected: [0], fit: False"}
{"timestamp": "2026-04-11T06:17:46.387191Z", "level": "info", "event": "KV cache type: q8_0"}
{"timestamp": "2026-04-11T06:17:46.387206Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:17:46.387221Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 47417 -c 8960 --parallel 1 --flash-attn on -ngl -1 --jinja --cache-type-k q8_0 --cache-type-v q8_0 --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:17:48.407667Z", "level": "info", "event": "llama-server ready on port 47417 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:17:48.407807Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:17:48.414302Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:17:48.415694Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 2107.55}
{"timestamp": "2026-04-11T06:17:48.426497Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:17:48.428029Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 9.33}
{"timestamp": "2026-04-11T06:17:48.428546Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:17:48.428591Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:17:48.428763Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 8.43}
{"timestamp": "2026-04-11T06:17:48.428924Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 0.93}
{"timestamp": "2026-04-11T06:18:09.203741Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/v1/chat/completions", "status_code": 200, "process_time_ms": 17.75}
{"timestamp": "2026-04-11T06:45:47.128534Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:45:47.128738Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.97}
{"timestamp": "2026-04-11T06:45:48.202268Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:45:48.202341Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:45:48.202504Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/unload", "status_code": 200, "process_time_ms": 1065.8}
{"timestamp": "2026-04-11T06:45:48.205336Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:45:48.254023Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:45:48.254062Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:45:48.254075Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:45:48.254094Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:45:48.267038Z", "level": "info", "event": "Context auto-reduced: 48128 -> 8960 (model: 12.5 GB, est. KV cache: 1.4 GB)"}
{"timestamp": "2026-04-11T06:45:48.267088Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 1.4 GB, context: 8960, GPUs free: [(0, 15838)], selected: [0], fit: False"}
{"timestamp": "2026-04-11T06:45:48.267103Z", "level": "info", "event": "KV cache type: q8_0"}
{"timestamp": "2026-04-11T06:45:48.267116Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:45:48.267131Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 56807 -c 8960 --parallel 1 --flash-attn on -ngl -1 --jinja --cache-type-k q8_0 --cache-type-v q8_0 --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:45:50.287039Z", "level": "info", "event": "llama-server ready on port 56807 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:45:50.287176Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:45:50.293336Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:45:50.294727Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 2090.09}
{"timestamp": "2026-04-11T06:45:50.300724Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:45:50.302173Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 4.83}
{"timestamp": "2026-04-11T06:45:50.302676Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:45:50.302722Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:45:50.302894Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 3.9}
{"timestamp": "2026-04-11T06:45:50.303056Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 0.92}
{"timestamp": "2026-04-11T06:46:20.204042Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:46:20.204225Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.93}
{"timestamp": "2026-04-11T06:46:21.276104Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:46:21.276170Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:46:21.276324Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/unload", "status_code": 200, "process_time_ms": 1065.73}
{"timestamp": "2026-04-11T06:46:21.279187Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:46:21.346696Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:46:21.346733Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:46:21.346747Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:46:21.346766Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:46:21.358417Z", "level": "info", "event": "Context auto-reduced: 49152 -> 8960 (model: 12.5 GB, est. KV cache: 1.4 GB)"}
{"timestamp": "2026-04-11T06:46:21.358469Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 1.4 GB, context: 8960, GPUs free: [(0, 15838)], selected: [0], fit: False"}
{"timestamp": "2026-04-11T06:46:21.358484Z", "level": "info", "event": "KV cache type: q8_0"}
{"timestamp": "2026-04-11T06:46:21.358499Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:46:21.358515Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 58769 -c 8960 --parallel 1 --flash-attn on -ngl -1 --jinja --cache-type-k q8_0 --cache-type-v q8_0 --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:46:22.871975Z", "level": "info", "event": "llama-server ready on port 58769 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:46:22.872112Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:46:22.878117Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:46:22.879522Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 1601.03}
{"timestamp": "2026-04-11T06:46:22.885358Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:46:22.887041Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:46:22.887085Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:46:22.887179Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 5.18}
{"timestamp": "2026-04-11T06:46:22.887368Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 3.69}
{"timestamp": "2026-04-11T06:46:22.887453Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 3.74}
{"timestamp": "2026-04-11T06:47:41.070396Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/v1/chat/completions", "status_code": 200, "process_time_ms": 0.81}
{"timestamp": "2026-04-11T06:47:46.621347Z", "level": "info", "event": "Parsed 1 tool call(s) from structured delta"}
{"timestamp": "2026-04-11T06:47:46.621992Z", "level": "info", "event": "execute_tool: name=web_search, session_id=None, timeout=300"}
{"timestamp": "2026-04-11T06:49:07.110109Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/chat", "status_code": 200, "process_time_ms": 0.47}
{"timestamp": "2026-04-11T06:49:07.137798Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/health", "status_code": 200, "process_time_ms": 0.22}
{"timestamp": "2026-04-11T06:49:07.149265Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.58}
{"timestamp": "2026-04-11T06:49:07.474435Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:49:07.475900Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 5.01}
{"timestamp": "2026-04-11T06:49:07.476407Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:49:07.476459Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:49:07.479277Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 6.66}
{"timestamp": "2026-04-11T06:49:07.479498Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 3.65}
{"timestamp": "2026-04-11T06:49:07.479590Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 3.72}
{"timestamp": "2026-04-11T06:49:16.406132Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:49:16.406283Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.73}
{"timestamp": "2026-04-11T06:49:17.477461Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:17.477511Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:17.477631Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/unload", "status_code": 200, "process_time_ms": 1065.29}
{"timestamp": "2026-04-11T06:49:17.479900Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:49:17.527174Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:49:17.527208Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:49:17.527221Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:49:17.527241Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:49:17.538491Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 1.4 GB, context: 8960, GPUs free: [(0, 15838)], selected: [0], fit: False"}
{"timestamp": "2026-04-11T06:49:17.538533Z", "level": "info", "event": "KV cache type: q8_0"}
{"timestamp": "2026-04-11T06:49:17.538552Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:49:17.538570Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 37253 -c 8960 --parallel 1 --flash-attn on -ngl -1 --jinja --cache-type-k q8_0 --cache-type-v q8_0 --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:49:19.557901Z", "level": "info", "event": "llama-server ready on port 37253 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:49:19.558043Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:19.564084Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:49:19.565467Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 2086.1}
{"timestamp": "2026-04-11T06:49:19.570389Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:49:19.571471Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 3.63}
{"timestamp": "2026-04-11T06:49:19.571868Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:49:19.571907Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:49:19.572042Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 2.98}
{"timestamp": "2026-04-11T06:49:19.572167Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 0.72}
{"timestamp": "2026-04-11T06:49:32.304883Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/v1/chat/completions", "status_code": 200, "process_time_ms": 0.83}
{"timestamp": "2026-04-11T06:49:40.263912Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:49:40.264062Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.73}
{"timestamp": "2026-04-11T06:49:40.434767Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:40.434819Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:40.434950Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/unload", "status_code": 200, "process_time_ms": 164.26}
{"timestamp": "2026-04-11T06:49:40.437735Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:49:40.504999Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:49:40.505035Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:49:40.505049Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:49:40.505068Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:49:40.516316Z", "level": "info", "event": "Context auto-reduced: 20480 -> 8960 (model: 12.5 GB, est. KV cache: 1.4 GB)"}
{"timestamp": "2026-04-11T06:49:40.516357Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 1.4 GB, context: 8960, GPUs free: [(0, 15838)], selected: [0], fit: False"}
{"timestamp": "2026-04-11T06:49:40.516373Z", "level": "info", "event": "KV cache type: q8_0"}
{"timestamp": "2026-04-11T06:49:40.516387Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:49:40.516402Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 37459 -c 8960 --parallel 1 --flash-attn on -ngl -1 --jinja --cache-type-k q8_0 --cache-type-v q8_0 --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:49:42.535261Z", "level": "info", "event": "llama-server ready on port 37459 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:49:42.535404Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:42.541588Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:49:42.543017Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 2105.87}
{"timestamp": "2026-04-11T06:49:42.548926Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:49:42.550337Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 4.7}
{"timestamp": "2026-04-11T06:49:42.550860Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:49:42.550906Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:49:42.551072Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 3.82}
{"timestamp": "2026-04-11T06:49:42.551230Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 0.93}
Latest version of Unsloth Studio - Linux Fedora43
Attached Video for easier understanding of the issue.
Context length can't be pushed beyond 8k, even though nvidia-smi reports 14/16 GB VRAM in use and system RAM spill is acknowledged — yet the logs show it's running with --fit off.
Screencast.From.2026-04-11.09-49-27.mp4
Server Logs start to end:
{"timestamp": "2026-04-11T06:15:59.751543Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/auth/login", "status_code": 401, "process_time_ms": 7.59}
{"timestamp": "2026-04-11T06:16:04.165769Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/auth/login", "status_code": 200, "process_time_ms": 38.72}
{"timestamp": "2026-04-11T06:16:04.171886Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.44}
{"timestamp": "2026-04-11T06:16:04.173385Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.34}
{"timestamp": "2026-04-11T06:16:04.178944Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/auth/refresh", "status_code": 200, "process_time_ms": 0.72}
{"timestamp": "2026-04-11T06:16:04.185375Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.56}
{"timestamp": "2026-04-11T06:16:06.663939Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/studio", "status_code": 200, "process_time_ms": 0.47}
{"timestamp": "2026-04-11T06:16:06.686881Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/health", "status_code": 200, "process_time_ms": 0.25}
{"timestamp": "2026-04-11T06:16:06.697734Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.56}
{"timestamp": "2026-04-11T06:16:07.068395Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 4.04}
{"timestamp": "2026-04-11T06:16:07.069207Z", "level": "info", "event": "TrainingBackend initialized (subprocess mode)"}
{"timestamp": "2026-04-11T06:16:07.083851Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.53}
{"timestamp": "2026-04-11T06:16:12.253230Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.63}
{"timestamp": "2026-04-11T06:16:12.275022Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.39}
{"timestamp": "2026-04-11T06:16:12.335652Z", "level": "info", "event": "InferenceOrchestrator initialized (subprocess mode)"}
{"timestamp": "2026-04-11T06:16:12.621841Z", "level": "info", "event": "Top GGUF models: ['unsloth/Qwen3.5-35B-A3B-GGUF', 'unsloth/gemma-4-26B-A4B-it-GGUF', 'unsloth/Qwen3.5-9B-GGUF', 'unsloth/gemma-4-31B-it-GGUF', 'unsloth/gemma-4-E4B-it-GGUF', 'unsloth/Qwen3.5-27B-GGUF', 'unsloth/Qwen3.5-4B-GGUF', 'unsloth/gemma-4-E2B-it-GGUF', 'unsloth/Qwen3.5-122B-A10B-GGUF', 'unsloth/Qwen3.5-2B-GGUF', 'unsloth/Qwen3.5-0.8B-GGUF', 'unsloth/LTX-2.3-GGUF', 'unsloth/Qwen3-Coder-Next-GGUF', 'unsloth/gpt-oss-20b-GGUF', 'unsloth/Nemotron-3-Nano-30B-A3B-GGUF', 'unsloth/gpt-oss-120b-GGUF', 'unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF', 'unsloth/GLM-4.7-Flash-GGUF', 'unsloth/gemma-3-27b-it-GGUF', 'unsloth/gemma-3-12b-it-GGUF', 'unsloth/NVIDIA-Nemotron-3-Super-120B-A12B-GGUF', 'unsloth/Qwen-Image-Edit-2511-GGUF', 'unsloth/Qwen3-VL-4B-Instruct-GGUF', 'unsloth/Qwen3.5-397B-A17B-GGUF', 'unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF', 'unsloth/Qwen2.5-VL-7B-Instruct-GGUF', 'unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF', 'unsloth/gemma-3-4b-it-GGUF', 'unsloth/Llama-3.2-1B-Instruct-GGUF', 'unsloth/gemma-3-270m-it-GGUF', 'unsloth/MiniMax-M2.5-GGUF', 'unsloth/Qwen3-VL-8B-Instruct-GGUF', 'unsloth/Qwen3-4B-Instruct-2507-GGUF']"}
{"timestamp": "2026-04-11T06:16:12.621894Z", "level": "info", "event": "Top hub models: ['unsloth/mistral-7b-v0.3-bnb-4bit', 'unsloth/Llama-3.1-8B-Instruct', 'unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit', 'unsloth/Qwen3-0.6B', 'unsloth/Qwen3-0.6B-unsloth-bnb-4bit', 'unsloth/GLM-4.7-Flash', 'unsloth/DeepSeek-OCR-2', 'unsloth/Qwen2.5-7B-Instruct', 'unsloth/Qwen3.5-9B', 'unsloth/gpt-oss-20b-unsloth-bnb-4bit', 'unsloth/Qwen2.5-7B-Instruct-bnb-4bit', 'unsloth/Qwen3-14B-unsloth-bnb-4bit', 'unsloth/Qwen3.5-4B', 'unsloth/Mistral-Small-3.2-24B-Instruct-2506-bnb-4bit', 'unsloth/Meta-Llama-3.1-8B-Instruct', 'unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit', 'unsloth/GLM-4.7-Flash-FP8-Dynamic', 'unsloth/Qwen2.5-7B', 'unsloth/Qwen3.5-2B', 'unsloth/Qwen3-1.7B-unsloth-bnb-4bit', 'unsloth/Llama-3.2-1B-Instruct', 'unsloth/gpt-oss-20b', 'unsloth/Llama-3.2-3B-Instruct', 'unsloth/Qwen3-VL-4B-Instruct', 'unsloth/Qwen2.5-3B-Instruct-unsloth-bnb-4bit', 'unsloth/Qwen3-8B-unsloth-bnb-4bit', 'unsloth/gpt-oss-120b-BF16', 'unsloth/Qwen3.5-0.8B', 'unsloth/Qwen2-7B', 'unsloth/Llama-3.2-1B-Instruct-unsloth-bnb-4bit', 'unsloth/Qwen2.5-7B-Instruct-unsloth-bnb-4bit', 'unsloth/Qwen2.5-0.5B-unsloth-bnb-4bit', 'unsloth/gpt-oss-20b-BF16', 'unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit', 'unsloth/Llama-3.2-1B', 'unsloth/Qwen3-4B-bnb-4bit', 'unsloth/Qwen3-VL-2B-Instruct-unsloth-bnb-4bit', 'unsloth/llama-3-8b-bnb-4bit', 'unsloth/Qwen3-1.7B', 'unsloth/gemma-4-31B-it-unsloth-bnb-4bit']"}
{"timestamp": "2026-04-11T06:16:12.624416Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:16:12.624468Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:16:12.624627Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 289.74}
{"timestamp": "2026-04-11T06:16:12.624729Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 289.81}
{"timestamp": "2026-04-11T06:16:12.626276Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 2.39}
{"timestamp": "2026-04-11T06:16:12.626439Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.88}
{"timestamp": "2026-04-11T06:16:17.675784Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 14.65}
{"timestamp": "2026-04-11T06:16:17.682411Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 19.18}
{"timestamp": "2026-04-11T06:16:17.682533Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/cached-gguf", "status_code": 200, "process_time_ms": 19.27}
{"timestamp": "2026-04-11T06:16:17.682719Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/cached-models", "status_code": 200, "process_time_ms": 6.97}
{"timestamp": "2026-04-11T06:16:17.687398Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.29}
{"timestamp": "2026-04-11T06:16:17.687588Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.7}
{"timestamp": "2026-04-11T06:16:45.404505Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.11}
{"timestamp": "2026-04-11T06:16:45.404678Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.64}
{"timestamp": "2026-04-11T06:16:49.187256Z", "level": "info", "event": "Scan folder added: /mnt/Data/lmstudio"}
{"timestamp": "2026-04-11T06:16:49.187407Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/models/scan-folders", "status_code": 201, "process_time_ms": 37.6}
{"timestamp": "2026-04-11T06:16:49.191744Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.27}
{"timestamp": "2026-04-11T06:16:49.191911Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.64}
{"timestamp": "2026-04-11T06:16:49.194391Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.4}
{"timestamp": "2026-04-11T06:16:54.248245Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/chat", "status_code": 200, "process_time_ms": 0.58}
{"timestamp": "2026-04-11T06:16:54.262840Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/health", "status_code": 200, "process_time_ms": 0.24}
{"timestamp": "2026-04-11T06:16:54.272677Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.48}
{"timestamp": "2026-04-11T06:16:54.593967Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:16:54.594032Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:16:54.594160Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 2.67}
{"timestamp": "2026-04-11T06:16:54.594267Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 2.74}
{"timestamp": "2026-04-11T06:16:54.596579Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 3.05}
{"timestamp": "2026-04-11T06:16:54.596781Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.64}
{"timestamp": "2026-04-11T06:16:55.484133Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 33.06}
{"timestamp": "2026-04-11T06:16:55.489699Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 12.07}
{"timestamp": "2026-04-11T06:16:55.489813Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/cached-gguf", "status_code": 200, "process_time_ms": 12.15}
{"timestamp": "2026-04-11T06:16:55.490011Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/cached-models", "status_code": 200, "process_time_ms": 6.03}
{"timestamp": "2026-04-11T06:16:55.494807Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.69}
{"timestamp": "2026-04-11T06:16:55.495000Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.75}
{"timestamp": "2026-04-11T06:17:07.686963Z", "level": "info", "event": "Scan folder removed: id=1"}
{"timestamp": "2026-04-11T06:17:07.687118Z", "level": "info", "event": "request_completed", "method": "DELETE", "path": "/api/models/scan-folders/1", "status_code": 200, "process_time_ms": 34.32}
{"timestamp": "2026-04-11T06:17:07.690429Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.87}
{"timestamp": "2026-04-11T06:17:07.692327Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.93}
{"timestamp": "2026-04-11T06:17:07.694257Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.61}
{"timestamp": "2026-04-11T06:17:17.808649Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.99}
{"timestamp": "2026-04-11T06:17:17.808774Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 2.09}
{"timestamp": "2026-04-11T06:17:22.892741Z", "level": "info", "event": "Scan folder added: /mnt/Data/lmstudio/models"}
{"timestamp": "2026-04-11T06:17:22.892893Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/models/scan-folders", "status_code": 201, "process_time_ms": 36.13}
{"timestamp": "2026-04-11T06:17:22.897465Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 2.23}
{"timestamp": "2026-04-11T06:17:22.898101Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/scan-folders", "status_code": 200, "process_time_ms": 0.66}
{"timestamp": "2026-04-11T06:17:22.900574Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 1.82}
{"timestamp": "2026-04-11T06:17:25.659418Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/gguf-variants", "status_code": 200, "process_time_ms": 1.56}
{"timestamp": "2026-04-11T06:17:27.120652Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:17:27.120813Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.89}
{"timestamp": "2026-04-11T06:17:27.129673Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:17:27.192127Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:17:27.192164Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:17:27.192177Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:17:27.192197Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:17:27.213913Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 56.7 GB, context: 262144, GPUs free: [(0, 15838)], selected: None, fit: True"}
{"timestamp": "2026-04-11T06:17:27.213978Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:17:27.214000Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 56991 -c 262144 --parallel 1 --flash-attn on --fit on --jinja --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:17:33.303649Z", "level": "info", "event": "llama-server ready on port 56991 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:17:33.303788Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:17:33.310313Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:17:33.311843Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 6183.29}
{"timestamp": "2026-04-11T06:17:33.317161Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:17:33.318260Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 3.78}
{"timestamp": "2026-04-11T06:17:33.318624Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:17:33.318658Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:17:33.318789Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 3.0}
{"timestamp": "2026-04-11T06:17:33.318913Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 0.68}
{"timestamp": "2026-04-11T06:17:46.136237Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:17:46.136386Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.72}
{"timestamp": "2026-04-11T06:17:46.306021Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:17:46.306076Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:17:46.306219Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/unload", "status_code": 200, "process_time_ms": 164.33}
{"timestamp": "2026-04-11T06:17:46.308688Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:17:46.375421Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:17:46.375453Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:17:46.375464Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:17:46.375481Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:17:46.387124Z", "level": "info", "event": "Context auto-reduced: 262144 -> 8960 (model: 12.5 GB, est. KV cache: 1.4 GB)"}
{"timestamp": "2026-04-11T06:17:46.387175Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 1.4 GB, context: 8960, GPUs free: [(0, 15838)], selected: [0], fit: False"}
{"timestamp": "2026-04-11T06:17:46.387191Z", "level": "info", "event": "KV cache type: q8_0"}
{"timestamp": "2026-04-11T06:17:46.387206Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:17:46.387221Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 47417 -c 8960 --parallel 1 --flash-attn on -ngl -1 --jinja --cache-type-k q8_0 --cache-type-v q8_0 --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:17:48.407667Z", "level": "info", "event": "llama-server ready on port 47417 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:17:48.407807Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:17:48.414302Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:17:48.415694Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 2107.55}
{"timestamp": "2026-04-11T06:17:48.426497Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:17:48.428029Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 9.33}
{"timestamp": "2026-04-11T06:17:48.428546Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:17:48.428591Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:17:48.428763Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 8.43}
{"timestamp": "2026-04-11T06:17:48.428924Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 0.93}
{"timestamp": "2026-04-11T06:18:09.203741Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/v1/chat/completions", "status_code": 200, "process_time_ms": 17.75}
{"timestamp": "2026-04-11T06:45:47.128534Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:45:47.128738Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.97}
{"timestamp": "2026-04-11T06:45:48.202268Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:45:48.202341Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:45:48.202504Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/unload", "status_code": 200, "process_time_ms": 1065.8}
{"timestamp": "2026-04-11T06:45:48.205336Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:45:48.254023Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:45:48.254062Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:45:48.254075Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:45:48.254094Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:45:48.267038Z", "level": "info", "event": "Context auto-reduced: 48128 -> 8960 (model: 12.5 GB, est. KV cache: 1.4 GB)"}
{"timestamp": "2026-04-11T06:45:48.267088Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 1.4 GB, context: 8960, GPUs free: [(0, 15838)], selected: [0], fit: False"}
{"timestamp": "2026-04-11T06:45:48.267103Z", "level": "info", "event": "KV cache type: q8_0"}
{"timestamp": "2026-04-11T06:45:48.267116Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:45:48.267131Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 56807 -c 8960 --parallel 1 --flash-attn on -ngl -1 --jinja --cache-type-k q8_0 --cache-type-v q8_0 --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:45:50.287039Z", "level": "info", "event": "llama-server ready on port 56807 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:45:50.287176Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:45:50.293336Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:45:50.294727Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 2090.09}
{"timestamp": "2026-04-11T06:45:50.300724Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:45:50.302173Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 4.83}
{"timestamp": "2026-04-11T06:45:50.302676Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:45:50.302722Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:45:50.302894Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 3.9}
{"timestamp": "2026-04-11T06:45:50.303056Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 0.92}
{"timestamp": "2026-04-11T06:46:20.204042Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:46:20.204225Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.93}
{"timestamp": "2026-04-11T06:46:21.276104Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:46:21.276170Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:46:21.276324Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/unload", "status_code": 200, "process_time_ms": 1065.73}
{"timestamp": "2026-04-11T06:46:21.279187Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:46:21.346696Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:46:21.346733Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:46:21.346747Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:46:21.346766Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:46:21.358417Z", "level": "info", "event": "Context auto-reduced: 49152 -> 8960 (model: 12.5 GB, est. KV cache: 1.4 GB)"}
{"timestamp": "2026-04-11T06:46:21.358469Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 1.4 GB, context: 8960, GPUs free: [(0, 15838)], selected: [0], fit: False"}
{"timestamp": "2026-04-11T06:46:21.358484Z", "level": "info", "event": "KV cache type: q8_0"}
{"timestamp": "2026-04-11T06:46:21.358499Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:46:21.358515Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 58769 -c 8960 --parallel 1 --flash-attn on -ngl -1 --jinja --cache-type-k q8_0 --cache-type-v q8_0 --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:46:22.871975Z", "level": "info", "event": "llama-server ready on port 58769 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:46:22.872112Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:46:22.878117Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:46:22.879522Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 1601.03}
{"timestamp": "2026-04-11T06:46:22.885358Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:46:22.887041Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:46:22.887085Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:46:22.887179Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 5.18}
{"timestamp": "2026-04-11T06:46:22.887368Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 3.69}
{"timestamp": "2026-04-11T06:46:22.887453Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 3.74}
{"timestamp": "2026-04-11T06:47:41.070396Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/v1/chat/completions", "status_code": 200, "process_time_ms": 0.81}
{"timestamp": "2026-04-11T06:47:46.621347Z", "level": "info", "event": "Parsed 1 tool call(s) from structured delta"}
{"timestamp": "2026-04-11T06:47:46.621992Z", "level": "info", "event": "execute_tool: name=web_search, session_id=None, timeout=300"}
{"timestamp": "2026-04-11T06:49:07.110109Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/chat", "status_code": 200, "process_time_ms": 0.47}
{"timestamp": "2026-04-11T06:49:07.137798Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/health", "status_code": 200, "process_time_ms": 0.22}
{"timestamp": "2026-04-11T06:49:07.149265Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/auth/status", "status_code": 200, "process_time_ms": 0.58}
{"timestamp": "2026-04-11T06:49:07.474435Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:49:07.475900Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 5.01}
{"timestamp": "2026-04-11T06:49:07.476407Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:49:07.476459Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:49:07.479277Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 6.66}
{"timestamp": "2026-04-11T06:49:07.479498Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 3.65}
{"timestamp": "2026-04-11T06:49:07.479590Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/local", "status_code": 200, "process_time_ms": 3.72}
{"timestamp": "2026-04-11T06:49:16.406132Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:49:16.406283Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.73}
{"timestamp": "2026-04-11T06:49:17.477461Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:17.477511Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:17.477631Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/unload", "status_code": 200, "process_time_ms": 1065.29}
{"timestamp": "2026-04-11T06:49:17.479900Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:49:17.527174Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:49:17.527208Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:49:17.527221Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:49:17.527241Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:49:17.538491Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 1.4 GB, context: 8960, GPUs free: [(0, 15838)], selected: [0], fit: False"}
{"timestamp": "2026-04-11T06:49:17.538533Z", "level": "info", "event": "KV cache type: q8_0"}
{"timestamp": "2026-04-11T06:49:17.538552Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:49:17.538570Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 37253 -c 8960 --parallel 1 --flash-attn on -ngl -1 --jinja --cache-type-k q8_0 --cache-type-v q8_0 --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:49:19.557901Z", "level": "info", "event": "llama-server ready on port 37253 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:49:19.558043Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:19.564084Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:49:19.565467Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 2086.1}
{"timestamp": "2026-04-11T06:49:19.570389Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:49:19.571471Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 3.63}
{"timestamp": "2026-04-11T06:49:19.571868Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:49:19.571907Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:49:19.572042Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 2.98}
{"timestamp": "2026-04-11T06:49:19.572167Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 0.72}
{"timestamp": "2026-04-11T06:49:32.304883Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/v1/chat/completions", "status_code": 200, "process_time_ms": 0.83}
{"timestamp": "2026-04-11T06:49:40.263912Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:49:40.264062Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/validate", "status_code": 200, "process_time_ms": 0.73}
{"timestamp": "2026-04-11T06:49:40.434767Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:40.434819Z", "level": "info", "event": "Unloaded GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:40.434950Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/unload", "status_code": 200, "process_time_ms": 164.26}
{"timestamp": "2026-04-11T06:49:40.437735Z", "level": "info", "event": "Detected local GGUF model: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf"}
{"timestamp": "2026-04-11T06:49:40.504999Z", "level": "info", "event": "GGUF metadata: context_length=262144"}
{"timestamp": "2026-04-11T06:49:40.505035Z", "level": "info", "event": "GGUF metadata: chat_template=12045 chars"}
{"timestamp": "2026-04-11T06:49:40.505049Z", "level": "info", "event": "GGUF metadata: model supports reasoning (enable_thinking)"}
{"timestamp": "2026-04-11T06:49:40.505068Z", "level": "info", "event": "GGUF metadata: model supports tool calling"}
{"timestamp": "2026-04-11T06:49:40.516316Z", "level": "info", "event": "Context auto-reduced: 20480 -> 8960 (model: 12.5 GB, est. KV cache: 1.4 GB)"}
{"timestamp": "2026-04-11T06:49:40.516357Z", "level": "info", "event": "GGUF size: 12.5 GB, est. KV cache: 1.4 GB, context: 8960, GPUs free: [(0, 15838)], selected: [0], fit: False"}
{"timestamp": "2026-04-11T06:49:40.516373Z", "level": "info", "event": "KV cache type: q8_0"}
{"timestamp": "2026-04-11T06:49:40.516387Z", "level": "info", "event": "Reasoning model: enable_thinking=True by default"}
{"timestamp": "2026-04-11T06:49:40.516402Z", "level": "info", "event": "Starting llama-server: /home/yk/.unsloth/llama.cpp/llama-server -m /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF/gemma-4-26B-A4B-it-UD-IQ4_NL.gguf --port 37459 -c 8960 --parallel 1 --flash-attn on -ngl -1 --jinja --cache-type-k q8_0 --cache-type-v q8_0 --spec-type ngram-mod --spec-ngram-size-n 24 --draft-min 48 --draft-max 64 --chat-template-kwargs {"enable_thinking": true}"}
{"timestamp": "2026-04-11T06:49:42.535261Z", "level": "info", "event": "llama-server ready on port 37459 for model '/mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF'"}
{"timestamp": "2026-04-11T06:49:42.535404Z", "level": "info", "event": "Loaded GGUF model via llama-server: /mnt/Data/lmstudio/models/unsloth/gemma-4-26B-A4B-it-GGUF"}
{"timestamp": "2026-04-11T06:49:42.541588Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:49:42.543017Z", "level": "info", "event": "request_completed", "method": "POST", "path": "/api/inference/load", "status_code": 200, "process_time_ms": 2105.87}
{"timestamp": "2026-04-11T06:49:42.548926Z", "level": "info", "event": "Loaded default model defaults from /home/yk/.unsloth/studio/unsloth_studio/lib/python3.13/site-packages/studio/backend/assets/configs/model_defaults/default.yaml"}
{"timestamp": "2026-04-11T06:49:42.550337Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/list", "status_code": 200, "process_time_ms": 4.7}
{"timestamp": "2026-04-11T06:49:42.550860Z", "level": "info", "event": "Found 0 trained LoRA adapters in /home/yk/.unsloth/studio/outputs"}
{"timestamp": "2026-04-11T06:49:42.550906Z", "level": "info", "event": "Found 0 exported models in /home/yk/.unsloth/studio/exports"}
{"timestamp": "2026-04-11T06:49:42.551072Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/inference/status", "status_code": 200, "process_time_ms": 3.82}
{"timestamp": "2026-04-11T06:49:42.551230Z", "level": "info", "event": "request_completed", "method": "GET", "path": "/api/models/loras", "status_code": 200, "process_time_ms": 0.93}