{"meta":{"opus-4.8":{"disp":"Opus 4.8","kind":"API ref","compose":null,"hf":null,"dl":"claude -p --model claude-opus-4-8 (Claude Code as agent)"},"sonnet-4.5":{"disp":"Sonnet 4.5","kind":"API ref","compose":null,"hf":null,"dl":"claude -p --model claude-sonnet-4-5-20250929"},"sonnet-5":{"disp":"Sonnet 5","kind":"API ref","compose":null,"hf":null,"dl":"claude -p --model claude-sonnet-5"},"fable-5":{"disp":"Fable 5","kind":"API ref","compose":null,"hf":null,"dl":"claude -p --model claude-fable-5"},"deepseek-v4-flash":{"disp":"DeepSeek V4 Flash (Think-High)","kind":"vLLM b12x","compose":"~/vllm/compose-DeepSeek-V4-Flash-b12x.yml","hf":"deepseek-ai/DeepSeek-V4-Flash","dl":"hf download deepseek-ai/DeepSeek-V4-Flash --local-dir ~/models/DeepSeek-V4-Flash"},"deepseek-v4-max":{"disp":"DeepSeek V4 Flash (Think-Max)","kind":"vLLM b12x","compose":"~/vllm/compose-DeepSeek-V4-Flash-b12x-MAX.yml","hf":"deepseek-ai/DeepSeek-V4-Flash","dl":"(same weights as High; reasoning_effort=max)"},"mimo-v2.5":{"disp":"MiMo-V2.5","kind":"llama.cpp","compose":"~/llama/compose-MiMo-V2.5-IQ4_XS.yml","hf":"unsloth/MiMo-V2.5-GGUF (UD-IQ4_XS)","dl":"hf download unsloth/MiMo-V2.5-GGUF --include 'UD-IQ4_XS/*' --local-dir ~/models/MiMo-V2.5"},"step-3.7-flash":{"disp":"Step 3.7 Flash","kind":"llama.cpp","compose":"~/llama/compose-Step-3.7-Flash-Q6_K.yml","hf":"unsloth/Step-3.7-Flash-GGUF (UD-Q6_K)","dl":"hf download unsloth/Step-3.7-Flash-GGUF --include 'UD-Q6_K/*' --local-dir ~/models/Step-3.7-Flash"},"qwen3.6-27b-int4":{"disp":"Qwen3.6-27B INT4","kind":"vLLM","compose":"~/vllm/compose-Qwen3.6-27B-AutoRound-INT4.yml","hf":"Lorbus/Qwen3.6-27B-int4-AutoRound","dl":"hf download Lorbus/Qwen3.6-27B-int4-AutoRound --local-dir ~/models/Qwen3.6-27B-AutoRound-INT4"},"qwen3.6-35b-awq":{"disp":"Qwen3.6-35B-A3B AWQ","kind":"vLLM","compose":"~/vllm/compose-Qwen3.6-35B-A3B-AWQ.yml","hf":"QuantTrio/Qwen3.6-35B-A3B-AWQ","dl":"hf download QuantTrio/Qwen3.6-35B-A3B-AWQ --local-dir ~/models/Qwen3.6-35B-A3B-AWQ"},"minimax-m3-iq3s":{"disp":"MiniMax M3 IQ3_S","kind":"llama.cpp","compose":"~/llama/compose-MiniMax-M3-IQ3_S.yml","hf":"unsloth/MiniMax-M3-GGUF (UD-IQ3_S)","dl":"hf download unsloth/MiniMax-M3-GGUF --include 'UD-IQ3_S/*' --local-dir ~/models/MiniMax-M3"},"minimax-m2.7-q5":{"disp":"MiniMax M2.7 (Q5)","kind":"llama.cpp","compose":"~/llama/compose-MiniMax-M2.7-Q5.yml","hf":"unsloth/MiniMax-M2.7-GGUF (UD-Q5_K_XL)","dl":"hf download unsloth/MiniMax-M2.7-GGUF --include 'UD-Q5_K_XL/*' --local-dir ~/models/MiniMax-M27"},"huihui-q8":{"disp":"Huihui Qwen3.6-35B abliterated (Q8)","kind":"llama.cpp","compose":"~/llama/compose-Huihui-Qwen3.6-35B-A3B-abliterated-Q8.yml","hf":"huihui-ai/Huihui-Qwen3.6-35B-A3B-Claude-4.7-Opus-abliterated-MTP-GGUF","dl":"hf download huihui-ai/Huihui-Qwen3.6-35B-A3B-Claude-4.7-Opus-abliterated-MTP-GGUF --include '*Q8_0*' --local-dir ~/models/Huihui-Qwen3.6-35B-A3B-Claude-4.7-Opus-abliterated-Q8"}},"order":["opus-4.8","fable-5","sonnet-5","deepseek-v4-flash","qwen3.6-35b-awq","huihui-q8","qwen3.6-27b-int4","mimo-v2.5","deepseek-v4-max","sonnet-4.5","minimax-m2.7-q5","minimax-m3-iq3s","step-3.7-flash"],"tasks":["cand-01","cand-02","cand-03","cand-04","cand-05","cand-06","cand-07","cand-08"],"weights":{"deepseek-v4-flash":150,"deepseek-v4-max":150,"mimo-v2.5":140,"step-3.7-flash":156,"qwen3.6-27b-int4":18,"qwen3.6-35b-awq":25,"minimax-m3-iq3s":163,"huihui-q8":38,"minimax-m2.7-q5":158},"disc":["cand-01","cand-02","cand-03","cand-05","cand-06","cand-08"],"pub":true,"sections":[{"id":"score","label":"Scorecard","group":"Results"},{"id":"models","label":"Models","group":"Results"},{"id":"speed","label":"Speed","group":"Results"},{"id":"corr","label":"Time per task","group":"Results"},{"id":"tokens","label":"Effort","group":"Results"},{"id":"vram","label":"VRAM","group":"Results"},{"id":"tasks","label":"The tasks","group":"Method"},{"id":"repro","label":"Reproducibility","group":"Method"},{"id":"coverage","label":"Coverage","group":"Method"},{"id":"about","label":"About","group":"Method"}],"short":{"opus-4.8":"Opus 4.8","sonnet-4.5":"Sonnet 4.5","sonnet-5":"Sonnet 5","fable-5":"Fable 5","deepseek-v4-flash":"DS V4 High","deepseek-v4-max":"DS V4 Max","mimo-v2.5":"MiMo 2.5","step-3.7-flash":"Step 3.7","qwen3.6-27b-int4":"Qwen 27B","qwen3.6-35b-awq":"Qwen 35B","huihui-q8":"HuiHui 35B","minimax-m3-iq3s":"M3","minimax-m2.7-q5":"M2.7"},"scorecard":{"opus-4.8":{"tasks":{"cand-01":{"tiers":["silver","silver","silver"],"best":"silver","n":3,"avg_wall":465},"cand-02":{"tiers":["gold","gold","gold","gold","gold","gold","gold","gold"],"best":"gold","n":8,"avg_wall":263},"cand-03":{"tiers":["gold","gold","gold","silver","gold","gold","gold","gold"],"best":"gold","n":8,"avg_wall":177},"cand-04":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":100},"cand-05":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":126},"cand-06":{"tiers":["gold","fail","fail","silver","fail","fail","gold","silver"],"best":"gold","n":8,"avg_wall":756},"cand-07":{"tiers":["core","gold","gold"],"best":"gold","n":3,"avg_wall":204},"cand-08":{"tiers":["gold","gold","gold","gold","gold","gold","gold","gold"],"best":"gold","n":8,"avg_wall":371}},"tally":{"silver":1,"gold":7},"avg_wall":308},"fable-5":{"tasks":{"cand-01":{"tiers":["silver","silver","gold"],"best":"gold","n":3,"avg_wall":280},"cand-02":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":232},"cand-03":{"tiers":["gold","silver","fail"],"best":"gold","n":3,"avg_wall":55},"cand-04":{"tiers":["gold"],"best":"gold","n":1,"avg_wall":92},"cand-05":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":203},"cand-06":{"tiers":["silver","silver"],"best":"silver","n":2,"avg_wall":197},"cand-07":{"tiers":["core"],"best":"core","n":1,"avg_wall":146},"cand-08":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":377}},"tally":{"gold":6,"silver":1,"core":1},"avg_wall":198},"sonnet-5":{"tasks":{"cand-01":{"tiers":["fail","gold","gold"],"best":"gold","n":3,"avg_wall":522},"cand-02":{"tiers":["gold","gold","silver"],"best":"gold","n":3,"avg_wall":314},"cand-03":{"tiers":["silver","fail","fail"],"best":"silver","n":3,"avg_wall":900},"cand-04":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":91},"cand-05":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":233},"cand-06":{"tiers":["silver","silver","silver"],"best":"silver","n":3,"avg_wall":724},"cand-07":{"tiers":["core","core","core"],"best":"core","n":3,"avg_wall":213},"cand-08":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":622}},"tally":{"gold":5,"silver":2,"core":1},"avg_wall":452},"deepseek-v4-flash":{"tasks":{"cand-01":{"tiers":["core","core","silver"],"best":"silver","n":3,"avg_wall":147},"cand-02":{"tiers":["gold","fail","gold","silver","silver","gold","gold","gold","silver"],"best":"gold","n":9,"avg_wall":150},"cand-03":{"tiers":["gold","gold","silver","silver","gold","gold","gold","fail"],"best":"gold","n":8,"avg_wall":86},"cand-04":{"tiers":["gold","gold","gold","gold"],"best":"gold","n":4,"avg_wall":70},"cand-05":{"tiers":["silver","gold","silver","gold"],"best":"gold","n":4,"avg_wall":77},"cand-06":{"tiers":["fail","fail","silver","silver","fail","fail","fail","fail","silver"],"best":"silver","n":9,"avg_wall":617},"cand-07":{"tiers":["core","core","core","core"],"best":"core","n":4,"avg_wall":104},"cand-08":{"tiers":["gold","gold","gold","gold","gold","gold","gold","gold"],"best":"gold","n":8,"avg_wall":156}},"tally":{"silver":2,"gold":5,"core":1},"avg_wall":176},"qwen3.6-35b-awq":{"tasks":{"cand-01":{"tiers":["core","core","silver"],"best":"silver","n":3,"avg_wall":231},"cand-02":{"tiers":["silver","silver","fail","gold","core","silver","silver","fail"],"best":"gold","n":8,"avg_wall":212},"cand-03":{"tiers":["silver","fail","gold","silver","silver","silver","gold","silver"],"best":"gold","n":8,"avg_wall":87},"cand-04":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":53},"cand-05":{"tiers":["gold","silver","silver"],"best":"gold","n":3,"avg_wall":217},"cand-06":{"tiers":["fail","gold","silver","fail","silver","fail","fail","fail","silver"],"best":"gold","n":9,"avg_wall":790},"cand-07":{"tiers":["core","core","gold"],"best":"gold","n":3,"avg_wall":145},"cand-08":{"tiers":["gold","fail","silver","fail","gold","fail","gold","fail"],"best":"gold","n":8,"avg_wall":777}},"tally":{"silver":1,"gold":7},"avg_wall":314},"huihui-q8":{"tasks":{"cand-01":{"tiers":["fail","fail","core"],"best":"core","n":3,"avg_wall":137},"cand-02":{"tiers":["silver","silver","fail"],"best":"silver","n":3,"avg_wall":92},"cand-03":{"tiers":["fail","silver","silver"],"best":"silver","n":3,"avg_wall":104},"cand-04":{"tiers":["gold","silver","gold"],"best":"gold","n":3,"avg_wall":46},"cand-05":{"tiers":["gold","silver","silver"],"best":"gold","n":3,"avg_wall":168},"cand-06":{"tiers":["fail","fail","fail"],"best":"fail","n":3,"avg_wall":889},"cand-07":{"tiers":["core","fail","fail"],"best":"core","n":3,"avg_wall":344},"cand-08":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":689}},"tally":{"core":2,"silver":2,"gold":3,"fail":1},"avg_wall":309},"qwen3.6-27b-int4":{"tasks":{"cand-01":{"tiers":["core","core","silver"],"best":"silver","n":3,"avg_wall":384},"cand-02":{"tiers":["gold","gold","gold","silver","gold","gold","gold","gold"],"best":"gold","n":8,"avg_wall":410},"cand-03":{"tiers":["silver","gold","silver","silver","fail","gold","fail","fail"],"best":"gold","n":8,"avg_wall":164},"cand-04":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":103},"cand-05":{"tiers":["gold","gold","silver"],"best":"gold","n":3,"avg_wall":166},"cand-06":{"tiers":["fail","fail","fail","silver","silver","silver","fail","fail"],"best":"silver","n":8,"avg_wall":1249},"cand-07":{"tiers":["core","core","gold"],"best":"gold","n":3,"avg_wall":376},"cand-08":{"tiers":["gold","gold","fail","gold","fail","silver","gold","gold"],"best":"gold","n":8,"avg_wall":311}},"tally":{"silver":2,"gold":6},"avg_wall":395},"mimo-v2.5":{"tasks":{"cand-01":{"tiers":["fail","fail","fail"],"best":"fail","n":3,"avg_wall":1051},"cand-02":{"tiers":["gold","fail","gold","silver","fail","gold","gold","fail"],"best":"gold","n":8,"avg_wall":742},"cand-03":{"tiers":["fail","gold","fail","gold","gold","gold","silver","fail"],"best":"gold","n":8,"avg_wall":480},"cand-04":{"tiers":["gold","gold","gold","gold","gold"],"best":"gold","n":5,"avg_wall":126},"cand-05":{"tiers":["gold","gold","gold","gold","fail"],"best":"gold","n":5,"avg_wall":341},"cand-06":{"tiers":["fail","fail","fail","fail","fail","fail","fail","fail"],"best":"fail","n":8,"avg_wall":1176},"cand-07":{"tiers":["gold","core","core","core","core"],"best":"gold","n":5,"avg_wall":443},"cand-08":{"tiers":["fail","fail","gold","gold","gold","fail","fail","gold"],"best":"gold","n":8,"avg_wall":600}},"tally":{"fail":2,"gold":6},"avg_wall":620},"deepseek-v4-max":{"tasks":{"cand-01":{"tiers":["silver","core","silver","core"],"best":"silver","n":4,"avg_wall":189},"cand-02":{"tiers":["silver","gold","gold","silver"],"best":"gold","n":4,"avg_wall":207},"cand-03":{"tiers":["gold","gold","gold","silver"],"best":"gold","n":4,"avg_wall":194},"cand-04":{"tiers":["core","gold","gold","gold"],"best":"gold","n":4,"avg_wall":85},"cand-05":{"tiers":["gold","gold","gold","gold"],"best":"gold","n":4,"avg_wall":85},"cand-06":{"tiers":["silver","fail","fail","fail"],"best":"silver","n":4,"avg_wall":675},"cand-07":{"tiers":["core","core","core","gold"],"best":"gold","n":4,"avg_wall":156},"cand-08":{"tiers":["gold","gold","gold","gold"],"best":"gold","n":4,"avg_wall":269}},"tally":{"silver":2,"gold":6},"avg_wall":232},"sonnet-4.5":{"tasks":{"cand-01":{"tiers":["core","silver","core"],"best":"silver","n":3,"avg_wall":174},"cand-02":{"tiers":["gold","gold","core","silver","gold","silver","gold","silver"],"best":"gold","n":8,"avg_wall":151},"cand-03":{"tiers":["silver","silver","silver","silver","silver","silver","silver","gold"],"best":"gold","n":8,"avg_wall":112},"cand-04":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":80},"cand-05":{"tiers":["silver","gold","gold"],"best":"gold","n":3,"avg_wall":121},"cand-06":{"tiers":["fail","fail","fail","fail","fail","silver"],"best":"silver","n":6,"avg_wall":383},"cand-07":{"tiers":["gold","core","core"],"best":"gold","n":3,"avg_wall":192},"cand-08":{"tiers":["gold","gold","gold"],"best":"gold","n":3,"avg_wall":155}},"tally":{"silver":2,"gold":6},"avg_wall":171},"minimax-m2.7-q5":{"tasks":{"cand-01":{"tiers":["silver","core"],"best":"silver","n":2,"avg_wall":620},"cand-02":{"tiers":["core","silver"],"best":"silver","n":2,"avg_wall":224},"cand-03":{"tiers":["fail","silver"],"best":"silver","n":2,"avg_wall":268},"cand-04":{"tiers":["gold","gold"],"best":"gold","n":2,"avg_wall":96},"cand-05":{"tiers":["silver","silver"],"best":"silver","n":2,"avg_wall":178},"cand-06":{"tiers":["fail","silver"],"best":"silver","n":2,"avg_wall":1396},"cand-07":{"tiers":["silver","gold"],"best":"gold","n":2,"avg_wall":417},"cand-08":{"tiers":["gold","silver"],"best":"gold","n":2,"avg_wall":240}},"tally":{"silver":5,"gold":3},"avg_wall":430},"minimax-m3-iq3s":{"tasks":{"cand-01":{"tiers":["core","silver"],"best":"silver","n":2,"avg_wall":2764},"cand-02":{"tiers":["gold","gold"],"best":"gold","n":2,"avg_wall":1179},"cand-03":{"tiers":["fail","silver","gold"],"best":"gold","n":3,"avg_wall":2362},"cand-04":{"tiers":["gold","gold"],"best":"gold","n":2,"avg_wall":151},"cand-05":{"tiers":["gold","gold"],"best":"gold","n":2,"avg_wall":1036},"cand-06":{"tiers":["fail","fail"],"best":"fail","n":2,"avg_wall":4800},"cand-07":{"tiers":["gold","core"],"best":"gold","n":2,"avg_wall":1501},"cand-08":{"tiers":["silver","gold"],"best":"gold","n":2,"avg_wall":1058}},"tally":{"silver":1,"gold":6,"fail":1},"avg_wall":1856},"step-3.7-flash":{"tasks":{"cand-01":{"tiers":["core","silver"],"best":"silver","n":2,"avg_wall":432},"cand-02":{"tiers":["gold","gold"],"best":"gold","n":2,"avg_wall":1122},"cand-03":{"tiers":["fail","fail"],"best":"fail","n":2,"avg_wall":312},"cand-04":{"tiers":["gold","gold"],"best":"gold","n":2,"avg_wall":458},"cand-05":{"tiers":["gold","gold"],"best":"gold","n":2,"avg_wall":435},"cand-06":{"tiers":["silver","silver","fail"],"best":"silver","n":3,"avg_wall":2518},"cand-07":{"tiers":["fail","core","core"],"best":"core","n":3,"avg_wall":827},"cand-08":{"tiers":["fail","fail","fail"],"best":"fail","n":3,"avg_wall":939}},"tally":{"silver":2,"gold":3,"fail":2,"core":1},"avg_wall":880}},"ranking":{"opus-4.8":{"goldrate":74,"mean":2.51,"sd":0.95,"se":0.15,"lo":0,"hi":3,"n":38,"dist":{"gold":28,"silver":6,"core":0,"fail":4}},"fable-5":{"goldrate":65,"mean":2.53,"sd":0.78,"se":0.19,"lo":0,"hi":3,"n":17,"dist":{"gold":11,"silver":5,"core":0,"fail":1}},"sonnet-5":{"goldrate":56,"mean":2.22,"sd":1.08,"se":0.26,"lo":0,"hi":3,"n":18,"dist":{"gold":10,"silver":5,"core":0,"fail":3}},"deepseek-v4-flash":{"goldrate":49,"mean":2.04,"sd":1.14,"se":0.18,"lo":0,"hi":3,"n":41,"dist":{"gold":20,"silver":11,"core":2,"fail":8}},"qwen3.6-35b-awq":{"goldrate":21,"mean":1.46,"sd":1.11,"se":0.18,"lo":0,"hi":3,"n":39,"dist":{"gold":8,"silver":16,"core":3,"fail":12}},"huihui-q8":{"goldrate":22,"mean":1.33,"sd":1.17,"se":0.27,"lo":0,"hi":3,"n":18,"dist":{"gold":4,"silver":6,"core":1,"fail":7}},"qwen3.6-27b-int4":{"goldrate":42,"mean":1.84,"sd":1.23,"se":0.2,"lo":0,"hi":3,"n":38,"dist":{"gold":16,"silver":10,"core":2,"fail":10}},"mimo-v2.5":{"goldrate":40,"mean":1.3,"sd":1.45,"se":0.23,"lo":0,"hi":3,"n":40,"dist":{"gold":16,"silver":2,"core":0,"fail":22}},"deepseek-v4-max":{"goldrate":54,"mean":2.21,"sd":1.04,"se":0.21,"lo":0,"hi":3,"n":24,"dist":{"gold":13,"silver":6,"core":2,"fail":3}},"sonnet-4.5":{"goldrate":32,"mean":1.9,"sd":1.03,"se":0.18,"lo":0,"hi":3,"n":31,"dist":{"gold":10,"silver":13,"core":3,"fail":5}},"minimax-m2.7-q5":{"goldrate":8,"mean":1.58,"sd":0.86,"se":0.25,"lo":0,"hi":3,"n":12,"dist":{"gold":1,"silver":7,"core":2,"fail":2}},"minimax-m3-iq3s":{"goldrate":46,"mean":1.92,"sd":1.21,"se":0.33,"lo":0,"hi":3,"n":13,"dist":{"gold":6,"silver":3,"core":1,"fail":3}},"step-3.7-flash":{"goldrate":29,"mean":1.36,"sd":1.29,"se":0.34,"lo":0,"hi":3,"n":14,"dist":{"gold":4,"silver":3,"core":1,"fail":6}}},"ranking_all":{"opus-4.8":{"goldrate":75,"mean":2.53,"sd":0.93,"se":0.14,"lo":0,"hi":3,"n":44,"dist":{"gold":33,"silver":6,"core":1,"fail":4}},"fable-5":{"goldrate":63,"mean":2.47,"sd":0.82,"se":0.19,"lo":0,"hi":3,"n":19,"dist":{"gold":12,"silver":5,"core":1,"fail":1}},"sonnet-5":{"goldrate":54,"mean":2.17,"sd":1.07,"se":0.22,"lo":0,"hi":3,"n":24,"dist":{"gold":13,"silver":5,"core":3,"fail":3}},"deepseek-v4-flash":{"goldrate":49,"mean":2.03,"sd":1.12,"se":0.16,"lo":0,"hi":3,"n":49,"dist":{"gold":24,"silver":11,"core":6,"fail":8}},"qwen3.6-35b-awq":{"goldrate":27,"mean":1.58,"sd":1.13,"se":0.17,"lo":0,"hi":3,"n":45,"dist":{"gold":12,"silver":16,"core":5,"fail":12}},"huihui-q8":{"goldrate":25,"mean":1.38,"sd":1.19,"se":0.24,"lo":0,"hi":3,"n":24,"dist":{"gold":6,"silver":7,"core":2,"fail":9}},"qwen3.6-27b-int4":{"goldrate":45,"mean":1.91,"sd":1.2,"se":0.18,"lo":0,"hi":3,"n":44,"dist":{"gold":20,"silver":10,"core":4,"fail":10}},"mimo-v2.5":{"goldrate":44,"mean":1.48,"sd":1.42,"se":0.2,"lo":0,"hi":3,"n":50,"dist":{"gold":22,"silver":2,"core":4,"fail":22}},"deepseek-v4-max":{"goldrate":53,"mean":2.16,"sd":1.03,"se":0.18,"lo":0,"hi":3,"n":32,"dist":{"gold":17,"silver":6,"core":6,"fail":3}},"sonnet-4.5":{"goldrate":38,"mean":1.97,"sd":1.03,"se":0.17,"lo":0,"hi":3,"n":37,"dist":{"gold":14,"silver":13,"core":5,"fail":5}},"minimax-m2.7-q5":{"goldrate":25,"mean":1.88,"sd":0.93,"se":0.23,"lo":0,"hi":3,"n":16,"dist":{"gold":4,"silver":8,"core":2,"fail":2}},"minimax-m3-iq3s":{"goldrate":53,"mean":2.06,"sd":1.16,"se":0.28,"lo":0,"hi":3,"n":17,"dist":{"gold":9,"silver":3,"core":2,"fail":3}},"step-3.7-flash":{"goldrate":32,"mean":1.42,"sd":1.27,"se":0.29,"lo":0,"hi":3,"n":19,"dist":{"gold":6,"silver":3,"core":3,"fail":7}}},"tapers":{"qwen3.6-27b-int4":[{"target":4000,"prompt_tokens":3933,"completion_tokens":128,"ttft_s":2.47,"pp_tps":1593.7,"decode_tps":79.9},{"target":16000,"prompt_tokens":15699,"completion_tokens":128,"ttft_s":3.01,"pp_tps":5213.4,"decode_tps":77.8},{"target":32000,"prompt_tokens":31384,"completion_tokens":128,"ttft_s":4.41,"pp_tps":7119.9,"decode_tps":75.6},{"target":64000,"prompt_tokens":62756,"completion_tokens":128,"ttft_s":10.98,"pp_tps":5716.3,"decode_tps":71.3},{"target":100000,"prompt_tokens":98051,"completion_tokens":128,"ttft_s":15.98,"pp_tps":6137.6,"decode_tps":67.2},{"target":125000,"prompt_tokens":122560,"completion_tokens":128,"ttft_s":13.9,"pp_tps":8819.7,"decode_tps":64.8}],"qwen3.6-35b-awq":[{"target":4000,"prompt_tokens":3933,"completion_tokens":128,"ttft_s":1.89,"pp_tps":2077.8,"decode_tps":199.3},{"target":32000,"prompt_tokens":31384,"completion_tokens":128,"ttft_s":1.65,"pp_tps":19048.9,"decode_tps":192.6},{"target":64000,"prompt_tokens":62756,"completion_tokens":128,"ttft_s":2.89,"pp_tps":21708.5,"decode_tps":184.8},{"target":128000,"prompt_tokens":125503,"completion_tokens":128,"ttft_s":8.74,"pp_tps":14366.8,"decode_tps":176.2},{"target":200000,"prompt_tokens":196091,"completion_tokens":128,"ttft_s":14.99,"pp_tps":13083.5,"decode_tps":165.7},{"target":250000,"prompt_tokens":245110,"completion_tokens":128,"ttft_s":13.93,"pp_tps":17600.4,"decode_tps":158.2}],"deepseek-v4-flash":[{"target":4000,"prompt_tokens":3933,"completion_tokens":128,"ttft_s":1.12,"pp_tps":3510.1,"decode_tps":167.0},{"target":32000,"prompt_tokens":31384,"completion_tokens":128,"ttft_s":7.19,"pp_tps":4364.0,"decode_tps":170.8},{"target":64000,"prompt_tokens":62756,"completion_tokens":128,"ttft_s":8.73,"pp_tps":7191.3,"decode_tps":172.7},{"target":128000,"prompt_tokens":125503,"completion_tokens":128,"ttft_s":19.2,"pp_tps":6536.4,"decode_tps":175.5},{"target":256000,"prompt_tokens":250993,"completion_tokens":128,"ttft_s":44.44,"pp_tps":5648.2,"decode_tps":186.5},{"target":400000,"prompt_tokens":392170,"completion_tokens":128,"ttft_s":58.68,"pp_tps":6683.1,"decode_tps":180.6},{"target":490000,"prompt_tokens":480404,"completion_tokens":128,"ttft_s":42.49,"pp_tps":11307.0,"decode_tps":171.4}],"mimo-v2.5":[{"target":4000,"prompt_tokens":3933,"completion_tokens":128,"ttft_s":1.6,"pp_tps":2450.5,"decode_tps":107.1},{"target":16000,"prompt_tokens":15699,"completion_tokens":128,"ttft_s":3.53,"pp_tps":4449.2,"decode_tps":103.6},{"target":32000,"prompt_tokens":31384,"completion_tokens":128,"ttft_s":4.9,"pp_tps":6399.9,"decode_tps":100.3},{"target":64000,"prompt_tokens":62756,"completion_tokens":128,"ttft_s":10.67,"pp_tps":5884.1,"decode_tps":89.7},{"target":128000,"prompt_tokens":125503,"completion_tokens":128,"ttft_s":27.99,"pp_tps":4483.4,"decode_tps":72.3},{"target":180000,"prompt_tokens":176481,"completion_tokens":128,"ttft_s":31.13,"pp_tps":5668.7,"decode_tps":62.7}],"minimax-m3-iq3s":[{"target":4000,"prompt_tokens":3933,"completion_tokens":128,"ttft_s":3.24,"pp_tps":1214.9,"decode_tps":64.8},{"target":16000,"prompt_tokens":15699,"completion_tokens":128,"ttft_s":7.89,"pp_tps":1989.3,"decode_tps":50.0},{"target":32000,"prompt_tokens":31384,"completion_tokens":128,"ttft_s":11.4,"pp_tps":2752.9,"decode_tps":38.3},{"target":64000,"prompt_tokens":62756,"completion_tokens":128,"ttft_s":27.57,"pp_tps":2275.9,"decode_tps":26.3},{"target":128000,"prompt_tokens":125503,"completion_tokens":128,"ttft_s":95.98,"pp_tps":1307.6,"decode_tps":16.3},{"target":180000,"prompt_tokens":176481,"completion_tokens":128,"ttft_s":114.57,"pp_tps":1540.4,"decode_tps":12.3}],"huihui-q8":[{"target":4000,"prompt_tokens":3933,"completion_tokens":128,"ttft_s":0.76,"pp_tps":5205.3,"decode_tps":200.9},{"target":16000,"prompt_tokens":15699,"completion_tokens":128,"ttft_s":1.67,"pp_tps":9396.6,"decode_tps":190.6},{"target":32000,"prompt_tokens":31384,"completion_tokens":128,"ttft_s":2.43,"pp_tps":12938.0,"decode_tps":177.1},{"target":64000,"prompt_tokens":62756,"completion_tokens":128,"ttft_s":5.47,"pp_tps":11474.7,"decode_tps":156.1},{"target":128000,"prompt_tokens":125503,"completion_tokens":128,"ttft_s":15.16,"pp_tps":8279.3,"decode_tps":119.8},{"target":180000,"prompt_tokens":176481,"completion_tokens":128,"ttft_s":17.0,"pp_tps":10382.6,"decode_tps":99.9}],"minimax-m2.7-q5":[{"target":4000,"prompt_tokens":3934,"completion_tokens":128,"ttft_s":1.51,"pp_tps":2606.1,"decode_tps":94.5},{"target":16000,"prompt_tokens":15700,"completion_tokens":128,"ttft_s":4.13,"pp_tps":3802.5,"decode_tps":71.2},{"target":32000,"prompt_tokens":31385,"completion_tokens":128,"ttft_s":6.95,"pp_tps":4514.0,"decode_tps":53.5},{"target":64000,"prompt_tokens":62757,"completion_tokens":128,"ttft_s":23.75,"pp_tps":2642.8,"decode_tps":34.8},{"target":128000,"prompt_tokens":125504,"completion_tokens":128,"ttft_s":103.4,"pp_tps":1213.7,"decode_tps":20.3},{"target":180000,"prompt_tokens":176482,"completion_tokens":128,"ttft_s":132.71,"pp_tps":1329.8,"decode_tps":15.3}],"step-3.7-flash":[{"target":4000,"prompt_tokens":3934,"completion_tokens":128,"ttft_s":1.57,"pp_tps":2505.7,"decode_tps":86.9},{"target":16000,"prompt_tokens":15700,"completion_tokens":128,"ttft_s":3.7,"pp_tps":4243.4,"decode_tps":81.0},{"target":32000,"prompt_tokens":31385,"completion_tokens":128,"ttft_s":5.23,"pp_tps":5995.9,"decode_tps":74.3},{"target":64000,"prompt_tokens":62757,"completion_tokens":128,"ttft_s":12.43,"pp_tps":5046.8,"decode_tps":63.2},{"target":128000,"prompt_tokens":125504,"completion_tokens":128,"ttft_s":37.86,"pp_tps":3314.9,"decode_tps":47.9},{"target":180000,"prompt_tokens":176482,"completion_tokens":128,"ttft_s":42.63,"pp_tps":4139.6,"decode_tps":39.8}]},"vram":{"deepseek-v4-flash":{"total_gb":186.7,"g0_gb":93.4,"g1_gb":93.3,"cards":2},"mimo-v2.5":{"total_gb":144.2,"g0_gb":76.6,"g1_gb":67.6,"cards":2},"qwen3.6-27b-int4":{"total_gb":87.0,"g0_gb":87.0,"g1_gb":0.0,"cards":1},"qwen3.6-35b-awq":{"total_gb":86.2,"g0_gb":86.2,"g1_gb":0.0,"cards":1},"minimax-m3-iq3s":{"total_gb":177.9,"g0_gb":86.5,"g1_gb":91.4,"cards":2},"minimax-m2.7-q5":{"total_gb":185.1,"g0_gb":94.9,"g1_gb":90.2,"cards":2},"huihui-q8":{"total_gb":38.0,"g0_gb":38.0,"g1_gb":0.0,"cards":1},"step-3.7-flash":{"total_gb":161.1,"g0_gb":77.1,"g1_gb":84.0,"cards":2}},"composes":{"deepseek-v4-flash":{"path":"~/vllm/compose-DeepSeek-V4-Flash-b12x.yml","content":"# DeepSeek-V4-Flash - vLLM b12x (SM120 sparse MLA) on 2x RTX PRO 6000.\n# Image: voipmonitor/vllm b12x build (Martin Vit / local-inference-lab, upstream vLLM contributor).\n#   Third-party image, VETTED before use: statically inspected, entrypoint/flags confirmed via\n#   `docker inspect`, and egress-verified (forced offline; no phone-home observed at runtime).\n# Hardened for defence-in-depth:\n#   - weights mounted READ-ONLY, dedicated writable cache only\n#   - NO /mnt or home bind mounts (limits blast radius)\n#   - published port instead of --network host\n#   - HF/telemetry forced OFFLINE (any egress at runtime would be a red flag)\n# Needs both GPUs - stop any other model first. MTP=1 is the stable baseline.\n\nservices:\n  vllm:\n    image: voipmonitor/vllm:chthonic-consecration-f1190eab-b12x0ff2847-pr20-cu132\n    restart: \"no\"            # manual control during evaluation, no auto-restart\n    ipc: host\n    shm_size: \"32gb\"\n    ulimits:\n      memlock:\n        soft: -1\n        hard: -1\n      stack: 67108864\n    ports:\n      - \"8080:8080\"\n    environment:\n      # --- device / topology (from recipe) ---\n      - CUDA_VISIBLE_DEVICES=0,1\n      - CUDA_DEVICE_ORDER=PCI_BUS_ID\n      - CUTE_DSL_ARCH=sm_120a\n      - NCCL_IB_DISABLE=1\n      - NCCL_P2P_LEVEL=SYS\n      - NCCL_PROTO=LL,LL128,Simple\n      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True\n      # --- b12x backend switches (required for the fast path) ---\n      - USES_B12X=True\n      - VLLM_USE_B12X_MOE=1\n      - VLLM_USE_B12X_FP8_GEMM=1\n      - VLLM_USE_B12X_WO_PROJECTION=1\n      - VLLM_USE_B12X_MHC=1\n      - VLLM_USE_B12X_SPARSE_INDEXER=1\n      - B12X_MHC_MAX_TOKENS=16384\n      - B12X_MLA_SM120_UNIFIED=1\n      - B12X_DENSE_SPLITK_TURBO=1\n      - B12X_W4A16_TC_DECODE=1\n      - VLLM_PCIE_ALLREDUCE_BACKEND=b12x\n      - VLLM_ENABLE_PCIE_ALLREDUCE=1\n      - VLLM_USE_V2_MODEL_RUNNER=1\n      - VLLM_USE_AOT_COMPILE=1\n      - VLLM_USE_MEGA_AOT_ARTIFACT=1\n      - VLLM_USE_BREAKABLE_CUDAGRAPH=0\n      - VLLM_USE_FLASHINFER_SAMPLER=1\n      - VLLM_MEMORY_PROFILE_INCLUDE_ATTN=1\n      - VLLM_PREFIX_CACHE_RETENTION_INTERVAL=4096\n      # --- hardening: force offline so any outbound connection is suspicious ---\n      - HF_HUB_OFFLINE=1\n      - TRANSFORMERS_OFFLINE=1\n      - HF_HUB_DISABLE_TELEMETRY=1\n      - DO_NOT_TRACK=1\n      - VLLM_NO_USAGE_STATS=1\n      - VLLM_DO_NOT_TRACK=1\n      - HF_HOME=/cache/hf   # writable; offline anyway. JIT caches use image's /cache/jit/* paths.\n    volumes:\n      - ~/models/DeepSeek-V4-Flash:/models/ds4:ro\n      - ~/vllm/ds4-cache:/cache\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              device_ids: ['0', '1']\n              capabilities: [gpu]\n    # Keep the image's nvidia_entrypoint.sh (CUDA setup); only replace the command it execs,\n    # exactly how Nepherpitu invokes it (image CMD run-kimi26-vllm is just a stub).\n    command:\n      - /bin/bash\n      - -lc\n      - >\n        unset NCCL_GRAPH_FILE NCCL_GRAPH_DUMP_FILE VLLM_B12X_MLA_EXTEND_MAX_CHUNKS;\n        exec /opt/venv/bin/python -m vllm.entrypoints.cli.main serve /models/ds4\n        --served-model-name deepseek-v4-flash latest /latest\n        --host 0.0.0.0 --port 8080\n        --tensor-parallel-size 2\n        --kv-cache-dtype fp8\n        --block-size 256\n        --load-format safetensors\n        --moe-backend b12x\n        --linear-backend b12x\n        --attention-backend B12X_MLA_SPARSE\n        --gpu-memory-utilization 0.95\n        --max-model-len 512000\n        --max-num-seqs 16\n        --max-num-batched-tokens 4096\n        --max-cudagraph-capture-size 192\n        --async-scheduling\n        --no-scheduler-reserve-full-isl\n        --enable-chunked-prefill\n        --enable-prefix-caching\n        --enable-flashinfer-autotune\n        --tokenizer-mode deepseek_v4\n        --tool-call-parser deepseek_v4\n        --enable-auto-tool-choice\n        --reasoning-parser deepseek_v4\n        --default-chat-template-kwargs.thinking=true\n        --default-chat-template-kwargs.reasoning_effort=high\n        --compilation-config='{\"cudagraph_mode\":\"FULL_AND_PIECEWISE\",\"custom_ops\":[\"all\"]}'\n        --speculative-config='{\"method\":\"mtp\",\"num_speculative_tokens\":1,\"draft_sample_method\":\"probabilistic\",\"moe_backend\":\"b12x\",\"use_local_argmax_reduction\":true}'\n","image":"voipmonitor/vllm:chthonic-consecration-f1190eab-b12x0ff2847-pr20-cu132"},"deepseek-v4-max":{"path":"~/vllm/compose-DeepSeek-V4-Flash-b12x-MAX.yml","content":"# DeepSeek-V4-Flash - vLLM b12x (SM120 sparse MLA) on 2x RTX PRO 6000\n# Image: voipmonitor/vllm b12x build (Martin Vit / local-inference-lab).\n#   -> UNVETTED THIRD-PARTY BINARY. Inspect statically before first run.\n# Adapted from u/Nepherpitu's llama-swap config (Reddit), HARDENED:\n#   - weights mounted READ-ONLY, dedicated writable cache only\n#   - NO /mnt or home bind mounts (limits blast radius)\n#   - published port instead of --network host\n#   - HF/telemetry forced OFFLINE -> any egress at runtime is a red flag\n# Run MTP=1 first (Nepherpitu's zero-issue config). Stop MiMo first (needs both GPUs).\n# NOTE: entrypoint/flags verified against `docker inspect` before first launch.\n\nservices:\n  vllm:\n    image: voipmonitor/vllm:chthonic-consecration-f1190eab-b12x0ff2847-pr20-cu132\n    restart: \"no\"            # manual control during evaluation, no auto-restart\n    ipc: host\n    shm_size: \"32gb\"\n    ulimits:\n      memlock:\n        soft: -1\n        hard: -1\n      stack: 67108864\n    ports:\n      - \"8080:8080\"\n    environment:\n      # --- device / topology (from recipe) ---\n      - CUDA_VISIBLE_DEVICES=0,1\n      - CUDA_DEVICE_ORDER=PCI_BUS_ID\n      - CUTE_DSL_ARCH=sm_120a\n      - NCCL_IB_DISABLE=1\n      - NCCL_P2P_LEVEL=SYS\n      - NCCL_PROTO=LL,LL128,Simple\n      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True\n      # --- b12x backend switches (required for the fast path) ---\n      - USES_B12X=True\n      - VLLM_USE_B12X_MOE=1\n      - VLLM_USE_B12X_FP8_GEMM=1\n      - VLLM_USE_B12X_WO_PROJECTION=1\n      - VLLM_USE_B12X_MHC=1\n      - VLLM_USE_B12X_SPARSE_INDEXER=1\n      - B12X_MHC_MAX_TOKENS=16384\n      - B12X_MLA_SM120_UNIFIED=1\n      - B12X_DENSE_SPLITK_TURBO=1\n      - B12X_W4A16_TC_DECODE=1\n      - VLLM_PCIE_ALLREDUCE_BACKEND=b12x\n      - VLLM_ENABLE_PCIE_ALLREDUCE=1\n      - VLLM_USE_V2_MODEL_RUNNER=1\n      - VLLM_USE_AOT_COMPILE=1\n      - VLLM_USE_MEGA_AOT_ARTIFACT=1\n      - VLLM_USE_BREAKABLE_CUDAGRAPH=0\n      - VLLM_USE_FLASHINFER_SAMPLER=1\n      - VLLM_MEMORY_PROFILE_INCLUDE_ATTN=1\n      - VLLM_PREFIX_CACHE_RETENTION_INTERVAL=4096\n      # --- hardening: force offline so any outbound connection is suspicious ---\n      - HF_HUB_OFFLINE=1\n      - TRANSFORMERS_OFFLINE=1\n      - HF_HUB_DISABLE_TELEMETRY=1\n      - DO_NOT_TRACK=1\n      - VLLM_NO_USAGE_STATS=1\n      - VLLM_DO_NOT_TRACK=1\n      - HF_HOME=/cache/hf   # writable; offline anyway. JIT caches use image's /cache/jit/* paths.\n    volumes:\n      - ~/models/DeepSeek-V4-Flash:/models/ds4:ro\n      - ~/vllm/ds4-cache:/cache\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              device_ids: ['0', '1']\n              capabilities: [gpu]\n    # Keep the image's nvidia_entrypoint.sh (CUDA setup); only replace the command it execs,\n    # exactly how Nepherpitu invokes it (image CMD run-kimi26-vllm is just a stub).\n    command:\n      - /bin/bash\n      - -lc\n      - >\n        unset NCCL_GRAPH_FILE NCCL_GRAPH_DUMP_FILE VLLM_B12X_MLA_EXTEND_MAX_CHUNKS;\n        exec /opt/venv/bin/python -m vllm.entrypoints.cli.main serve /models/ds4\n        --served-model-name deepseek-v4-flash latest /latest\n        --host 0.0.0.0 --port 8080\n        --tensor-parallel-size 2\n        --kv-cache-dtype fp8\n        --block-size 256\n        --load-format safetensors\n        --moe-backend b12x\n        --linear-backend b12x\n        --attention-backend B12X_MLA_SPARSE\n        --gpu-memory-utilization 0.95\n        --max-model-len 512000\n        --max-num-seqs 16\n        --max-num-batched-tokens 4096\n        --max-cudagraph-capture-size 192\n        --async-scheduling\n        --no-scheduler-reserve-full-isl\n        --enable-chunked-prefill\n        --enable-prefix-caching\n        --enable-flashinfer-autotune\n        --tokenizer-mode deepseek_v4\n        --tool-call-parser deepseek_v4\n        --enable-auto-tool-choice\n        --reasoning-parser deepseek_v4\n        --default-chat-template-kwargs.thinking=true\n        --default-chat-template-kwargs.reasoning_effort=max\n        --compilation-config='{\"cudagraph_mode\":\"FULL_AND_PIECEWISE\",\"custom_ops\":[\"all\"]}'\n        --speculative-config='{\"method\":\"mtp\",\"num_speculative_tokens\":1,\"draft_sample_method\":\"probabilistic\",\"moe_backend\":\"b12x\",\"use_local_argmax_reduction\":true}'\n","image":"voipmonitor/vllm:chthonic-consecration-f1190eab-b12x0ff2847-pr20-cu132"},"mimo-v2.5":{"path":"~/llama/compose-MiMo-V2.5-IQ4_XS.yml","content":"# MiMo-V2.5 UD-IQ4_XS\n# 310B total params, 15B active (MoE: 256 experts, 8 per token)\n# Hybrid attention: 5:1 SWA:GA ratio, 128 sliding window, GQA on GA / 4-KV on SWA\n# Native context limit is 1M (1,048,576), using 262144 (256K) here for memory budget\n# Recommended: temp=1.0, top_p=0.95 (per vLLM docs)\n# Unsloth Dynamic 2.0 quant with chat template fixes - requires --jinja\n# No vision/audio mmproj in this release - text only (omnimodal encoders not shipped as GGUF)\n# Quant ~40GB across 4 shards\n\nservices:\n  llama:\n    image: llama-cpp-minimax-m3:latest\n    restart: unless-stopped\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              device_ids: ['0', '1']\n              capabilities: [gpu]\n    ulimits:\n      memlock:\n        soft: -1\n        hard: -1\n    volumes:\n      - ~/models/MiMo-V2.5:/models:ro\n    environment:\n      - CUDA_VISIBLE_DEVICES=0,1\n    command: >\n      --model /models/MiMo-V2.5-UD-IQ4_XS-00001-of-00004.gguf\n      --host 0.0.0.0\n      --port 8080\n      --alias \"mimo-v2.5\"\n      --jinja\n      --reasoning-format deepseek\n      --predict 30000\n      --threads 28\n      --n-gpu-layers 999\n      --split-mode layer\n      --tensor-split 1,1\n      --parallel 1\n      --ctx-size 200000\n      --cache-type-k q8_0\n      --cache-type-v q8_0\n      --flash-attn on\n      --no-mmap\n      --mlock\n      --temp 1.0\n      --top-p 0.95\n    ports:\n      - \"8080:8080\"\n","image":"llama-cpp-minimax-m3:latest","dockerfile":{"path":"~/llama/dockerfile-minimax-m3","content":"FROM nvidia/cuda:13.0.0-devel-ubuntu24.04 AS builder\n\nRUN apt-get update && apt-get install -y \\\n    build-essential \\\n    cmake \\\n    git \\\n    curl \\\n    libcurl4-openssl-dev \\\n    && rm -rf /var/lib/apt/lists/*\n\nWORKDIR /build\n# MiniMax M3 support is not yet upstream - it lives in PR #24523\n# (https://github.com/ggml-org/llama.cpp/pull/24523). Note: the experimental\n# GGUF is text-only and MiniMax Sparse Attention (MSA) is NOT supported, so\n# inference falls back to dense attention (keep --ctx-size modest).\nRUN git clone https://github.com/ggml-org/llama.cpp.git\nWORKDIR /build/llama.cpp\nRUN git fetch origin pull/24523/head:minimax-m3 && git checkout minimax-m3\n\n# sm_120 = RTX PRO 6000 Blackwell (CUDA toolkit >= 12.8; base image is 13.0).\nRUN cmake -B build \\\n        -DCMAKE_BUILD_TYPE=Release \\\n        -DGGML_CUDA=ON \\\n        -DCMAKE_CUDA_ARCHITECTURES=120 \\\n        -DLLAMA_CURL=ON \\\n        -DBUILD_SHARED_LIBS=OFF && \\\n    cmake --build build --config Release -j$(nproc) --target llama-server llama-cli\n\n# Runtime image\nFROM nvidia/cuda:13.0.0-runtime-ubuntu24.04\n\nRUN apt-get update && apt-get install -y \\\n    libgomp1 \\\n    libcurl4 \\\n    && rm -rf /var/lib/apt/lists/*\n\nCOPY --from=builder /build/llama.cpp/build/bin/llama* /usr/local/bin/\n\nVOLUME /models\nWORKDIR /app\n\nEXPOSE 8080\n\nENTRYPOINT [\"llama-server\"]\nCMD [\"--help\"]\n"}},"step-3.7-flash":{"path":"~/llama/compose-Step-3.7-Flash-Q6_K.yml","content":"# Step-3.7-Flash UD-Q6_K\n# 198B total params, ~11B active (sparse MoE) + 1.8B vision encoder\n# Native context limit is 262144 (256K)\n# Vision enabled via mmproj-step3.7-flash-f16.gguf (upload images via OpenAI API)\n# Three selectable reasoning levels (low / medium / high) - set client-side via system prompt\n# Recommended: temp=1.0 (per llama.cpp Quickstart)\n# Built from the stepfun-ai/llama.cpp fork on branch step3.7 (not yet upstream) - see dockerfile-step3.7\n# Quant ~165GB across 5 shards - needs significant VRAM + RAM budget\n\nservices:\n  llama:\n    image: llama-cpp-minimax-m3:latest\n    restart: unless-stopped\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              device_ids: ['0', '1']\n              capabilities: [gpu]\n    ulimits:\n      memlock:\n        soft: -1\n        hard: -1\n    volumes:\n      - ~/models/Step-3.7-Flash:/models:ro\n    environment:\n      - CUDA_VISIBLE_DEVICES=0,1\n    command: >\n      --model /models/Step-3.7-Flash-UD-Q6_K-00001-of-00005.gguf\n      --host 0.0.0.0\n      --port 8080\n      --alias \"step-3.7-flash\"\n      --jinja\n      --reasoning-format deepseek\n      --predict 30000\n      --threads 28\n      --n-gpu-layers 999\n      --split-mode layer\n      --tensor-split 1,1\n      --parallel 1\n      --ctx-size 200000\n      --cache-type-k q8_0\n      --cache-type-v q8_0\n      --flash-attn on\n      --no-mmap\n      --mlock\n      --temp 1.0\n    ports:\n      - \"8080:8080\"\n","image":"llama-cpp-minimax-m3:latest","dockerfile":{"path":"~/llama/dockerfile-minimax-m3","content":"FROM nvidia/cuda:13.0.0-devel-ubuntu24.04 AS builder\n\nRUN apt-get update && apt-get install -y \\\n    build-essential \\\n    cmake \\\n    git \\\n    curl \\\n    libcurl4-openssl-dev \\\n    && rm -rf /var/lib/apt/lists/*\n\nWORKDIR /build\n# MiniMax M3 support is not yet upstream - it lives in PR #24523\n# (https://github.com/ggml-org/llama.cpp/pull/24523). Note: the experimental\n# GGUF is text-only and MiniMax Sparse Attention (MSA) is NOT supported, so\n# inference falls back to dense attention (keep --ctx-size modest).\nRUN git clone https://github.com/ggml-org/llama.cpp.git\nWORKDIR /build/llama.cpp\nRUN git fetch origin pull/24523/head:minimax-m3 && git checkout minimax-m3\n\n# sm_120 = RTX PRO 6000 Blackwell (CUDA toolkit >= 12.8; base image is 13.0).\nRUN cmake -B build \\\n        -DCMAKE_BUILD_TYPE=Release \\\n        -DGGML_CUDA=ON \\\n        -DCMAKE_CUDA_ARCHITECTURES=120 \\\n        -DLLAMA_CURL=ON \\\n        -DBUILD_SHARED_LIBS=OFF && \\\n    cmake --build build --config Release -j$(nproc) --target llama-server llama-cli\n\n# Runtime image\nFROM nvidia/cuda:13.0.0-runtime-ubuntu24.04\n\nRUN apt-get update && apt-get install -y \\\n    libgomp1 \\\n    libcurl4 \\\n    && rm -rf /var/lib/apt/lists/*\n\nCOPY --from=builder /build/llama.cpp/build/bin/llama* /usr/local/bin/\n\nVOLUME /models\nWORKDIR /app\n\nEXPOSE 8080\n\nENTRYPOINT [\"llama-server\"]\nCMD [\"--help\"]\n"}},"qwen3.6-27b-int4":{"path":"~/vllm/compose-Qwen3.6-27B-AutoRound-INT4.yml","content":"# Qwen3.6-27B AutoRound INT4 - vLLM 0.17.1 single GPU\n# Source model: Lorbus/Qwen3.6-27B-int4-AutoRound\n# Reference: Important_Quote_1180 reddit comment (RTX 3090, 82 TPS)\n#\n# Stable-flags-first approach. Bleeding-edge features from the reference\n# (turboquant_3bit_nc KV, MTP speculative) need vLLM nightly + model heads\n# we can't verify without trying. Establish baseline first, iterate after.\n\nservices:\n  vllm:\n    image: vllm/vllm-openai:v0.20.0-cu130\n    restart: unless-stopped\n    ipc: host\n    shm_size: \"16gb\"\n    ulimits:\n      memlock:\n        soft: -1\n        hard: -1\n    ports:\n      - \"8080:8000\"\n    environment:\n      - NVIDIA_VISIBLE_DEVICES=0\n      - CUDA_VISIBLE_DEVICES=0\n      - VLLM_USE_V1=1\n      - OMP_NUM_THREADS=8\n      - PYTORCH_ALLOC_CONF=expandable_segments:True\n    volumes:\n      - ~/models/Qwen3.6-27B-AutoRound-INT4:/models:ro\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              device_ids: ['0']\n              capabilities: [gpu]\n    command: >\n      --model /models\n      --served-model-name latest /latest\n      --host 0.0.0.0\n      --port 8000\n      --max-model-len 131072\n      --max-num-seqs 1\n      --max-num-batched-tokens 4128\n      --gpu-memory-utilization 0.92\n      --enable-chunked-prefill\n      --enable-prefix-caching\n      --kv-cache-dtype fp8_e4m3\n      --calculate-kv-scales\n      --reasoning-parser qwen3\n      --tool-call-parser qwen3_coder\n      --enable-auto-tool-choice\n      --trust-remote-code\n","image":"vllm/vllm-openai:v0.20.0-cu130"},"qwen3.6-35b-awq":{"path":"~/vllm/compose-Qwen3.6-35B-A3B-AWQ.yml","content":"# Qwen3.6-35B-A3B AWQ-4bit - vLLM single GPU (MoE 35B/3B-active, native 262K)\n# Source: QuantTrio/Qwen3.6-35B-A3B-AWQ (~25.5GB). AWQ ~20-25GB weights + fp8 KV\n# (~13GB @262K) fits one 96GB card -> single GPU avoids the no-NVLink TP penalty.\nservices:\n  vllm:\n    image: vllm/vllm-openai:v0.20.0-cu130\n    restart: unless-stopped\n    ipc: host\n    shm_size: \"16gb\"\n    ulimits:\n      memlock:\n        soft: -1\n        hard: -1\n    ports:\n      - \"8080:8000\"\n    environment:\n      - NVIDIA_VISIBLE_DEVICES=0\n      - CUDA_VISIBLE_DEVICES=0\n      - VLLM_USE_V1=1\n      - OMP_NUM_THREADS=8\n      - PYTORCH_ALLOC_CONF=expandable_segments:True\n    volumes:\n      - ~/models/Qwen3.6-35B-A3B-AWQ:/models:ro\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              device_ids: ['0']\n              capabilities: [gpu]\n    command: >\n      --model /models\n      --served-model-name latest /latest\n      --host 0.0.0.0\n      --port 8000\n      --quantization awq_marlin\n      --max-model-len 262144\n      --max-num-seqs 1\n      --max-num-batched-tokens 4128\n      --gpu-memory-utilization 0.92\n      --enable-chunked-prefill\n      --enable-prefix-caching\n      --kv-cache-dtype fp8_e4m3\n      --calculate-kv-scales\n      --reasoning-parser qwen3\n      --tool-call-parser qwen3_coder\n      --enable-auto-tool-choice\n      --trust-remote-code\n","image":"vllm/vllm-openai:v0.20.0-cu130"},"minimax-m3-iq3s":{"path":"~/llama/compose-MiniMax-M3-IQ3_S.yml","content":"# MiniMax-M3 UD-IQ3_S\n# ~428B total params, 23B active (MoE). Text-only experimental GGUF.\n# Built from llama.cpp PR #24523 (not upstream) - see dockerfile-minimax-m3.\n# Recommended sampling (MiniMax): temp=1.0, top_p=0.95, top_k=40\n# Native max context 1,048,576 (1M).\n#\n# IMPORTANT: MiniMax Sparse Attention (MSA) is NOT supported in this GGUF yet,\n# so inference falls back to DENSE attention -> KV grows fast with context.\n# Weights ~162.8 GiB (vs IQ3_XXS 148.5). At 200K + KV q8_0 this is TIGHT:\n# predicted ~5 GiB free on the fuller card (GPU1). If it OOMs at load, either\n# drop --ctx-size (~180000) or rebalance --tensor-split (e.g. 1.05,0.95 to shift\n# layers onto GPU0, which runs less full than GPU1 under even split).\n\nservices:\n  llama:\n    image: llama-cpp-minimax-m3:latest\n    restart: unless-stopped\n    build:\n      context: .\n      dockerfile: dockerfile-minimax-m3\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              device_ids: ['0', '1']\n              capabilities: [gpu]\n    ulimits:\n      memlock:\n        soft: -1\n        hard: -1\n    volumes:\n      - ~/models/MiniMax-M3/UD-IQ3_S:/models:ro\n    environment:\n      - CUDA_VISIBLE_DEVICES=0,1\n    command: >\n      --model /models/MiniMax-M3-UD-IQ3_S-00001-of-00005.gguf\n      --host 0.0.0.0\n      --port 8080\n      --alias \"minimax-m3\"\n      --jinja\n      --reasoning-format deepseek\n      --predict 30000\n      --threads 28\n      --n-gpu-layers 999\n      --split-mode layer\n      --tensor-split 1,1\n      --parallel 1\n      --ctx-size 200000\n      --cache-type-k q8_0\n      --cache-type-v q8_0\n      --flash-attn on\n      --no-mmap\n      --mlock\n      --temp 1.0\n      --top-p 0.95\n      --top-k 40\n    ports:\n      - \"8080:8080\"\n","image":"llama-cpp-minimax-m3:latest","dockerfile":{"path":"~/llama/dockerfile-minimax-m3","content":"FROM nvidia/cuda:13.0.0-devel-ubuntu24.04 AS builder\n\nRUN apt-get update && apt-get install -y \\\n    build-essential \\\n    cmake \\\n    git \\\n    curl \\\n    libcurl4-openssl-dev \\\n    && rm -rf /var/lib/apt/lists/*\n\nWORKDIR /build\n# MiniMax M3 support is not yet upstream - it lives in PR #24523\n# (https://github.com/ggml-org/llama.cpp/pull/24523). Note: the experimental\n# GGUF is text-only and MiniMax Sparse Attention (MSA) is NOT supported, so\n# inference falls back to dense attention (keep --ctx-size modest).\nRUN git clone https://github.com/ggml-org/llama.cpp.git\nWORKDIR /build/llama.cpp\nRUN git fetch origin pull/24523/head:minimax-m3 && git checkout minimax-m3\n\n# sm_120 = RTX PRO 6000 Blackwell (CUDA toolkit >= 12.8; base image is 13.0).\nRUN cmake -B build \\\n        -DCMAKE_BUILD_TYPE=Release \\\n        -DGGML_CUDA=ON \\\n        -DCMAKE_CUDA_ARCHITECTURES=120 \\\n        -DLLAMA_CURL=ON \\\n        -DBUILD_SHARED_LIBS=OFF && \\\n    cmake --build build --config Release -j$(nproc) --target llama-server llama-cli\n\n# Runtime image\nFROM nvidia/cuda:13.0.0-runtime-ubuntu24.04\n\nRUN apt-get update && apt-get install -y \\\n    libgomp1 \\\n    libcurl4 \\\n    && rm -rf /var/lib/apt/lists/*\n\nCOPY --from=builder /build/llama.cpp/build/bin/llama* /usr/local/bin/\n\nVOLUME /models\nWORKDIR /app\n\nEXPOSE 8080\n\nENTRYPOINT [\"llama-server\"]\nCMD [\"--help\"]\n"}},"minimax-m2.7-q5":{"path":"~/llama/compose-MiniMax-M2.7-Q5.yml","content":"# MiniMax-M2.5 UD-Q5_K_XL\n# 230B total params, 10B active (MoE architecture)\n# Native context limit is 196608 (~192K)\n# Recommended settings: temp=1.0, top_p=0.95, top_k=40, min_p=0.01\n\n# 196K context 94% memory use,  88 TPS\n# 100K context 87% memory use, 97 TPS\n# 30K context 87% memory use, 88 TPS\n\nservices:\n  llama:\n    image: llama-cpp-minimax-m3:latest\n    restart: unless-stopped\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              device_ids: ['0', '1']\n              capabilities: [gpu]\n    ulimits:\n      memlock:\n        soft: -1\n        hard: -1\n    volumes:\n      - ~/models/MiniMax-M27:/models:ro\n    environment:\n      - CUDA_VISIBLE_DEVICES=0,1\n    command: >\n      --model /models/MiniMax-M2.7-UD-Q5_K_XL-00001-of-00005.gguf\n      --alias \"minimax-m2.7\"\n      --host 0.0.0.0\n      --port 8080\n      --jinja\n      --reasoning-format deepseek\n      --predict 30000\n      --threads 28\n      --n-gpu-layers 999\n      --split-mode layer\n      --tensor-split 1,1\n      --parallel 1\n      --ctx-size 196000\n      --cache-type-k q8_0\n      --cache-type-v q8_0\n      --flash-attn on\n      --no-mmap\n      --mlock\n      --temp 1.0\n      --min-p 0.01\n      --top-p 0.95\n      --top-k 40\n    ports:\n      - \"8080:8080\"\n","image":"llama-cpp-minimax-m3:latest","dockerfile":{"path":"~/llama/dockerfile-minimax-m3","content":"FROM nvidia/cuda:13.0.0-devel-ubuntu24.04 AS builder\n\nRUN apt-get update && apt-get install -y \\\n    build-essential \\\n    cmake \\\n    git \\\n    curl \\\n    libcurl4-openssl-dev \\\n    && rm -rf /var/lib/apt/lists/*\n\nWORKDIR /build\n# MiniMax M3 support is not yet upstream - it lives in PR #24523\n# (https://github.com/ggml-org/llama.cpp/pull/24523). Note: the experimental\n# GGUF is text-only and MiniMax Sparse Attention (MSA) is NOT supported, so\n# inference falls back to dense attention (keep --ctx-size modest).\nRUN git clone https://github.com/ggml-org/llama.cpp.git\nWORKDIR /build/llama.cpp\nRUN git fetch origin pull/24523/head:minimax-m3 && git checkout minimax-m3\n\n# sm_120 = RTX PRO 6000 Blackwell (CUDA toolkit >= 12.8; base image is 13.0).\nRUN cmake -B build \\\n        -DCMAKE_BUILD_TYPE=Release \\\n        -DGGML_CUDA=ON \\\n        -DCMAKE_CUDA_ARCHITECTURES=120 \\\n        -DLLAMA_CURL=ON \\\n        -DBUILD_SHARED_LIBS=OFF && \\\n    cmake --build build --config Release -j$(nproc) --target llama-server llama-cli\n\n# Runtime image\nFROM nvidia/cuda:13.0.0-runtime-ubuntu24.04\n\nRUN apt-get update && apt-get install -y \\\n    libgomp1 \\\n    libcurl4 \\\n    && rm -rf /var/lib/apt/lists/*\n\nCOPY --from=builder /build/llama.cpp/build/bin/llama* /usr/local/bin/\n\nVOLUME /models\nWORKDIR /app\n\nEXPOSE 8080\n\nENTRYPOINT [\"llama-server\"]\nCMD [\"--help\"]\n"}},"huihui-q8":{"path":"~/llama/compose-Huihui-Qwen3.6-35B-A3B-abliterated-Q8.yml","content":"# Huihui Qwen3.6-35B-A3B Claude-4.7-Opus abliterated - Q8_0 GGUF (llama.cpp), single card.\n# Same architecture as our Qwen3.6-35B-A3B-AWQ baseline, but Q8 + abliterated finetune\n# (two variables vs baseline: quant 8-bit-vs-AWQ AND finetune). MTP heads shipped but NOT used\n# here (no speculative) to match the AWQ baseline's no-MTP config. ~36GB weights -> 1 card.\nservices:\n  llama:\n    image: llama-cpp-minimax-m3:latest\n    restart: unless-stopped\n    deploy:\n      resources:\n        reservations:\n          devices:\n            - driver: nvidia\n              device_ids: ['0']\n              capabilities: [gpu]\n    ulimits:\n      memlock:\n        soft: -1\n        hard: -1\n    volumes:\n      - ~/models/Huihui-Qwen3.6-35B-A3B-Claude-4.7-Opus-abliterated-Q8:/models:ro\n    environment:\n      - CUDA_VISIBLE_DEVICES=0\n    command: >\n      --model /models/Huihui-Qwen3.6-35B-A3B-Claude-4.7-Opus-abliterated-ggml-model-Q8_0.gguf\n      --host 0.0.0.0\n      --port 8080\n      --alias \"huihui-q8\"\n      --jinja\n      --reasoning-format deepseek\n      --predict 30000\n      --threads 28\n      --n-gpu-layers 999\n      --parallel 1\n      --ctx-size 200000\n      --cache-type-k q8_0\n      --cache-type-v q8_0\n      --flash-attn on\n      --no-mmap\n      --mlock\n      --temp 1.0\n      --top-p 0.95\n    ports:\n      - \"8080:8080\"\n","image":"llama-cpp-minimax-m3:latest","dockerfile":{"path":"~/llama/dockerfile-minimax-m3","content":"FROM nvidia/cuda:13.0.0-devel-ubuntu24.04 AS builder\n\nRUN apt-get update && apt-get install -y \\\n    build-essential \\\n    cmake \\\n    git \\\n    curl \\\n    libcurl4-openssl-dev \\\n    && rm -rf /var/lib/apt/lists/*\n\nWORKDIR /build\n# MiniMax M3 support is not yet upstream - it lives in PR #24523\n# (https://github.com/ggml-org/llama.cpp/pull/24523). Note: the experimental\n# GGUF is text-only and MiniMax Sparse Attention (MSA) is NOT supported, so\n# inference falls back to dense attention (keep --ctx-size modest).\nRUN git clone https://github.com/ggml-org/llama.cpp.git\nWORKDIR /build/llama.cpp\nRUN git fetch origin pull/24523/head:minimax-m3 && git checkout minimax-m3\n\n# sm_120 = RTX PRO 6000 Blackwell (CUDA toolkit >= 12.8; base image is 13.0).\nRUN cmake -B build \\\n        -DCMAKE_BUILD_TYPE=Release \\\n        -DGGML_CUDA=ON \\\n        -DCMAKE_CUDA_ARCHITECTURES=120 \\\n        -DLLAMA_CURL=ON \\\n        -DBUILD_SHARED_LIBS=OFF && \\\n    cmake --build build --config Release -j$(nproc) --target llama-server llama-cli\n\n# Runtime image\nFROM nvidia/cuda:13.0.0-runtime-ubuntu24.04\n\nRUN apt-get update && apt-get install -y \\\n    libgomp1 \\\n    libcurl4 \\\n    && rm -rf /var/lib/apt/lists/*\n\nCOPY --from=builder /build/llama.cpp/build/bin/llama* /usr/local/bin/\n\nVOLUME /models\nWORKDIR /app\n\nEXPOSE 8080\n\nENTRYPOINT [\"llama-server\"]\nCMD [\"--help\"]\n"}}},"corr":{"opus-4.8":{"mean":2.51,"lo":0,"hi":3,"speed":null,"vram_w":null,"vram_total":null,"wall_avg":346,"wall_lo":82,"wall_hi":900,"tokens":15750,"ctx":null,"api":true},"fable-5":{"mean":2.53,"lo":0,"hi":3,"speed":null,"vram_w":null,"vram_total":null,"wall_avg":65,"wall_lo":2,"wall_hi":1454,"tokens":37333,"ctx":null,"api":true},"sonnet-5":{"mean":2.22,"lo":0,"hi":3,"speed":null,"vram_w":null,"vram_total":null,"wall_avg":452,"wall_lo":63,"wall_hi":900,"tokens":15434,"ctx":null,"api":true},"deepseek-v4-flash":{"mean":2.04,"lo":0,"hi":3,"speed":172.7,"vram_w":150,"vram_total":186.7,"wall_avg":236,"wall_lo":42,"wall_hi":2214,"tokens":13327,"ctx":54145,"api":false},"qwen3.6-35b-awq":{"mean":1.46,"lo":0,"hi":3,"speed":184.8,"vram_w":25,"vram_total":86.2,"wall_avg":392,"wall_lo":37,"wall_hi":1931,"tokens":36141,"ctx":50031,"api":false},"huihui-q8":{"mean":1.33,"lo":0,"hi":3,"speed":156.1,"vram_w":38,"vram_total":38.0,"wall_avg":309,"wall_lo":22,"wall_hi":1918,"tokens":16352,"ctx":18472,"api":false},"qwen3.6-27b-int4":{"mean":1.84,"lo":0,"hi":3,"speed":71.3,"vram_w":18,"vram_total":87.0,"wall_avg":458,"wall_lo":98,"wall_hi":1855,"tokens":19020,"ctx":43748,"api":false},"mimo-v2.5":{"mean":1.3,"lo":0,"hi":3,"speed":89.7,"vram_w":140,"vram_total":144.2,"wall_avg":625,"wall_lo":68,"wall_hi":1497,"tokens":32620,"ctx":14460,"api":false},"deepseek-v4-max":{"mean":2.21,"lo":0,"hi":3,"speed":null,"vram_w":150,"vram_total":null,"wall_avg":232,"wall_lo":58,"wall_hi":728,"tokens":16641,"ctx":64969,"api":false},"sonnet-4.5":{"mean":1.9,"lo":0,"hi":3,"speed":null,"vram_w":null,"vram_total":null,"wall_avg":178,"wall_lo":57,"wall_hi":603,"tokens":7775,"ctx":null,"api":true},"minimax-m2.7-q5":{"mean":1.58,"lo":0,"hi":3,"speed":34.8,"vram_w":158,"vram_total":185.1,"wall_avg":430,"wall_lo":64,"wall_hi":1542,"tokens":10496,"ctx":13518,"api":false},"minimax-m3-iq3s":{"mean":1.92,"lo":0,"hi":3,"speed":26.3,"vram_w":163,"vram_total":177.9,"wall_avg":1886,"wall_lo":131,"wall_hi":4800,"tokens":22049,"ctx":15039,"api":false},"step-3.7-flash":{"mean":1.36,"lo":0,"hi":3,"speed":63.2,"vram_w":156,"vram_total":161.1,"wall_avg":1162,"wall_lo":287,"wall_hi":3406,"tokens":22781,"ctx":17339,"api":false}},"runpts":{"opus-4.8":[[476,2],[458,2],[460,2],[211,3],[234,3],[236,3],[366,3],[270,3],[231,3],[261,3],[296,3],[178,3],[188,3],[121,3],[299,2],[146,3],[188,3],[142,3],[151,3],[92,3],[125,3],[82,3],[132,3],[104,3],[142,3],[673,3],[777,0],[668,0],[896,2],[555,0],[682,0],[900,3],[900,1.5],[244,1],[150,3],[218,3],[364,3],[462,3],[400,3],[463,3],[339,3],[306,3],[332,3],[300,3]],"fable-5":[[278,2],[272,2],[289,3],[214,3],[243,3],[238,3],[728,3],[1440,2],[1302,0],[92,3],[181,3],[230,3],[199,3],[1454,2],[970,2],[146,1],[351,3],[474,3],[306,3]],"sonnet-5":[[734,0],[435,3],[395,3],[446,3],[265,3],[231,2],[900,2],[900,0],[900,0],[146,3],[63,3],[64,3],[266,3],[156,3],[279,3],[665,2],[760,2],[746,2],[174,1],[230,1],[234,1],[635,3],[360,3],[870,3]],"deepseek-v4-flash":[[183,1],[145,1],[115,2],[176,3],[178,0],[174,3],[140,2],[110,2],[95,3],[163,3],[173,3],[145,2],[53,3],[42,3],[102,2],[72,2],[104,3],[105,3],[124,3],[82,0],[57,3],[69,3],[76,3],[77,3],[53,2],[86,3],[72,2],[94,3],[494,0],[345,0],[335,2],[472,2],[350,0],[452,0],[444,0],[2214,0],[444,2],[81,1],[126,1],[95,1],[113,1],[140,3],[182,3],[124,3],[136,3],[232,3],[147,3],[117,2.5],[169,3]],"qwen3.6-35b-awq":[[234,1],[212,1],[246,2],[353,1.5],[190,2],[138,0],[212,3],[85,1],[212,1.5],[387,2],[121,0],[136,2],[68,0],[63,3],[90,2],[86,2],[77,2],[69,3],[103,2],[62,3],[60,3],[37,3],[378,3],[185,2],[86,2],[458,0],[712,2.5],[586,2],[958,0],[1358,1.5],[602,0],[761,0],[1024,0],[651,2],[204,1],[106,1],[126,3],[195,3],[99,0],[91,2],[1800,0],[141,3],[1931,0],[1848,3],[115,0]],"huihui-q8":[[125,0],[61,0],[225,1],[49,1.5],[99,2],[129,0],[51,0],[122,2],[139,2],[64,3],[22,2],[51,3],[47,3],[356,2],[100,2],[554,0],[1915,0],[197,0],[594,1],[122,0],[315,0],[1918,3],[100,2.5],[48,3]],"qwen3.6-27b-int4":[[161,1],[528,1],[463,2],[271,3],[491,3],[174,3],[677,2],[486,3],[420,3],[484,3],[276,3],[172,2],[161,3],[171,2],[175,2],[155,0],[167,3],[127,0],[188,0],[111,3],[98,3],[99,3],[119,3],[193,3],[187,2],[1200,0],[1200,0],[1420,0],[1039,2],[1531,2],[879,2],[1855,0],[866,0],[249,1],[190,1],[690,3],[309,3],[274,3],[191,0],[500,3],[346,0],[292,2],[294,3],[284,3]],"mimo-v2.5":[[999,0],[1043,0],[1110,0],[380,3],[998,0],[1410,3],[243,2],[923,0],[791,3],[490,3],[700,0],[290,0],[341,3],[910,0],[333,3],[428,3],[340,3],[711,2],[487,0],[68,3],[83,3],[163,3],[168,3],[147,3],[151,3],[260,3],[307,3],[152,3],[837,0],[1077,0],[1497,0],[1079,0],[1218,0],[1306,0],[1303,0],[968,0],[962,0],[416,3],[776,1],[285,1],[258,1],[480,1],[239,0],[1040,0],[331,3],[310,3],[324,3],[987,0],[1162,0],[404,3]],"deepseek-v4-max":[[180,2],[206,1],[177,2],[193,1],[213,2],[169,3],[286,3],[161,2],[168,3],[248,3],[139,3],[218,2],[82,1],[88,3],[58,3],[110,3],[96,3],[95,3],[73,3],[78,3],[633,2],[617,0],[721,0],[728,0],[181,1],[119,1],[110,1],[212,3],[159,3],[243,3],[329,3],[343,3]],"sonnet-4.5":[[213,1],[142,2],[167,1],[143,3],[164,3],[150,1],[124,2],[168,3],[170,2],[168,3],[122,2],[152,2],[136,2],[81,2],[106,2],[146,2],[125,2],[94,2],[57,3],[60,3],[74,3],[106,3],[106,2],[120,3],[138,3],[203,0],[424,0],[421,0],[603,0],[319,0],[330,2],[207,3],[139,1],[228,1],[176,3],[141,3],[148,3]],"minimax-m2.7-q5":[[1082,2],[157,1],[197,1],[251,2],[298,0],[237,2],[129,3],[64,3],[272,2],[85,2],[1542,0],[1251,2],[501,2],[333,3],[220,3],[260,2]],"minimax-m3-iq3s":[[4296,1],[1233,2],[780,3],[1579,3],[4800,0],[1958,2],[326,3],[131,3],[171,3],[1214,3],[859,3],[4800,0],[4800,0],[1335,3],[1667,1],[920,2],[1197,3]],"step-3.7-flash":[[564,1],[300,2],[688,3],[1556,3],[287,0],[336,0],[502,3],[415,3],[480,3],[391,3],[1200,2],[3406,2],[2947,0],[1200,0],[639,1],[642,1],[1200,0],[775,0],[843,0]]},"toktask":{"cand-01":{"out":21426,"ctx":32162},"cand-02":{"out":19120,"ctx":32048},"cand-03":{"out":8778,"ctx":26904},"cand-04":{"out":3700,"ctx":17948},"cand-05":{"out":14973,"ctx":35444},"cand-06":{"out":65566,"ctx":64881},"cand-07":{"out":10120,"ctx":38102},"cand-08":{"out":14816,"ctx":43067}},"taskinfo":{"cand-01":{"title":"Idempotent-update guard","role":"Discriminator","tests":"A save that changes nothing still fires an expensive downstream state transition; separately, a class of legacy records silently skips an audit step. Two orthogonal defects, graded independently - and a blanket guard that also suppresses legitimate updates doesn't count as a fix.","tiers":["Doesn't compile, guts the flow, or a no-op save still triggers the transition.","Compiles and the path still works, but neither defect is fixed cleanly (or a fix adds a regression that cancels it out).","Exactly one of the two independent defects fixed cleanly, no regressions.","Both fixed cleanly - real field-level change-detection for the no-op case AND gating the audit step on the record's own state rather than a fragile proxy."],"prompt":""},"cand-02":{"title":"Change-detection before a side effect","role":"Discriminator","tests":"Re-saving a parent record re-runs a costly re-processing side effect on a child field that didn't actually change. The guard has to compare on normalized values (whitespace / line-ending-only differences shouldn't count as a change) and run before the side effect fires.","tiers":["Doesn't compile, or an unchanged value still re-triggers the re-processing.","Compiles and the flow works, but no effective guard is added.","Short-circuits when the value is unchanged - but compares un-normalized text (or still resets state), so cosmetic-only edits can slip through.","Normalizes before comparing AND leaves the existing processing state intact on a true no-op - the subtle ordering + conditional-status requirement."],"prompt":""},"cand-03":{"title":"Search tokenization","role":"Discriminator","tests":"Terms with internal punctuation (initials, separators) return nothing, because the query and the index tokenize punctuation differently. The real fix aligns the two at the tokenizer; patching the single failing query string is brittle.","tiers":["Doesn't compile, or punctuated terms still can't match.","Makes the one reported query work, but by special-casing that exact input (brittle).","A whole class of punctuated queries becomes findable (separators handled consistently), not special-cased to one example.","Fixed generally at the tokenizer so all separators are handled uniformly - the understanding signal, not a punctuation special-case or a schema hack."],"prompt":""},"cand-04":{"title":"Query scoping filter","role":"Floor (excluded from headline)","tests":"A status filter is scoped too broadly and surfaces records that should be excluded - re-scope the underlying query condition. Straightforward; every model solves it, so it's kept only as a floor (tiers barely separate).","tiers":["Doesn't compile, or still surfaces the records that should be excluded.","Re-scopes so the wrong records are excluded - the behavioral fix (this task's core is already near-complete).","Narrows toward the right scope but in application code after the query, rather than in the query itself.","Re-scopes at the query AND removes the now-redundant UI/branches left behind."],"prompt":""},"cand-05":{"title":"Over-strict invariant","role":"Near-floor","tests":"A safety assertion is too strict and fires on a legitimate edge case, blocking an operation that should succeed. The fix has to narrow the assertion to what it's actually meant to guard - without deleting the safety check.","tiers":["Doesn't compile, or the assertion still throws on the valid edge case.","Compiles and the calc path still works, but the assertion behavior isn't meaningfully fixed.","The assertion no longer fires on the edge case - but via deleting or blanket-disabling it, losing the real invariant.","Narrows the assertion so it applies only when the guarded condition truly holds - keeping the safety check for the cases it's meant to catch."],"prompt":""},"cand-06":{"title":"Cross-transaction atomicity","role":"Hardest discriminator","tests":"Two related writes run in separate transactions; if the second fails (timeout, contention) they're left permanently out of sync. It compiles and passes the happy path - the desync only manifests under failure, so you have to reason about the failure path, not the common case.","tiers":["Doesn't compile, or the two-transaction window is untouched (only a retry loop or failure logging).","Compiles and the flow works, but the atomicity hazard is unaddressed.","Identifies the hazard and partially mitigates it (reorder, retry, narrow the window) - but a failure can still desync; or achieves atomicity with a correctness flaw.","Makes the two writes atomic (one transaction) so a mid-way failure can no longer desync them - by either valid mechanism, and correct (no double-count, self-corrects on later recompute)."],"prompt":""},"cand-07":{"title":"Boundary / off-by-one","role":"Dead floor (excluded)","tests":"An unclamped boundary value produces wrong behaviour right at the limit (a rounding / comparison-direction bug). A one-line boundary fix - every model localizes it, so it's kept only as a floor (core is already near-complete, no real split).","tiers":["Doesn't compile, or the boundary is still wrong at the limit.","Localizes and fixes the boundary correctly (this task's core is already near-complete).","Touches the right area / understands the off-by-one but applies an inexact fix.","Correct, exact boundary fix."],"prompt":""},"cand-08":{"title":"Right-surface + recompute","role":"Discriminator (surface trap)","tests":"The change must land on the correct one of two similar internal surfaces (a routing trap), then trigger a follow-up recompute + refresh - a second step that's easy to miss. Putting it on the wrong surface compiles fine but is wrong.","tiers":["Doesn't compile, no working edit path, or wrong surface with no recompute.","A working, wired-up edit path exists - but on the wrong surface.","Right surface, working edit path - but no follow-up recompute, so the change silently doesn't take effect.","Right surface AND recomputes / refreshes afterward, so the change actually takes effect."],"prompt":""}},"speedcfg":{"deepseek-v4-flash":"MTP=1 ON (speculative, b12x)","deepseek-v4-max":"MTP=1 ON (speculative)","qwen3.6-35b-awq":"MTP OFF","qwen3.6-27b-int4":"MTP OFF (needs vLLM nightly)","mimo-v2.5":"no speculative (llama.cpp)","minimax-m3-iq3s":"no speculative (llama.cpp)","minimax-m2.7-q5":"no speculative (llama.cpp)","step-3.7-flash":"no speculative (llama.cpp)","huihui-q8":"no speculative (MTP heads present, unused - matches AWQ baseline)"},"stylemap":{"fable-5":{"ctx":89239,"tokens":25677,"steps":36.2,"qual":2.38,"tasks":8,"api":true},"sonnet-5":{"ctx":66549,"tokens":17198,"steps":33.6,"qual":2.0,"tasks":5,"api":true},"deepseek-v4-flash":{"ctx":53201,"tokens":10522,"steps":16.5,"qual":2.12,"tasks":8,"api":false},"qwen3.6-35b-awq":{"ctx":48536,"tokens":25920,"steps":16.4,"qual":1.56,"tasks":8,"api":false},"huihui-q8":{"ctx":15839,"tokens":11544,"steps":13.1,"qual":1.44,"tasks":8,"api":false},"qwen3.6-27b-int4":{"ctx":40496,"tokens":16483,"steps":12.4,"qual":2.0,"tasks":8,"api":false},"mimo-v2.5":{"ctx":14124,"tokens":29092,"steps":14.8,"qual":2.0,"tasks":8,"api":false},"deepseek-v4-max":{"ctx":65844,"tokens":17019,"steps":20.4,"qual":2.25,"tasks":8,"api":false},"minimax-m2.7-q5":{"ctx":18998,"tokens":14471,"steps":28.4,"qual":2.38,"tasks":8,"api":false},"minimax-m3-iq3s":{"ctx":17951,"tokens":24913,"steps":46.5,"qual":2.38,"tasks":8,"api":false},"step-3.7-flash":{"ctx":20418,"tokens":21916,"steps":15.8,"qual":1.75,"tasks":8,"api":false}},"costmap":{"opus-4.8":{"speed":null,"vram":null,"wall":236,"qual":2.51,"cards":null,"api":true},"fable-5":{"speed":null,"vram":null,"wall":243,"qual":2.53,"cards":null,"api":true},"sonnet-5":{"speed":null,"vram":null,"wall":360,"qual":2.22,"cards":null,"api":true},"deepseek-v4-flash":{"speed":173,"vram":186.7,"wall":113,"qual":2.04,"cards":2,"api":false},"qwen3.6-35b-awq":{"speed":185,"vram":86.2,"wall":185,"qual":1.46,"cards":1,"api":false},"huihui-q8":{"speed":156,"vram":38.0,"wall":122,"qual":1.33,"cards":1,"api":false},"qwen3.6-27b-int4":{"speed":71,"vram":87.0,"wall":249,"qual":1.84,"cards":1,"api":false},"mimo-v2.5":{"speed":90,"vram":144.2,"wall":480,"qual":1.3,"cards":2,"api":false},"deepseek-v4-max":{"speed":173,"vram":186.7,"wall":177,"qual":2.21,"cards":null,"api":false},"sonnet-4.5":{"speed":null,"vram":null,"wall":142,"qual":1.9,"cards":null,"api":true},"minimax-m2.7-q5":{"speed":35,"vram":185.1,"wall":251,"qual":1.58,"cards":2,"api":false},"minimax-m3-iq3s":{"speed":26,"vram":177.9,"wall":1233,"qual":1.92,"cards":2,"api":false},"step-3.7-flash":{"speed":63,"vram":161.1,"wall":564,"qual":1.36,"cards":2,"api":false}},"runtimedist":{"grid":[0.35,0.366,0.384,0.401,0.42,0.44,0.46,0.482,0.505,0.528,0.553,0.579,0.606,0.634,0.664,0.695,0.727,0.761,0.797,0.834,0.873,0.914,0.957,1.002,1.048,1.097,1.149,1.202,1.259,1.318,1.379,1.444,1.511,1.582,1.656,1.733,1.814,1.899,1.988,2.081,2.178,2.28,2.387,2.499,2.616,2.738,2.866,3.0],"models":{"opus-4.8":{"dens":[0.001,0.001,0.002,0.005,0.008,0.015,0.025,0.04,0.06,0.087,0.12,0.16,0.206,0.259,0.32,0.388,0.465,0.551,0.644,0.739,0.829,0.908,0.966,0.998,1.0,0.973,0.922,0.854,0.778,0.703,0.636,0.579,0.532,0.493,0.458,0.421,0.38,0.334,0.283,0.23,0.179,0.133,0.094,0.063,0.04,0.024,0.014,0.007],"medrel":1.06,"avg_wall":346,"n":44,"api":true},"fable-5":{"dens":[0.002,0.003,0.006,0.012,0.021,0.034,0.052,0.076,0.105,0.137,0.169,0.198,0.222,0.239,0.251,0.262,0.28,0.312,0.364,0.439,0.536,0.648,0.764,0.868,0.949,0.994,1.0,0.967,0.901,0.813,0.715,0.616,0.524,0.442,0.373,0.316,0.268,0.23,0.199,0.177,0.164,0.16,0.166,0.178,0.194,0.21,0.22,0.222],"medrel":1.11,"avg_wall":495,"n":19,"api":true},"sonnet-5":{"dens":[0.0,0.0,0.0,0.001,0.002,0.005,0.01,0.018,0.032,0.054,0.086,0.13,0.188,0.26,0.345,0.439,0.539,0.64,0.739,0.829,0.905,0.963,0.995,1.0,0.976,0.927,0.861,0.788,0.72,0.668,0.637,0.63,0.643,0.667,0.693,0.711,0.714,0.702,0.676,0.643,0.611,0.589,0.578,0.58,0.587,0.592,0.585,0.561],"medrel":1.19,"avg_wall":452,"n":24,"api":true},"deepseek-v4-flash":{"dens":[0.579,0.661,0.736,0.8,0.854,0.899,0.934,0.961,0.982,0.995,1.0,0.997,0.984,0.961,0.928,0.885,0.833,0.772,0.704,0.63,0.553,0.474,0.396,0.322,0.254,0.195,0.146,0.106,0.075,0.051,0.034,0.023,0.015,0.009,0.006,0.004,0.004,0.005,0.007,0.011,0.016,0.023,0.031,0.04,0.048,0.054,0.058,0.058],"medrel":0.57,"avg_wall":236,"n":61,"api":false},"qwen3.6-35b-awq":{"dens":[0.364,0.438,0.513,0.584,0.649,0.706,0.752,0.788,0.814,0.831,0.842,0.85,0.86,0.874,0.895,0.922,0.952,0.98,0.999,1.0,0.979,0.935,0.869,0.787,0.696,0.606,0.521,0.447,0.384,0.333,0.292,0.26,0.233,0.212,0.194,0.18,0.169,0.161,0.159,0.161,0.169,0.18,0.193,0.205,0.213,0.214,0.208,0.195],"medrel":0.76,"avg_wall":392,"n":45,"api":false},"huihui-q8":{"dens":[0.986,1.0,0.976,0.924,0.859,0.793,0.738,0.699,0.677,0.671,0.678,0.692,0.709,0.726,0.738,0.741,0.734,0.712,0.674,0.621,0.555,0.482,0.406,0.334,0.272,0.222,0.187,0.163,0.148,0.138,0.131,0.123,0.116,0.11,0.109,0.116,0.133,0.162,0.201,0.248,0.297,0.343,0.378,0.399,0.401,0.386,0.355,0.312],"medrel":0.58,"avg_wall":309,"n":24,"api":false},"qwen3.6-27b-int4":{"dens":[0.001,0.003,0.005,0.009,0.015,0.025,0.038,0.056,0.078,0.106,0.137,0.172,0.211,0.254,0.3,0.351,0.407,0.467,0.533,0.604,0.678,0.755,0.83,0.898,0.954,0.989,1.0,0.983,0.941,0.878,0.803,0.725,0.653,0.593,0.546,0.511,0.485,0.463,0.441,0.415,0.386,0.353,0.319,0.286,0.255,0.227,0.201,0.176],"medrel":1.19,"avg_wall":458,"n":44,"api":false},"mimo-v2.5":{"dens":[0.0,0.0,0.0,0.0,0.0,0.001,0.002,0.003,0.006,0.011,0.019,0.03,0.046,0.068,0.097,0.132,0.176,0.227,0.285,0.347,0.412,0.475,0.532,0.579,0.614,0.636,0.646,0.65,0.652,0.659,0.678,0.71,0.757,0.813,0.874,0.929,0.971,0.995,1.0,0.99,0.974,0.96,0.954,0.96,0.971,0.979,0.971,0.936],"medrel":1.95,"avg_wall":625,"n":53,"api":false},"deepseek-v4-max":{"dens":[0.053,0.081,0.118,0.163,0.215,0.271,0.329,0.387,0.443,0.498,0.554,0.613,0.676,0.743,0.81,0.874,0.929,0.97,0.994,1.0,0.987,0.957,0.911,0.854,0.789,0.718,0.646,0.574,0.506,0.441,0.38,0.323,0.27,0.221,0.176,0.136,0.101,0.073,0.05,0.033,0.021,0.013,0.007,0.004,0.002,0.001,0.0,0.0],"medrel":0.8,"avg_wall":232,"n":32,"api":false},"sonnet-4.5":{"dens":[0.191,0.238,0.291,0.351,0.415,0.484,0.555,0.626,0.696,0.761,0.82,0.871,0.915,0.951,0.977,0.994,1.0,0.992,0.967,0.924,0.864,0.787,0.699,0.604,0.509,0.419,0.338,0.269,0.211,0.164,0.126,0.096,0.071,0.051,0.036,0.024,0.015,0.009,0.005,0.003,0.002,0.001,0.0,0.0,0.0,0.0,0.0,0.0],"medrel":0.7,"avg_wall":178,"n":37,"api":true},"minimax-m2.7-q5":{"dens":[0.01,0.018,0.031,0.051,0.079,0.117,0.164,0.219,0.281,0.346,0.412,0.474,0.532,0.582,0.625,0.658,0.679,0.688,0.682,0.662,0.631,0.591,0.55,0.515,0.492,0.487,0.503,0.543,0.604,0.68,0.764,0.847,0.919,0.972,1.0,1.0,0.972,0.918,0.845,0.758,0.665,0.572,0.486,0.411,0.349,0.299,0.259,0.225],"medrel":1.45,"avg_wall":430,"n":16,"api":false},"minimax-m3-iq3s":{"dens":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001,0.001,0.002,0.004,0.007,0.012,0.019,0.028,0.039,0.053,0.069,0.086,0.104,0.122,0.141,0.161,0.184,0.212,0.251,0.304,0.376,0.467,0.578,0.699,0.818,0.918,0.983,1.0],"medrel":5.82,"avg_wall":1886,"n":17,"api":false},"step-3.7-flash":{"dens":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001,0.001,0.002,0.004,0.008,0.013,0.02,0.029,0.041,0.054,0.068,0.081,0.093,0.103,0.11,0.116,0.123,0.132,0.144,0.162,0.185,0.215,0.251,0.294,0.345,0.406,0.477,0.559,0.652,0.75,0.845,0.927,0.982,1.0,0.975],"medrel":3.1,"avg_wall":1162,"n":22,"api":false}}},"efftps":{"opus-4.8":{"med":48.9,"lo":27.2,"hi":73.0,"n":42,"api":true},"fable-5":{"med":52.7,"lo":36.7,"hi":62.5,"n":41,"api":true},"sonnet-5":{"med":42.5,"lo":17.3,"hi":74.1,"n":21,"api":true},"deepseek-v4-flash":{"med":64.1,"lo":0.2,"hi":127.1,"n":61,"api":false},"qwen3.6-35b-awq":{"med":98.9,"lo":2.5,"hi":160.7,"n":45,"api":false},"huihui-q8":{"med":88.1,"lo":2.5,"hi":138.8,"n":24,"api":false},"qwen3.6-27b-int4":{"med":39.1,"lo":3.4,"hi":59.0,"n":44,"api":false},"mimo-v2.5":{"med":56.6,"lo":3.3,"hi":80.9,"n":53,"api":false},"deepseek-v4-max":{"med":64.9,"lo":10.9,"hi":110.7,"n":32,"api":false},"sonnet-4.5":{"med":44.7,"lo":33.2,"hi":58.2,"n":37,"api":true},"minimax-m2.7-q5":{"med":29.4,"lo":8.3,"hi":41.3,"n":16,"api":false},"minimax-m3-iq3s":{"med":19.8,"lo":0.9,"hi":32.8,"n":16,"api":false},"step-3.7-flash":{"med":11.7,"lo":0.3,"hi":45.2,"n":19,"api":false}},"plan":{"models":[{"model":"opus-4.8","disp":"Opus 4.8","api":true,"tasks":[{"task":"cand-01","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-02","have":8,"target":3,"remaining":0,"status":"complete"},{"task":"cand-03","have":8,"target":3,"remaining":0,"status":"complete"},{"task":"cand-04","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-05","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-06","have":8,"target":3,"remaining":0,"status":"complete"},{"task":"cand-07","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-08","have":8,"target":3,"remaining":0,"status":"complete"}],"rem_runs":0,"taper":{"status":"n/a","remaining":0,"eta":0},"vram":{"status":"n/a","remaining":0,"eta":0},"eta_min":0,"avg_wall":346},{"model":"fable-5","disp":"Fable 5","api":true,"tasks":[{"task":"cand-01","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-02","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-03","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-04","have":1,"target":3,"remaining":2,"status":"partial"},{"task":"cand-05","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-06","have":2,"target":3,"remaining":1,"status":"partial"},{"task":"cand-07","have":1,"target":3,"remaining":2,"status":"partial"},{"task":"cand-08","have":3,"target":3,"remaining":0,"status":"complete"}],"rem_runs":5,"taper":{"status":"n/a","remaining":0,"eta":0},"vram":{"status":"n/a","remaining":0,"eta":0},"eta_min":9,"avg_wall":65},{"model":"sonnet-5","disp":"Sonnet 5","api":true,"tasks":[{"task":"cand-01","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-02","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-03","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-04","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-05","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-06","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-07","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-08","have":3,"target":3,"remaining":0,"status":"complete"}],"rem_runs":0,"taper":{"status":"n/a","remaining":0,"eta":0},"vram":{"status":"n/a","remaining":0,"eta":0},"eta_min":0,"avg_wall":452},{"model":"deepseek-v4-flash","disp":"DeepSeek V4 Flash (Think-High)","api":false,"tasks":[{"task":"cand-01","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-02","have":9,"target":8,"remaining":0,"status":"complete"},{"task":"cand-03","have":8,"target":8,"remaining":0,"status":"complete"},{"task":"cand-04","have":4,"target":3,"remaining":0,"status":"complete"},{"task":"cand-05","have":4,"target":3,"remaining":0,"status":"complete"},{"task":"cand-06","have":9,"target":8,"remaining":0,"status":"complete"},{"task":"cand-07","have":4,"target":3,"remaining":0,"status":"complete"},{"task":"cand-08","have":8,"target":8,"remaining":0,"status":"complete"}],"rem_runs":0,"taper":{"status":"complete","pts":7,"remaining":0,"eta":0},"vram":{"status":"complete","remaining":0,"eta":0,"gb":186.7},"eta_min":0,"avg_wall":236},{"model":"qwen3.6-35b-awq","disp":"Qwen3.6-35B-A3B AWQ","api":false,"tasks":[{"task":"cand-01","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-02","have":8,"target":8,"remaining":0,"status":"complete"},{"task":"cand-03","have":8,"target":8,"remaining":0,"status":"complete"},{"task":"cand-04","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-05","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-06","have":9,"target":8,"remaining":0,"status":"complete"},{"task":"cand-07","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-08","have":8,"target":8,"remaining":0,"status":"complete"}],"rem_runs":0,"taper":{"status":"complete","pts":6,"remaining":0,"eta":0},"vram":{"status":"complete","remaining":0,"eta":0,"gb":86.2},"eta_min":0,"avg_wall":392},{"model":"huihui-q8","disp":"Huihui Qwen3.6-35B abliterated (Q8)","api":false,"tasks":[{"task":"cand-01","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-02","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-03","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-04","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-05","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-06","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-07","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-08","have":3,"target":3,"remaining":0,"status":"complete"}],"rem_runs":0,"taper":{"status":"complete","pts":6,"remaining":0,"eta":0},"vram":{"status":"complete","remaining":0,"eta":0,"gb":38.0},"eta_min":0,"avg_wall":309},{"model":"qwen3.6-27b-int4","disp":"Qwen3.6-27B INT4","api":false,"tasks":[{"task":"cand-01","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-02","have":8,"target":8,"remaining":0,"status":"complete"},{"task":"cand-03","have":8,"target":8,"remaining":0,"status":"complete"},{"task":"cand-04","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-05","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-06","have":8,"target":8,"remaining":0,"status":"complete"},{"task":"cand-07","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-08","have":8,"target":8,"remaining":0,"status":"complete"}],"rem_runs":0,"taper":{"status":"complete","pts":6,"remaining":0,"eta":0},"vram":{"status":"complete","remaining":0,"eta":0,"gb":87.0},"eta_min":0,"avg_wall":458},{"model":"mimo-v2.5","disp":"MiMo-V2.5","api":false,"tasks":[{"task":"cand-01","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-02","have":8,"target":8,"remaining":0,"status":"complete"},{"task":"cand-03","have":8,"target":8,"remaining":0,"status":"complete"},{"task":"cand-04","have":5,"target":3,"remaining":0,"status":"complete"},{"task":"cand-05","have":5,"target":3,"remaining":0,"status":"complete"},{"task":"cand-06","have":8,"target":8,"remaining":0,"status":"complete"},{"task":"cand-07","have":5,"target":3,"remaining":0,"status":"complete"},{"task":"cand-08","have":8,"target":8,"remaining":0,"status":"complete"}],"rem_runs":0,"taper":{"status":"complete","pts":6,"remaining":0,"eta":0},"vram":{"status":"complete","remaining":0,"eta":0,"gb":144.2},"eta_min":0,"avg_wall":625},{"model":"sonnet-4.5","disp":"Sonnet 4.5","api":true,"tasks":[{"task":"cand-01","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-02","have":8,"target":3,"remaining":0,"status":"complete"},{"task":"cand-03","have":8,"target":3,"remaining":0,"status":"complete"},{"task":"cand-04","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-05","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-06","have":6,"target":3,"remaining":0,"status":"complete"},{"task":"cand-07","have":3,"target":3,"remaining":0,"status":"complete"},{"task":"cand-08","have":3,"target":3,"remaining":0,"status":"complete"}],"rem_runs":0,"taper":{"status":"n/a","remaining":0,"eta":0},"vram":{"status":"n/a","remaining":0,"eta":0},"eta_min":0,"avg_wall":178},{"model":"minimax-m2.7-q5","disp":"MiniMax M2.7 (Q5)","api":false,"tasks":[{"task":"cand-01","have":2,"target":2,"remaining":0,"status":"complete"},{"task":"cand-02","have":2,"target":2,"remaining":0,"status":"complete"},{"task":"cand-03","have":2,"target":2,"remaining":0,"status":"complete"},{"task":"cand-04","have":2,"target":2,"remaining":0,"status":"complete"},{"task":"cand-05","have":2,"target":2,"remaining":0,"status":"complete"},{"task":"cand-06","have":2,"target":2,"remaining":0,"status":"complete"},{"task":"cand-07","have":2,"target":2,"remaining":0,"status":"complete"},{"task":"cand-08","have":2,"target":2,"remaining":0,"status":"complete"}],"rem_runs":0,"taper":{"status":"complete","pts":6,"remaining":0,"eta":0},"vram":{"status":"complete","remaining":0,"eta":0,"gb":185.1},"eta_min":0,"avg_wall":430}],"global":[{"name":"Uniform re-judge (one-time: old patches → current rubric)","status":"pending","detail":"79 old patches still on the pre-update rubric","remaining":79,"eta":33}],"totals":{"rem_runs":5,"rem_judge":79,"eta_min":42}},"completeness":{"opus-4.8":{"disp":"Opus 4.8","api":true,"tasks":[3,8,8,3,3,8,3,8],"nruns":44,"taper":{"s":"na","d":"N/A"},"vram":{"s":"na","d":"N/A"},"tokens":{"s":"stale","d":"pre-fix"},"gaps":[["token style (re-measure)","re-run the suite, API $"]]},"fable-5":{"disp":"Fable 5","api":true,"tasks":[3,3,426,1,3,14,1,3],"nruns":454,"taper":{"s":"na","d":"N/A"},"vram":{"s":"na","d":"N/A"},"tokens":{"s":"ok","d":"measured"},"gaps":[]},"sonnet-5":{"disp":"Sonnet 5","api":true,"tasks":[3,3,3,3,3,3,3,3],"nruns":24,"taper":{"s":"na","d":"N/A"},"vram":{"s":"na","d":"N/A"},"tokens":{"s":"ok","d":"measured"},"gaps":[]},"deepseek-v4-flash":{"disp":"DeepSeek V4 Flash (Think-High)","api":false,"tasks":[3,9,8,4,4,9,4,8],"nruns":49,"taper":{"s":"ok","d":"7 pts"},"vram":{"s":"ok","d":"186.7 GB"},"tokens":{"s":"ok","d":"measured"},"gaps":[]},"qwen3.6-35b-awq":{"disp":"Qwen3.6-35B-A3B AWQ","api":false,"tasks":[3,8,8,3,3,9,3,8],"nruns":45,"taper":{"s":"ok","d":"6 pts"},"vram":{"s":"ok","d":"86.2 GB"},"tokens":{"s":"ok","d":"measured"},"gaps":[]},"huihui-q8":{"disp":"Huihui Qwen3.6-35B abliterated (Q8)","api":false,"tasks":[3,3,3,3,3,3,3,3],"nruns":24,"taper":{"s":"ok","d":"6 pts"},"vram":{"s":"ok","d":"38.0 GB"},"tokens":{"s":"ok","d":"measured"},"gaps":[]},"qwen3.6-27b-int4":{"disp":"Qwen3.6-27B INT4","api":false,"tasks":[3,8,8,3,3,8,3,8],"nruns":44,"taper":{"s":"ok","d":"6 pts"},"vram":{"s":"ok","d":"87.0 GB"},"tokens":{"s":"ok","d":"measured"},"gaps":[]},"mimo-v2.5":{"disp":"MiMo-V2.5","api":false,"tasks":[3,8,8,5,5,8,5,8],"nruns":50,"taper":{"s":"ok","d":"6 pts"},"vram":{"s":"ok","d":"144.2 GB"},"tokens":{"s":"ok","d":"measured"},"gaps":[]},"deepseek-v4-max":{"disp":"DeepSeek V4 Flash (Think-Max)","api":false,"tasks":[4,4,4,4,4,4,4,4],"nruns":32,"taper":{"s":"proxy","d":"≡ Flash"},"vram":{"s":"proxy","d":"≡ Flash"},"tokens":{"s":"ok","d":"measured"},"gaps":[]},"sonnet-4.5":{"disp":"Sonnet 4.5","api":true,"tasks":[3,8,8,3,3,6,3,3],"nruns":37,"taper":{"s":"na","d":"N/A"},"vram":{"s":"na","d":"N/A"},"tokens":{"s":"stale","d":"pre-fix"},"gaps":[["token style (re-measure)","re-run the suite, API $"]]},"minimax-m2.7-q5":{"disp":"MiniMax M2.7 (Q5)","api":false,"tasks":[2,2,2,2,2,2,2,2],"nruns":16,"taper":{"s":"ok","d":"6 pts"},"vram":{"s":"ok","d":"185.1 GB"},"tokens":{"s":"ok","d":"measured"},"gaps":[]},"minimax-m3-iq3s":{"disp":"MiniMax M3 IQ3_S","api":false,"tasks":[2,2,3,2,2,2,2,2],"nruns":17,"taper":{"s":"ok","d":"6 pts"},"vram":{"s":"ok","d":"177.9 GB"},"tokens":{"s":"ok","d":"measured"},"gaps":[]},"step-3.7-flash":{"disp":"Step 3.7 Flash","api":false,"tasks":[2,2,2,2,2,4,4,4],"nruns":22,"taper":{"s":"ok","d":"6 pts"},"vram":{"s":"ok","d":"161.1 GB"},"tokens":{"s":"ok","d":"measured"},"gaps":[]}},"totals":{"models":13,"tasks":8,"runs":445},"medwall":{"step-3.7-flash":731,"sonnet-4.5":146,"mimo-v2.5":481,"minimax-m2.7-q5":251,"huihui-q8":122,"deepseek-v4-flash":140,"deepseek-v4-max":178,"fable-5":278,"qwen3.6-35b-awq":188,"qwen3.6-27b-int4":276,"minimax-m3-iq3s":1223,"sonnet-5":377,"opus-4.8":283},"walls":{"step-3.7-flash":{"cand-01":[564,300],"cand-02":[688,1556],"cand-03":[287,336],"cand-04":[502,415],"cand-05":[480,391],"cand-06":[2400,1200,3406,2947],"cand-07":[2400,1200,639,642],"cand-08":[2400,1200,775,843]},"sonnet-4.5":{"cand-07":[207,139,228],"cand-01":[213,142,167],"cand-02":[143,164,150,124,168,170,168,122],"cand-03":[152,136,81,106,146,125,94,57],"cand-04":[60,74,106],"cand-05":[106,120,138],"cand-06":[203,424,421,603,319,330],"cand-08":[176,141,148]},"mimo-v2.5":{"cand-01":[481,999,1043,1110],"cand-02":[427,537,380,998,1410,243,923,791,490,700],"cand-03":[290,341,910,333,428,340,711,487],"cand-04":[68,83,163,168,147],"cand-05":[151,260,307,152,837],"cand-07":[416,776,285,258,480],"cand-08":[239,1040,331,310,324,987,1162,404],"cand-06":[1077,1497,1079,1218,1306,1303,968,962]},"minimax-m2.7-q5":{"cand-04":[108,129,64],"cand-01":[1082,157],"cand-02":[197,251],"cand-03":[298,237],"cand-05":[272,85],"cand-06":[1542,1251],"cand-07":[501,333],"cand-08":[220,260]},"huihui-q8":{"cand-04":[42,64,22,51],"cand-01":[125,61,225],"cand-02":[49,99,129],"cand-03":[51,122,139],"cand-05":[47,356,100],"cand-06":[554,1915,197],"cand-07":[594,122,315],"cand-08":[1918,100,48]},"deepseek-v4-flash":{"cand-01":[93,99,1200,215,183,145,115],"cand-02":[129,160,168,113,176,178,174,140,110,95,163,173,145],"cand-03":[53,42,102,72,104,105,124,82],"cand-04":[57,69,76,77],"cand-05":[53,86,72,94],"cand-06":[900,296,268,507,494,345,335,472,350,452,444,2214,444],"cand-07":[81,126,95,113],"cand-08":[140,182,124,136,232,147,117,169]},"deepseek-v4-max":{"cand-01":[180,206,177,193],"cand-02":[213,169,286,161],"cand-03":[168,248,139,218],"cand-04":[82,88,58,110],"cand-05":[96,95,73,78],"cand-06":[633,617,721,728],"cand-07":[181,119,110,212],"cand-08":[159,243,329,343]},"fable-5":{"cand-01":[278,272,289],"cand-02":[214,243,238],"cand-04":[92],"cand-05":[181,230,199],"cand-07":[146],"cand-08":[351,474,306],"cand-03":[728,1440,1302],"cand-06":[1454,970]},"qwen3.6-35b-awq":{"cand-04":[21,62,60,37],"cand-01":[234,212,246],"cand-02":[353,190,138,212,85,212,387,121],"cand-03":[136,68,63,90,86,77,69,103],"cand-05":[378,185,86],"cand-06":[458,712,586,958,1358,602,761,1024,651],"cand-07":[204,106,126],"cand-08":[195,99,91,1800,141,1931,1848,115]},"qwen3.6-27b-int4":{"cand-04":[132,111,98,99],"cand-01":[161,528,463],"cand-02":[271,491,174,677,486,420,484,276],"cand-03":[172,161,171,175,155,167,127,188],"cand-05":[119,193,187],"cand-06":[1200,1200,1420,1039,1531,879,1855,866],"cand-07":[249,190,690],"cand-08":[309,274,191,500,346,292,294,284]},"minimax-m3-iq3s":{"cand-04":[150,131,171],"cand-01":[4296,1233],"cand-02":[780,1579],"cand-03":[4800,1958,326],"cand-05":[1214,859],"cand-06":[4800,4800],"cand-07":[1335,1667],"cand-08":[920,1197]},"sonnet-5":{"cand-04":[146,63,64],"cand-01":[734,435,395],"cand-02":[446,265,231],"cand-03":[900,900,900],"cand-05":[266,156,279],"cand-06":[665,760,746],"cand-07":[174,230,234],"cand-08":[635,360,870]},"opus-4.8":{"cand-01":[476,458,460],"cand-02":[211,234,236,366,270,231,261,296],"cand-03":[178,188,121,299,146,188,142,151],"cand-04":[92,125,82],"cand-05":[132,104,142],"cand-06":[673,777,668,896,555,682,900,900],"cand-07":[244,150,218],"cand-08":[364,462,400,463,339,306,332,300]}},"colors":{"opus-4.8":"#D97757","sonnet-5":"#D97757","sonnet-4.5":"#D97757","fable-5":"#D97757","deepseek-v4-flash":"#3B82F6","deepseek-v4-max":"#3B82F6","qwen3.6-35b-awq":"#9257F5","qwen3.6-27b-int4":"#9257F5","huihui-q8":"#9257F5","minimax-m2.7-q5":"#E62E6E","minimax-m3-iq3s":"#E62E6E","mimo-v2.5":"#2DD4BF","step-3.7-flash":"#3FB950"},"tiercolors":{"fail":"#E05252","core":"#E0A93C","silver":"#4C9BE0","gold":"#3FA85F"}}