Skip to content

Instantly share code, notes, and snippets.

@sammcj
Created May 5, 2026 00:30
Show Gist options
  • Select an option

  • Save sammcj/81f8157957c241501bc0d428c2539574 to your computer and use it in GitHub Desktop.

Select an option

Save sammcj/81f8157957c241501bc0d428c2539574 to your computer and use it in GitHub Desktop.
Update a GGUF model's chat template from a source model on Hugging Face, a Jinja file on disk, or a pasted Jinja template
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "gguf>=0.10",
# "jinja2>=3.1",
# "numpy",
# "tqdm",
# ]
# ///
"""Update tokenizer.chat_template in GGUF files in the current directory.

Run from a directory containing GGUF files. Rewrites each file with the new
template via gguf.scripts.gguf_new_metadata (ships with the gguf package).

Usage:
    uv run update_gguf_chat_template.py   # uv handles deps
    python update_gguf_chat_template.py   # if gguf+jinja2 already installed
"""
from __future__ import annotations
import difflib
import os
import shutil
import struct
import subprocess
import sys
import tempfile
import urllib.request
from pathlib import Path
# Number of bytes requested up front when fetching a remote GGUF header.
# A tokenizer vocabulary can occupy tens of MB, so raise this value if
# template extraction fails for a particular model.
REMOTE_CHUNK_BYTES = 64 * 1024**2
def list_gguf_files() -> list[Path]:
    """Return the regular files matching ``*.gguf`` in the current working
    directory, sorted by path."""
    matches = [entry for entry in Path.cwd().glob("*.gguf") if entry.is_file()]
    matches.sort()
    return matches
def select_files(files: list[Path]) -> list[Path]:
    """Show a numbered menu of ``files`` and return the ones the user picks.

    The user answers with 1-based numbers separated by commas and/or spaces,
    or with ``a`` / ``all`` to take every file. Each file is returned at most
    once, in the order it was first picked, so that a repeated number does not
    cause the same file to be processed twice.

    Exits the program (``SystemExit``) when ``files`` is empty, when an entry
    is not a number, when a number is out of range, or when nothing was
    selected.
    """
    if not files:
        sys.exit("No .gguf files found in current directory.")
    print("\nGGUF files in current directory:")
    for i, f in enumerate(files, 1):
        size_gb = f.stat().st_size / 1024**3
        print(f" [{i:>2}] {f.name} ({size_gb:.2f} GB)")
    print("\nSelect file(s) by number (comma or space separated; 'a' for all):")
    raw = input("> ").strip()
    if raw.lower() in ("a", "all"):
        return files
    chosen: list[Path] = []
    for tok in raw.replace(",", " ").split():
        try:
            idx = int(tok) - 1
        except ValueError:
            # Report bad input the same way as the other input errors rather
            # than letting a traceback escape.
            sys.exit(f"Not a valid file number: {tok}")
        if not 0 <= idx < len(files):
            sys.exit(f"Index out of range: {tok}")
        if files[idx] not in chosen:
            chosen.append(files[idx])
    if not chosen:
        sys.exit("No files selected.")
    return chosen
# ---- GGUF header parser (just enough to find tokenizer.chat_template) -----
# Numeric ids of the GGUF metadata value types, numbered 0..12 in the order
# of the GGUFValueType enum (gguf/constants.py).
(
    _UINT8, _INT8, _UINT16, _INT16, _UINT32, _INT32, _FLOAT32,
    _BOOL, _STRING, _ARRAY, _UINT64, _INT64, _FLOAT64,
) = range(13)

# Scalar value type -> (little-endian struct format, encoded size in bytes).
# _STRING and _ARRAY are variable-length and therefore have no entry here.
_SCALAR_FMT = {
    vtype: (fmt, struct.calcsize(fmt))
    for vtype, fmt in (
        (_UINT8, "<B"), (_INT8, "<b"),
        (_UINT16, "<H"), (_INT16, "<h"),
        (_UINT32, "<I"), (_INT32, "<i"),
        (_FLOAT32, "<f"), (_BOOL, "<?"),
        (_UINT64, "<Q"), (_INT64, "<q"), (_FLOAT64, "<d"),
    )
}
def _read_gguf_string(f) -> str:
    """Read one GGUF string from the binary stream ``f``.

    The encoding read here is a little-endian uint64 byte length followed by
    that many bytes of UTF-8 text.

    Raises:
        EOFError: the stream ended before the length prefix or the full string
            body could be read. Without this check a truncated file would
            yield a silently shortened string.
        UnicodeDecodeError: the string body is not valid UTF-8.
    """
    prefix = f.read(8)
    if len(prefix) != 8:
        raise EOFError("GGUF data ended inside a string length prefix")
    (n,) = struct.unpack("<Q", prefix)
    data = f.read(n)
    if len(data) != n:
        raise EOFError(
            f"GGUF data ended inside a string (expected {n} bytes, got {len(data)})"
        )
    return data.decode("utf-8")
def _read_gguf_value(f, vtype: int):
    """Read one metadata value of type ``vtype`` from the binary stream ``f``.

    Strings are read via ``_read_gguf_string``; arrays are read as a uint32
    element type, a uint64 element count, and then that many elements
    (recursively); every other type is a fixed-size scalar described by
    ``_SCALAR_FMT``.

    Returns:
        A ``str``, a ``list`` of values, or the unpacked scalar.

    Raises:
        ValueError: ``vtype`` is not a known value type. This is raised
            instead of the bare ``KeyError`` from the table lookup, because
            callers treat ``KeyError`` as "key not present in the file".
        struct.error: the stream ended inside a fixed-size field.
    """
    if vtype == _STRING:
        return _read_gguf_string(f)
    if vtype == _ARRAY:
        (etype,) = struct.unpack("<I", f.read(4))
        (n,) = struct.unpack("<Q", f.read(8))
        return [_read_gguf_value(f, etype) for _ in range(n)]
    try:
        fmt, size = _SCALAR_FMT[vtype]
    except KeyError:
        raise ValueError(f"Unknown GGUF value type: {vtype}") from None
    return struct.unpack(fmt, f.read(size))[0]
def extract_template_from_gguf(path: Path) -> str:
    """Return the value of ``tokenizer.chat_template`` from a GGUF file.

    Only the header is parsed: magic, version, tensor count, KV count, then
    the KV pairs in order until the wanted key is found. This also works on a
    partial download as long as the KV pairs up to and including the template
    fit inside the truncated portion.

    Raises:
        ValueError: the file does not start with the ``GGUF`` magic, or the
            header is truncated or otherwise unreadable.
        TypeError: ``tokenizer.chat_template`` exists but is not a string.
        KeyError: every KV pair was read and none was
            ``tokenizer.chat_template``.
    """
    with open(path, "rb") as f:
        if f.read(4) != b"GGUF":
            raise ValueError(f"Not a GGUF file: {path}")
        try:
            f.read(4)  # version
            f.read(8)  # tensor count
            (kv_count,) = struct.unpack("<Q", f.read(8))
            for _ in range(kv_count):
                key = _read_gguf_string(f)
                (vtype,) = struct.unpack("<I", f.read(4))
                value = _read_gguf_value(f, vtype)
                if key == "tokenizer.chat_template":
                    if not isinstance(value, str):
                        raise TypeError(
                            f"tokenizer.chat_template is {type(value).__name__}, expected str. "
                            "Multi-template GGUFs are not supported by this script."
                        )
                    return value
        except (struct.error, EOFError, UnicodeDecodeError) as err:
            # struct.error: a fixed-size field was cut short. EOFError: a
            # reader reported end of data. UnicodeDecodeError: a string was
            # cut mid-character or is corrupt. All of these mean the header
            # could not be read to the end, so say so instead of surfacing a
            # low-level parsing error.
            raise ValueError(
                f"GGUF header of {path} is truncated or corrupt ({err}). For a "
                "remote file the KV section may extend past the downloaded "
                "chunk; try increasing REMOTE_CHUNK_BYTES."
            ) from err
    raise KeyError(
        "tokenizer.chat_template not found. The KV section may extend past the "
        "downloaded chunk; try increasing REMOTE_CHUNK_BYTES."
    )
# ---- Template sources ------------------------------------------------------
def fetch_remote_template(url: str) -> str:
    """Download the start of a remote GGUF file and return its chat template.

    A Hugging Face ``/blob/`` page URL is rewritten to the matching
    ``/resolve/`` download URL. At most ``REMOTE_CHUNK_BYTES`` bytes are
    requested (HTTP ``Range``) and at most that many are written to a
    temporary file, which is parsed with ``extract_template_from_gguf`` and
    always deleted afterwards.

    Raises whatever ``urllib.request.urlopen`` or
    ``extract_template_from_gguf`` raise.
    """
    if "huggingface.co" in url and "/blob/" in url:
        # Only the first "/blob/" is the page-vs-download path segment.
        url = url.replace("/blob/", "/resolve/", 1)
    print(f"\nFetching up to {REMOTE_CHUNK_BYTES // 1024 // 1024} MB from:\n {url}")
    fd, tmp_name = tempfile.mkstemp(suffix=".gguf.partial")
    os.close(fd)
    tmp_path = Path(tmp_name)
    try:
        req = urllib.request.Request(
            url, headers={"Range": f"bytes=0-{REMOTE_CHUNK_BYTES - 1}"}
        )
        # The timeout stops a stalled connection from hanging the script.
        with urllib.request.urlopen(req, timeout=60) as resp, open(tmp_path, "wb") as out:
            # Enforce the byte limit locally as well: a server that ignores
            # the Range header answers with the complete file, which can be
            # many GB.
            remaining = REMOTE_CHUNK_BYTES
            while remaining > 0:
                chunk = resp.read(min(1024 * 1024, remaining))
                if not chunk:
                    break
                out.write(chunk)
                remaining -= len(chunk)
        return extract_template_from_gguf(tmp_path)
    finally:
        tmp_path.unlink(missing_ok=True)
def read_local_template(path_str: str) -> str:
    """Return the UTF-8 text of the template file at ``path_str``.

    A leading ``~`` is expanded. Exits the program (``SystemExit``) when the
    path is not an existing regular file.
    """
    template_path = Path(path_str).expanduser()
    if template_path.is_file():
        return template_path.read_text(encoding="utf-8")
    sys.exit(f"File not found: {template_path}")
def paste_template() -> str:
    """Read a template typed or pasted on stdin and return it.

    Lines are collected until one that is just ``EOF`` (ignoring surrounding
    whitespace) or until stdin ends; the terminator line is not included.
    The collected lines are joined with ``"\\n"``.
    """
    print("\nPaste the template. End with a line containing only 'EOF':")
    collected: list[str] = []
    try:
        while (entry := input()).strip() != "EOF":
            collected.append(entry)
    except EOFError:
        # End of stdin terminates the paste just like an explicit EOF line.
        pass
    return "\n".join(collected)
# ---- Template validation ---------------------------------------------------
def validate_template(template: str) -> None:
    """Check that ``template`` parses as Jinja2 and try a dry render.

    The template may come from a downloaded file, so it is compiled and
    rendered in Jinja2's sandboxed environment rather than a plain one. The
    ``raise_exception`` and ``strftime_now`` helpers that Hugging Face chat
    templates commonly call are provided, so that such templates are not
    reported as failing merely because those names are undefined.

    Outcomes:
      * jinja2 not importable: prints a warning and returns without checking.
      * Syntax error: exits the program (``SystemExit``).
      * Render error on the sample conversation: prints a warning and asks
        whether to continue; exits unless the answer is ``y`` / ``yes``.
      * Success: prints the rendered length.
    """
    try:
        import jinja2
        from jinja2.sandbox import SandboxedEnvironment
    except ImportError:
        print("WARNING: jinja2 not available, skipping validation.", file=sys.stderr)
        return
    from datetime import datetime

    def raise_exception(message):
        # Lets a template reject unsupported input with its own message.
        raise jinja2.TemplateError(message)

    def strftime_now(fmt):
        # Current local time formatted with the template-supplied pattern.
        return datetime.now().strftime(fmt)

    env = SandboxedEnvironment(
        trim_blocks=True, lstrip_blocks=True,
        extensions=["jinja2.ext.loopcontrols"],
    )
    env.globals["raise_exception"] = raise_exception
    env.globals["strftime_now"] = strftime_now
    try:
        compiled = env.from_string(template)
    except jinja2.TemplateSyntaxError as e:
        sys.exit(f"Jinja2 syntax error at line {e.lineno}: {e.message}")
    sample = {
        "messages": [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
        ],
        "add_generation_prompt": True,
        "bos_token": "<bos>", "eos_token": "<eos>",
        "tools": None, "tool_choice": None,
    }
    try:
        rendered = compiled.render(**sample)
    except Exception as e:
        # Deliberately broad: any render-time failure (including a sandbox
        # SecurityError) is reported and left to the user to judge, because
        # the sample conversation cannot cover every template's expectations.
        print(f"WARNING: template parsed but failed to render with sample messages: "
              f"{type(e).__name__}: {e}", file=sys.stderr)
        if input("Continue anyway? [y/N] ").strip().lower() not in ("y", "yes"):
            sys.exit("Aborted.")
        return
    print(f"Template validated (renders to {len(rendered)} chars on sample input).")
# ---- Diff display ----------------------------------------------------------
# Colour the diff only when stdout is a terminal and NO_COLOR is not set in
# the environment.
_USE_COLOUR = sys.stdout.isatty() and "NO_COLOR" not in os.environ

# ANSI escape sequences used when painting diff lines.
_RESET, _RED, _GREEN, _CYAN, _BOLD = (
    "\033[0m",
    "\033[31m",
    "\033[32m",
    "\033[36m",
    "\033[1m",
)
def _paint(line: str) -> str:
    """Return ``line`` wrapped in the ANSI colour that matches its diff prefix.

    File headers (``+++`` / ``---``) are bold, hunk headers (``@@``) cyan,
    additions green and removals red. The line is returned unchanged when
    colour is disabled or when it has none of these prefixes.
    """
    if not _USE_COLOUR:
        return line
    # The header check must come first: "+++" and "---" also start with the
    # single-character add/remove markers.
    if line.startswith(("+++", "---")):
        colour = _BOLD
    elif line.startswith("@@"):
        colour = _CYAN
    elif line.startswith("+"):
        colour = _GREEN
    elif line.startswith("-"):
        colour = _RED
    else:
        return line
    return f"{colour}{line}{_RESET}"
def get_current_template(path: Path) -> str | None:
    """Return the chat template stored in the GGUF file at ``path``, or
    ``None`` when ``extract_template_from_gguf`` raises ``KeyError``. Any
    other exception propagates."""
    try:
        template = extract_template_from_gguf(path)
    except KeyError:
        template = None
    return template
def show_diff(name: str, current: str | None, new: str, max_lines: int = 500) -> bool:
"""Print a unified diff. Returns True if there's a change, False if identical."""
current_str = current if current is not None else ""
if current_str == new:
print(f"\n=== {name}: no change ===")
return False
print(f"\n=== diff for {name} ===")
if current is None:
print("(no existing tokenizer.chat_template — adding new)")
diff = list(difflib.unified_diff(
current_str.splitlines(),
new.splitlines(),
fromfile=f"current: {name}",
tofile="new",
lineterm="",
))
shown = diff[:max_lines]
print("\n".join(_paint(line) for line in shown))
if len(diff) > max_lines:
print(f"... ({len(diff) - max_lines} more diff lines truncated) ...")
return True
# ---- GGUF rewrite ----------------------------------------------------------
def free_bytes(p: Path) -> int:
    """Return the free space, in bytes, reported for the directory that
    contains ``p``."""
    usage = shutil.disk_usage(p.parent)
    return usage.free
def update_gguf(input_path: Path, template_file: Path) -> None:
    """Rewrite ``input_path`` with the chat template stored in ``template_file``.

    The new file is written next to the original as ``<name>.new`` by running
    ``gguf.scripts.gguf_new_metadata`` in a subprocess, and is then renamed
    over the original.

    Exits the program (``SystemExit``) when the directory lacks room for a
    second copy of the file plus a 64 MiB margin. If the subprocess fails or
    is interrupted, the partial ``.new`` file is removed and the exception is
    re-raised (``subprocess.CalledProcessError`` on a non-zero exit); the
    original file is left untouched in that case.
    """
    out_path = input_path.with_suffix(input_path.suffix + ".new")
    needed = input_path.stat().st_size + 64 * 1024 * 1024
    if free_bytes(input_path) < needed:
        # Fractional GB: integer division would print "~0 GB" for small models.
        sys.exit(
            f"Not enough free space on {input_path.parent} for {input_path.name} "
            f"(need ~{needed / 1024**3:.2f} GB)."
        )
    print(f"\n→ {input_path.name}: writing {out_path.name}")
    cmd = [
        sys.executable, "-m", "gguf.scripts.gguf_new_metadata",
        "--chat-template-file", str(template_file),
        "--force",
        str(input_path), str(out_path),
    ]
    try:
        subprocess.run(cmd, check=True)
    except BaseException:
        # Includes KeyboardInterrupt: never leave a multi-GB partial copy
        # behind when the rewrite did not complete.
        out_path.unlink(missing_ok=True)
        raise
    print(f"→ {input_path.name}: replacing original")
    out_path.replace(input_path)
def confirm(prompt: str) -> bool:
    """Ask a yes/no question and return ``True`` only for ``y`` or ``yes``
    (case-insensitive, surrounding whitespace ignored)."""
    answer = input(f"{prompt} [y/N] ")
    normalised = answer.strip().lower()
    return normalised == "y" or normalised == "yes"
def main() -> int:
import importlib.util
if importlib.util.find_spec("gguf") is None:
sys.exit(
"gguf package not available. Run via 'uv run update_gguf_chat_template.py' "
"or install gguf into the active environment."
)
selected = select_files(list_gguf_files())
print(f"\nSelected {len(selected)} file(s):")
for f in selected:
print(f" - {f.name}")
print("\nTemplate source:")
print(" A) HuggingFace URL to a GGUF file (extract its chat_template)")
print(" B) Path to a local jinja file")
print(" C) Paste the template content directly")
choice = input("Choose [A/B/C]: ").strip().upper()
if choice == "A":
template = fetch_remote_template(input("HuggingFace GGUF URL: ").strip())
elif choice == "B":
template = read_local_template(input("Path to jinja file: ").strip())
elif choice == "C":
template = paste_template()
else:
sys.exit("Invalid choice.")
if not template.strip():
sys.exit("Empty template, aborting.")
validate_template(template)
preview = template[:300].replace("\n", "\\n")
print(f"\n--- Template preview ({len(template)} chars) ---")
print(preview + ("..." if len(template) > 300 else ""))
print("---")
to_update: list[Path] = []
for f in selected:
current = get_current_template(f)
if show_diff(f.name, current, template):
to_update.append(f)
if not to_update:
print("\nAll selected files already have this template, nothing to do.")
return 0
fd, tmpl_name = tempfile.mkstemp(suffix=".jinja", prefix="chat_template_")
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(template)
template_file = Path(tmpl_name)
try:
msg = (f"\nOverwrite {len(to_update)} file(s) with this template? "
f"(each will be rewritten to a .new file then renamed over the original)")
if not confirm(msg):
print("Aborted.")
return 0
for f in to_update:
update_gguf(f, template_file)
print("\nDone.")
finally:
template_file.unlink(missing_ok=True)
return 0
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment