Created
May 5, 2026 00:30
-
-
Save sammcj/81f8157957c241501bc0d428c2539574 to your computer and use it in GitHub Desktop.
Update a GGUF model's chat template from a source model on Hugging Face, another file on disk, or a Jinja template
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = ">=3.11" | |
| # dependencies = [ | |
| # "gguf>=0.10", | |
| # "jinja2>=3.1", | |
| # "numpy", | |
| # "tqdm", | |
| # ] | |
| # /// | |
| """Update tokenizer.chat_template in GGUF files in the current directory. | |
| Run from a directory containing GGUF files. Rewrites each file with the new | |
| template via gguf.scripts.gguf_new_metadata (ships with the gguf package). | |
| uv run update_gguf_chat_template.py # uv handles deps | |
| python update_gguf_chat_template.py # if gguf+jinja2 already installed | |
| """ | |
| from __future__ import annotations | |
| import difflib | |
| import os | |
| import shutil | |
| import struct | |
| import subprocess | |
| import sys | |
| import tempfile | |
| import urllib.request | |
| from pathlib import Path | |
# Number of leading bytes requested (via an HTTP Range header) when reading a
# remote GGUF header. A tokenizer vocabulary can take tens of MB, so raise
# this value if extraction fails for a particular model.
REMOTE_CHUNK_BYTES = 64 << 20  # 64 MiB
def list_gguf_files() -> list[Path]:
    """Return the ``*.gguf`` files in the current working directory, sorted by path."""
    found = [entry for entry in Path.cwd().glob("*.gguf") if entry.is_file()]
    found.sort()
    return found
def select_files(files: list[Path]) -> list[Path]:
    """Show a numbered menu of ``files`` and return the ones the user picks.

    The answer is read with ``input()``: either ``a``/``all`` (every file) or
    1-based indices separated by commas and/or spaces. Repeated indices are
    collapsed so a file is never selected twice.

    Exits the process (``sys.exit``) when ``files`` is empty, when a token is
    not a number, when an index is out of range, or when nothing is selected.
    """
    if not files:
        sys.exit("No .gguf files found in current directory.")
    print("\nGGUF files in current directory:")
    for i, f in enumerate(files, 1):
        size_gb = f.stat().st_size / 1024**3
        print(f" [{i:>2}] {f.name} ({size_gb:.2f} GB)")
    print("\nSelect file(s) by number (comma or space separated; 'a' for all):")
    raw = input("> ").strip()
    if raw.lower() in ("a", "all"):
        return files
    chosen: list[Path] = []
    for tok in raw.replace(",", " ").split():
        try:
            idx = int(tok) - 1
        except ValueError:
            # A typo should produce a readable message, not a traceback.
            sys.exit(f"Invalid selection (not a number): {tok}")
        if not 0 <= idx < len(files):
            sys.exit(f"Index out of range: {tok}")
        # Skip repeats so the same file is not rewritten twice later on.
        if files[idx] not in chosen:
            chosen.append(files[idx])
    if not chosen:
        sys.exit("No files selected.")
    return chosen
| # ---- GGUF header parser (just enough to find tokenizer.chat_template) ----- | |
# Numeric codes of the GGUFValueType enum (gguf/constants.py).
(
    _UINT8, _INT8, _UINT16, _INT16, _UINT32, _INT32, _FLOAT32,
    _BOOL, _STRING, _ARRAY, _UINT64, _INT64, _FLOAT64,
) = range(13)

# Fixed-width scalar types -> (little-endian struct format, size in bytes).
# _STRING and _ARRAY are variable-length and therefore have no entry here.
_SCALAR_FMT = {
    vtype: (fmt, struct.calcsize(fmt))
    for vtype, fmt in (
        (_UINT8, "<B"), (_INT8, "<b"),
        (_UINT16, "<H"), (_INT16, "<h"),
        (_UINT32, "<I"), (_INT32, "<i"),
        (_FLOAT32, "<f"), (_BOOL, "<?"),
        (_UINT64, "<Q"), (_INT64, "<q"), (_FLOAT64, "<d"),
    )
}
def _read_gguf_string(f) -> str:
    """Read one length-prefixed string from the binary stream ``f``.

    The layout is a little-endian uint64 byte count followed by that many
    bytes, which are decoded as UTF-8.

    Raises:
        EOFError: the stream ends before the length prefix or the payload is
            complete. Without this check a string cut off by a truncated file
            could be returned as if it were whole.
        UnicodeDecodeError: the payload is not valid UTF-8.
    """
    prefix = f.read(8)
    if len(prefix) != 8:
        raise EOFError("GGUF data ended inside a string length prefix")
    (n,) = struct.unpack("<Q", prefix)
    payload = f.read(n)
    if len(payload) != n:
        raise EOFError(
            f"GGUF data ended inside a string (expected {n} bytes, got {len(payload)})"
        )
    return payload.decode("utf-8")
def _read_gguf_value(f, vtype: int):
    """Read one value of GGUF type ``vtype`` from the binary stream ``f``.

    Strings are delegated to ``_read_gguf_string``. Arrays are stored as a
    uint32 element type plus a uint64 count, followed by the elements, and are
    returned as a list. Every other type is a fixed-width scalar looked up in
    ``_SCALAR_FMT``.

    Raises:
        ValueError: ``vtype`` is not a known value type. This is deliberately
            not ``KeyError``, because ``get_current_template`` treats
            ``KeyError`` as "no template present".
        EOFError: the stream ends in the middle of the value.
    """
    if vtype == _STRING:
        return _read_gguf_string(f)
    if vtype == _ARRAY:
        header = f.read(12)
        if len(header) != 12:
            raise EOFError("GGUF data ended inside an array header")
        # "<IQ" has no padding: 4-byte element type, then 8-byte element count.
        etype, n = struct.unpack("<IQ", header)
        return [_read_gguf_value(f, etype) for _ in range(n)]
    try:
        fmt, size = _SCALAR_FMT[vtype]
    except KeyError:
        raise ValueError(f"Unknown GGUF value type: {vtype}") from None
    raw = f.read(size)
    if len(raw) != size:
        raise EOFError("GGUF data ended inside a scalar value")
    return struct.unpack(fmt, raw)[0]
def extract_template_from_gguf(path: Path) -> str:
    """Return the ``tokenizer.chat_template`` string stored in a GGUF file.

    Only the header and key/value metadata are parsed, so this also works on a
    partial download as long as every KV pair up to and including the template
    fits inside the truncated portion.

    Raises:
        ValueError: the file does not start with the ``GGUF`` magic, or the
            metadata ends unexpectedly / cannot be decoded (typically a
            truncated download).
        TypeError: the key exists but its value is not a single string.
        KeyError: all KV pairs were read and none is
            ``tokenizer.chat_template``.
    """
    with open(path, "rb") as f:
        if f.read(4) != b"GGUF":
            raise ValueError(f"Not a GGUF file: {path}")
        try:
            # Header after the magic: version (uint32), tensor count (uint64),
            # KV count (uint64). Only the KV count is needed.
            _version, _tensor_count, kv_count = struct.unpack("<IQQ", f.read(20))
            for _ in range(kv_count):
                key = _read_gguf_string(f)
                (vtype,) = struct.unpack("<I", f.read(4))
                value = _read_gguf_value(f, vtype)
                if key == "tokenizer.chat_template":
                    if not isinstance(value, str):
                        raise TypeError(
                            f"tokenizer.chat_template is {type(value).__name__}, expected str. "
                            "Multi-template GGUFs are not supported by this script."
                        )
                    return value
        except (EOFError, struct.error, UnicodeDecodeError) as err:
            # Running out of bytes mid-parse is how a truncated file shows up;
            # report it with the actionable hint instead of a low-level error.
            raise ValueError(
                f"GGUF metadata in {path} ended unexpectedly or is corrupt "
                f"({type(err).__name__}: {err}). If this is a partial download, the KV "
                "section may extend past the downloaded chunk; try increasing "
                "REMOTE_CHUNK_BYTES."
            ) from err
    raise KeyError("tokenizer.chat_template not found in the GGUF metadata.")
| # ---- Template sources ------------------------------------------------------ | |
def fetch_remote_template(url: str) -> str:
    """Download the start of a remote GGUF file and return its chat template.

    Hugging Face ``/blob/`` page URLs are rewritten to ``/resolve/`` download
    URLs. At most ``REMOTE_CHUNK_BYTES`` bytes are written to a temporary
    file, which is parsed with ``extract_template_from_gguf`` and always
    removed afterwards.

    Raises whatever ``urllib.request.urlopen`` or
    ``extract_template_from_gguf`` raise.
    """
    if "huggingface.co" in url and "/blob/" in url:
        # Only the first "/blob/" is the route segment; later ones would be
        # part of the file path and must stay untouched.
        url = url.replace("/blob/", "/resolve/", 1)
    print(f"\nFetching up to {REMOTE_CHUNK_BYTES // 1024 // 1024} MB from:\n {url}")
    fd, tmp_name = tempfile.mkstemp(suffix=".gguf.partial")
    os.close(fd)
    tmp_path = Path(tmp_name)
    try:
        req = urllib.request.Request(
            url, headers={"Range": f"bytes=0-{REMOTE_CHUNK_BYTES - 1}"}
        )
        # The timeout applies to each blocking socket operation, so a stalled
        # connection cannot hang the script forever.
        with urllib.request.urlopen(req, timeout=60) as resp, open(tmp_path, "wb") as out:
            # Enforce the cap locally as well: a server that ignores the
            # Range header would otherwise stream the entire model file.
            remaining = REMOTE_CHUNK_BYTES
            while remaining > 0:
                chunk = resp.read(min(remaining, 1024 * 1024))
                if not chunk:
                    break
                out.write(chunk)
                remaining -= len(chunk)
        return extract_template_from_gguf(tmp_path)
    finally:
        tmp_path.unlink(missing_ok=True)
def read_local_template(path_str: str) -> str:
    """Return the text of the template file at ``path_str`` (``~`` is expanded).

    The file is decoded as UTF-8. A leading byte-order mark, if present, is
    dropped so it does not become part of the template.

    Exits the process (``sys.exit``) if the path is not an existing file.
    """
    p = Path(path_str).expanduser()
    if not p.is_file():
        sys.exit(f"File not found: {p}")
    # "utf-8-sig" reads plain UTF-8 too; it only strips a BOM when one exists.
    return p.read_text(encoding="utf-8-sig")
def paste_template() -> str:
    """Read a template interactively and return its lines joined with newlines.

    Reading stops at end of input or at the first line that, once stripped of
    surrounding whitespace, equals ``EOF``; that terminator line is not
    included in the result.
    """
    print("\nPaste the template. End with a line containing only 'EOF':")
    collected: list[str] = []
    try:
        while (entry := input()).strip() != "EOF":
            collected.append(entry)
    except EOFError:
        # End of stdin ends the paste just like the explicit terminator.
        pass
    return "\n".join(collected)
| # ---- Template validation --------------------------------------------------- | |
def validate_template(template: str) -> None:
    """Check that ``template`` parses as Jinja2 and try a dry render.

    The template may come from a remote file, so it is compiled and rendered
    in Jinja2's ``ImmutableSandboxedEnvironment`` rather than a plain
    ``Environment``.

    Behaviour:
      * jinja2 not importable: print a warning to stderr and return.
      * syntax error: exit the process with the line number and message.
      * render failure on the sample conversation: print a warning and ask
        whether to continue; exit unless the answer is ``y``/``yes``.
      * success: print the rendered length.
    """
    try:
        import jinja2
        from jinja2.sandbox import ImmutableSandboxedEnvironment
    except ImportError:
        print("WARNING: jinja2 not available, skipping validation.", file=sys.stderr)
        return

    def raise_exception(message):
        # Chat templates commonly call raise_exception(...) to reject bad
        # input; provide it so such a call reports the template's own message.
        raise jinja2.TemplateError(message)

    env = ImmutableSandboxedEnvironment(
        trim_blocks=True, lstrip_blocks=True,
        extensions=["jinja2.ext.loopcontrols"],
    )
    env.globals["raise_exception"] = raise_exception
    try:
        compiled = env.from_string(template)
    except jinja2.TemplateSyntaxError as e:
        sys.exit(f"Jinja2 syntax error at line {e.lineno}: {e.message}")
    sample = {
        "messages": [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
        ],
        "add_generation_prompt": True,
        "bos_token": "<bos>", "eos_token": "<eos>",
        "tools": None, "tool_choice": None,
    }
    try:
        rendered = compiled.render(**sample)
    except Exception as e:
        # A render failure is not necessarily fatal (the sample may simply not
        # match what the template expects), so let the user decide.
        print(f"WARNING: template parsed but failed to render with sample messages: "
              f"{type(e).__name__}: {e}", file=sys.stderr)
        if input("Continue anyway? [y/N] ").strip().lower() not in ("y", "yes"):
            sys.exit("Aborted.")
        return
    print(f"Template validated (renders to {len(rendered)} chars on sample input).")
| # ---- Diff display ---------------------------------------------------------- | |
# Colour is used only when stdout is a terminal and the NO_COLOR environment
# variable is not set (to any value).
_USE_COLOUR = sys.stdout.isatty() and "NO_COLOR" not in os.environ

# ANSI escape sequences used to paint diff output.
_RESET, _BOLD = "\033[0m", "\033[1m"
_RED, _GREEN, _CYAN = "\033[31m", "\033[32m", "\033[36m"
def _paint(line: str) -> str:
    """Wrap one unified-diff line in an ANSI colour chosen by its prefix.

    File headers (``+++``/``---``) are bold, hunk headers (``@@``) cyan,
    additions green and removals red. Any other line, and every line when
    colour is disabled, is returned unchanged.
    """
    if not _USE_COLOUR:
        return line
    # Order matters: the three-character headers must win over "+" and "-".
    if line.startswith(("+++", "---")):
        colour = _BOLD
    elif line.startswith("@@"):
        colour = _CYAN
    elif line.startswith("+"):
        colour = _GREEN
    elif line.startswith("-"):
        colour = _RED
    else:
        return line
    return f"{colour}{line}{_RESET}"
def get_current_template(path: Path) -> str | None:
    """Return the chat template stored in ``path``, or ``None`` if extraction raises ``KeyError``."""
    try:
        existing = extract_template_from_gguf(path)
    except KeyError:
        existing = None
    return existing
def show_diff(name: str, current: str | None, new: str, max_lines: int = 500) -> bool:
    """Print a unified diff between ``current`` and ``new`` for file ``name``.

    ``current`` may be ``None`` (no existing template), which is compared as
    an empty string. At most ``max_lines`` diff lines are printed; the number
    of omitted lines is reported.

    Returns:
        True if the two templates differ, False if they are identical.
    """
    current_str = current if current is not None else ""
    if current_str == new:
        print(f"\n=== {name}: no change ===")
        return False
    print(f"\n=== diff for {name} ===")
    if current is None:
        print("(no existing tokenizer.chat_template — adding new)")
    diff = list(difflib.unified_diff(
        current_str.splitlines(),
        new.splitlines(),
        fromfile=f"current: {name}",
        tofile="new",
        lineterm="",
    ))
    if not diff:
        # splitlines() drops line boundaries, so templates that differ only
        # there produce an empty diff; say so rather than printing nothing.
        print("(templates differ only in line-ending characters or a trailing newline)")
        return True
    shown = diff[:max_lines]
    print("\n".join(_paint(line) for line in shown))
    if len(diff) > max_lines:
        print(f"... ({len(diff) - max_lines} more diff lines truncated) ...")
    return True
| # ---- GGUF rewrite ---------------------------------------------------------- | |
def free_bytes(p: Path) -> int:
    """Return the free space, in bytes, reported for the directory containing ``p``."""
    usage = shutil.disk_usage(p.parent)
    return usage.free
def update_gguf(input_path: Path, template_file: Path) -> None:
    """Rewrite ``input_path`` with the chat template stored in ``template_file``.

    Runs ``gguf.scripts.gguf_new_metadata`` in a subprocess to write a sibling
    ``<name>.new`` file, then renames that file over the original.

    Exits the process (``sys.exit``) if the target directory has less free
    space than the input size plus 64 MiB.

    Raises:
        subprocess.CalledProcessError: the rewrite command failed. The
            partially written ``.new`` file is removed first and the original
            is not replaced.
    """
    out_path = input_path.with_suffix(input_path.suffix + ".new")
    needed = input_path.stat().st_size + 64 * 1024 * 1024
    if free_bytes(input_path) < needed:
        # Fractional GB: integer division would print "~0 GB" for small files.
        sys.exit(
            f"Not enough free space on {input_path.parent} for {input_path.name} "
            f"(need ~{needed / 1024**3:.2f} GB)."
        )
    print(f"\n→ {input_path.name}: writing {out_path.name}")
    cmd = [
        sys.executable, "-m", "gguf.scripts.gguf_new_metadata",
        "--chat-template-file", str(template_file),
        "--force",
        str(input_path), str(out_path),
    ]
    try:
        subprocess.run(cmd, check=True)
    except BaseException:
        # Also covers Ctrl-C: do not leave a partial copy of the model behind.
        out_path.unlink(missing_ok=True)
        raise
    print(f"→ {input_path.name}: replacing original")
    out_path.replace(input_path)
def confirm(prompt: str) -> bool:
    """Ask a yes/no question; return True only for ``y`` or ``yes`` (case-insensitive)."""
    answer = input(f"{prompt} [y/N] ")
    normalised = answer.strip().lower()
    return normalised == "y" or normalised == "yes"
def main() -> int:
    """Interactive entry point: pick GGUF files, obtain a template, apply it.

    Steps: check that the ``gguf`` package is importable, let the user select
    files from the current directory, read the new template (remote GGUF,
    local file, or pasted text), validate it, show a diff per file, ask for
    confirmation, then rewrite each file whose template would change.

    Returns:
        0 on success or when there is nothing to do / the user declines; the
        rewrite command's non-zero exit code (or 1) if a rewrite fails.
    """
    import importlib.util

    if importlib.util.find_spec("gguf") is None:
        sys.exit(
            "gguf package not available. Run via 'uv run update_gguf_chat_template.py' "
            "or install gguf into the active environment."
        )
    selected = select_files(list_gguf_files())
    print(f"\nSelected {len(selected)} file(s):")
    for f in selected:
        print(f" - {f.name}")
    print("\nTemplate source:")
    print(" A) HuggingFace URL to a GGUF file (extract its chat_template)")
    print(" B) Path to a local jinja file")
    print(" C) Paste the template content directly")
    choice = input("Choose [A/B/C]: ").strip().upper()
    if choice == "A":
        template = fetch_remote_template(input("HuggingFace GGUF URL: ").strip())
    elif choice == "B":
        template = read_local_template(input("Path to jinja file: ").strip())
    elif choice == "C":
        template = paste_template()
    else:
        sys.exit("Invalid choice.")
    if not template.strip():
        sys.exit("Empty template, aborting.")
    validate_template(template)
    preview = template[:300].replace("\n", "\\n")
    print(f"\n--- Template preview ({len(template)} chars) ---")
    print(preview + ("..." if len(template) > 300 else ""))
    print("---")
    # Only files whose template would actually change are rewritten.
    to_update: list[Path] = [
        f for f in selected if show_diff(f.name, get_current_template(f), template)
    ]
    if not to_update:
        print("\nAll selected files already have this template, nothing to do.")
        return 0
    msg = (f"\nOverwrite {len(to_update)} file(s) with this template? "
           f"(each will be rewritten to a .new file then renamed over the original)")
    # Ask before creating the temp file so a declined run leaves nothing behind.
    if not confirm(msg):
        print("Aborted.")
        return 0
    fd, tmpl_name = tempfile.mkstemp(suffix=".jinja", prefix="chat_template_")
    template_file = Path(tmpl_name)
    try:
        # newline="" writes the template verbatim; default text mode would
        # turn "\n" into "\r\n" on Windows.
        with os.fdopen(fd, "w", encoding="utf-8", newline="") as handle:
            handle.write(template)
        for target in to_update:
            try:
                update_gguf(target, template_file)
            except subprocess.CalledProcessError as err:
                print(
                    f"ERROR: rewriting {target.name} failed with exit code "
                    f"{err.returncode}; the original file was not replaced.",
                    file=sys.stderr,
                )
                return err.returncode or 1
        print("\nDone.")
    finally:
        template_file.unlink(missing_ok=True)
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment