sammcj · May 5, 2026 00:30
diff --git a/update-gguf-chat-template.py b/update-gguf-chat-template.py
 #!/usr/bin/env -S uv run --script
 # /// script
 # requires-python = ">=3.11"
 # dependencies = [
 #     "gguf>=0.10",
 #     "jinja2>=3.1",
 #     "numpy",
 #     "tqdm",
 # ]
 # ///
 """Update tokenizer.chat_template in GGUF files in the current directory.

 Run from a directory containing GGUF files. Rewrites each file with the new
 template via gguf.scripts.gguf_new_metadata (ships with the gguf package).

  uv run update_gguf_chat_template.py        # uv handles deps
  python update_gguf_chat_template.py        # if gguf+jinja2 already installed
 """
 from __future__ import annotations

 import difflib
 import os
 import shutil
 import struct
 import subprocess
 import sys
 import tempfile
 import urllib.request
 from pathlib import Path

 # Number of bytes requested (via an HTTP Range header) from the start of a
 # remote GGUF file when extracting its chat template. The template entry must
 # lie inside this prefix; tokenizer vocab can be tens of MB, so bump this if
 # extraction fails on a particular model.
 REMOTE_CHUNK_BYTES = 64 * 1024 * 1024


 def list_gguf_files() -> list[Path]:
    """Return the regular ``*.gguf`` files in the current directory, sorted."""
    here = Path.cwd()
    matches = [entry for entry in here.glob("*.gguf") if entry.is_file()]
    matches.sort()
    return matches


 def select_files(files: list[Path]) -> list[Path]:
    """Interactively choose which of ``files`` to operate on.

    Prints a numbered menu with each file's size, then reads one line from
    stdin holding 1-based indices separated by commas and/or spaces, or
    ``a`` / ``all`` to select every file.

    Returns the chosen paths in the order entered. A repeated index is
    collapsed so the same file is never queued twice.

    Exits the process (``sys.exit``) when ``files`` is empty, when a token is
    not an integer, when an index is out of range, or when nothing was
    selected.
    """
    if not files:
        sys.exit("No .gguf files found in current directory.")
    print("\nGGUF files in current directory:")
    for i, f in enumerate(files, 1):
        size_gb = f.stat().st_size / 1024**3
        print(f"  [{i:>2}] {f.name}  ({size_gb:.2f} GB)")
    print("\nSelect file(s) by number (comma or space separated; 'a' for all):")
    raw = input("> ").strip()
    if raw.lower() in ("a", "all"):
        return files
    chosen: list[Path] = []
    for tok in raw.replace(",", " ").split():
        try:
            idx = int(tok) - 1
        except ValueError:
            # Report bad input like the other validation failures instead of
            # crashing with a traceback.
            sys.exit(f"Not a valid file number: {tok!r}")
        if not 0 <= idx < len(files):
            sys.exit(f"Index out of range: {tok}")
        if files[idx] not in chosen:
            chosen.append(files[idx])
    if not chosen:
        sys.exit("No files selected.")
    return chosen


 # ---- GGUF header parser (just enough to find tokenizer.chat_template) -----

 # GGUFValueType enum (gguf/constants.py)
 # The names are bound to the integer tags 0..12 in this exact order; the tag
 # is what is read from the file before each metadata value.
 _UINT8, _INT8, _UINT16, _INT16, _UINT32, _INT32, _FLOAT32, _BOOL, _STRING, \
    _ARRAY, _UINT64, _INT64, _FLOAT64 = range(13)

 # Fixed-width scalar tags -> (struct format, size in bytes). Every format is
 # little-endian ("<"). _STRING and _ARRAY are deliberately absent: they are
 # variable-length and are decoded separately by the value reader.
 _SCALAR_FMT = {
    _UINT8: ("<B", 1), _INT8: ("<b", 1),
    _UINT16: ("<H", 2), _INT16: ("<h", 2),
    _UINT32: ("<I", 4), _INT32: ("<i", 4),
    _FLOAT32: ("<f", 4), _BOOL: ("<?", 1),
    _UINT64: ("<Q", 8), _INT64: ("<q", 8), _FLOAT64: ("<d", 8),
 }


 def _read_gguf_string(f) -> str:
    """Read one length-prefixed string from the binary stream ``f``.

    The encoding read here is a little-endian unsigned 64-bit byte count
    followed by that many bytes of UTF-8.

    Raises:
        EOFError: the stream ends before the length prefix or the whole
            payload could be read (for example in a partially downloaded
            file). Without this check a short read would be decoded and
            returned as a silently truncated string.
        UnicodeDecodeError: the payload is not valid UTF-8.
    """
    prefix = f.read(8)
    if len(prefix) != 8:
        raise EOFError("GGUF data ended while reading a string length")
    (n,) = struct.unpack("<Q", prefix)
    data = f.read(n)
    if len(data) != n:
        raise EOFError(
            f"GGUF data ended while reading a string "
            f"({len(data)} of {n} bytes available)"
        )
    return data.decode("utf-8")


 def _read_gguf_value(f, vtype: int):
    """Read one metadata value with type tag ``vtype`` from the stream ``f``.

    Strings are returned as ``str``; arrays (a uint32 element tag and a
    uint64 count, then the elements) as a ``list`` of recursively decoded
    values; fixed-width scalars as whatever ``struct.unpack`` yields for the
    format registered in ``_SCALAR_FMT``.

    Raises:
        ValueError: ``vtype`` is not a known type tag. This is raised
            explicitly instead of letting the ``_SCALAR_FMT`` lookup raise
            ``KeyError``, because callers interpret ``KeyError`` as
            "chat template not present".
        struct.error: the stream ends inside a scalar or an array header.
    """
    if vtype == _STRING:
        return _read_gguf_string(f)
    if vtype == _ARRAY:
        (etype,) = struct.unpack("<I", f.read(4))
        (n,) = struct.unpack("<Q", f.read(8))
        return [_read_gguf_value(f, etype) for _ in range(n)]
    try:
        fmt, size = _SCALAR_FMT[vtype]
    except KeyError:
        raise ValueError(f"Unknown GGUF value type: {vtype}") from None
    return struct.unpack(fmt, f.read(size))[0]


 def extract_template_from_gguf(path: Path) -> str:
    """Return the ``tokenizer.chat_template`` string stored in a GGUF file.

    Only the fixed header and the key/value metadata entries are read, and
    reading stops as soon as the template key is found, so this also works
    on a truncated file (e.g. a partial download) provided the template
    entry lies inside the bytes that are present.

    Raises:
        ValueError: the file does not start with the ``GGUF`` magic, or the
            data ends / cannot be decoded before the template is reached.
        TypeError: the key exists but its value is not a string.
        KeyError: every metadata entry was read and none of them is
            ``tokenizer.chat_template``.
    """
    with open(path, "rb") as f:
        if f.read(4) != b"GGUF":
            raise ValueError(f"Not a GGUF file: {path}")
        try:
            f.read(4)   # version (unused)
            f.read(8)   # tensor count (unused)
            (kv_count,) = struct.unpack("<Q", f.read(8))

            for _ in range(kv_count):
                key = _read_gguf_string(f)
                (vtype,) = struct.unpack("<I", f.read(4))
                value = _read_gguf_value(f, vtype)
                if key == "tokenizer.chat_template":
                    if not isinstance(value, str):
                        raise TypeError(
                            f"tokenizer.chat_template is {type(value).__name__}, expected str. "
                            "Multi-template GGUFs are not supported by this script."
                        )
                    return value
        except (struct.error, EOFError, UnicodeDecodeError) as err:
            # Running out of bytes mid-entry is how a too-small download
            # shows up, so the chunk-size hint belongs here rather than on
            # the "key not present" path below.
            raise ValueError(
                f"GGUF metadata in {path} is truncated or corrupt ({err}). "
                "For a remote file the KV section may extend past the "
                "downloaded chunk; try increasing REMOTE_CHUNK_BYTES."
            ) from err
    raise KeyError("tokenizer.chat_template not found in GGUF metadata.")


 # ---- Template sources ------------------------------------------------------

 def fetch_remote_template(url: str) -> str:
    """Download the start of a remote GGUF file and return its chat template.

    A HuggingFace ``/blob/`` URL is rewritten to its ``/resolve/`` form. The
    request asks for the first ``REMOTE_CHUNK_BYTES`` bytes via a ``Range``
    header, and at most that many bytes are written to a temporary file. The
    file is parsed with ``extract_template_from_gguf`` and is always deleted
    afterwards.

    Raises whatever ``urllib.request.urlopen`` (network/HTTP errors,
    timeouts) or ``extract_template_from_gguf`` raise.
    """
    if "huggingface.co" in url and "/blob/" in url:
        # Only the first "/blob/" is the route segment; later ones would be
        # part of the file path inside the repo.
        url = url.replace("/blob/", "/resolve/", 1)
    print(f"\nFetching up to {REMOTE_CHUNK_BYTES // 1024 // 1024} MB from:\n  {url}")
    fd, tmp_name = tempfile.mkstemp(suffix=".gguf.partial")
    os.close(fd)
    tmp_path = Path(tmp_name)
    try:
        req = urllib.request.Request(
            url, headers={"Range": f"bytes=0-{REMOTE_CHUNK_BYTES - 1}"}
        )
        # The timeout applies per socket operation, so a stalled connection
        # fails instead of hanging forever.
        with urllib.request.urlopen(req, timeout=60) as resp, open(tmp_path, "wb") as out:
            # A server may ignore the Range header and send the whole file;
            # stop copying once the requested prefix has been received.
            remaining = REMOTE_CHUNK_BYTES
            while remaining > 0:
                chunk = resp.read(min(remaining, 1024 * 1024))
                if not chunk:
                    break
                out.write(chunk)
                remaining -= len(chunk)
        return extract_template_from_gguf(tmp_path)
    finally:
        tmp_path.unlink(missing_ok=True)


 def read_local_template(path_str: str) -> str:
    """Load a template from a local file path (a leading ``~`` is expanded).

    Returns the file's contents decoded as UTF-8. Exits the process with a
    "File not found" message unless the path is an existing regular file.
    """
    template_path = Path(path_str).expanduser()
    if template_path.is_file():
        return template_path.read_text(encoding="utf-8")
    sys.exit(f"File not found: {template_path}")


 def paste_template() -> str:
    """Collect a pasted template from stdin, one line at a time.

    Input stops at a line that is ``EOF`` once stripped of surrounding
    whitespace, or when stdin itself is exhausted. The terminator is not
    included; the collected lines are joined with newlines.
    """
    print("\nPaste the template. End with a line containing only 'EOF':")
    collected: list[str] = []
    try:
        while (entry := input()).strip() != "EOF":
            collected.append(entry)
    except EOFError:
        # End of stdin acts as an implicit terminator.
        pass
    return "\n".join(collected)


 # ---- Template validation ---------------------------------------------------

 def validate_template(template: str) -> None:
    """Check that ``template`` compiles and renders as a Jinja2 chat template.

    The template is compiled and then rendered once against a small sample
    conversation. Because the template may have been extracted from a remote
    file, rendering uses Jinja2's ``ImmutableSandboxedEnvironment`` so that
    template code cannot reach unsafe attributes or mutate the sample data.

    Outcomes:
      * jinja2 is not importable: a warning is printed and validation is
        skipped.
      * syntax error: the process exits with the line number and message.
      * render failure (including a sandbox violation): a warning is
        printed and the process exits unless the user confirms.
      * success: the rendered length is printed.
    """
    try:
        import jinja2
        from jinja2.sandbox import ImmutableSandboxedEnvironment
    except ImportError:
        print("WARNING: jinja2 not available, skipping validation.", file=sys.stderr)
        return

    env = ImmutableSandboxedEnvironment(
        trim_blocks=True, lstrip_blocks=True,
        extensions=["jinja2.ext.loopcontrols"],
    )
    try:
        compiled = env.from_string(template)
    except jinja2.TemplateSyntaxError as e:
        sys.exit(f"Jinja2 syntax error at line {e.lineno}: {e.message}")

    sample = {
        "messages": [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
        ],
        "add_generation_prompt": True,
        "bos_token": "<bos>", "eos_token": "<eos>",
        "tools": None, "tool_choice": None,
    }
    try:
        rendered = compiled.render(**sample)
    except Exception as e:
        # A render failure is not necessarily fatal (the sample conversation
        # may simply not suit this template), so let the user decide.
        print(f"WARNING: template parsed but failed to render with sample messages: "
              f"{type(e).__name__}: {e}", file=sys.stderr)
        if input("Continue anyway? [y/N] ").strip().lower() not in ("y", "yes"):
            sys.exit("Aborted.")
        return
    print(f"Template validated (renders to {len(rendered)} chars on sample input).")


 # ---- Diff display ----------------------------------------------------------

 # Colour is emitted only when stdout is a terminal and the NO_COLOR
 # environment variable is not set at all (any value, even empty, disables it).
 _USE_COLOUR = sys.stdout.isatty() and os.environ.get("NO_COLOR") is None
 # ANSI SGR escape sequences used to decorate diff output.
 _RESET = "\033[0m"
 _RED = "\033[31m"
 _GREEN = "\033[32m"
 _CYAN = "\033[36m"
 _BOLD = "\033[1m"


 def _paint(line: str) -> str:
    """Wrap one unified-diff line in an ANSI colour chosen by its prefix.

    File headers (``+++`` / ``---``) become bold, hunk headers (``@@``)
    cyan, additions green and removals red. Any other line, and every line
    when colour is disabled, is returned unchanged.
    """
    if not _USE_COLOUR:
        return line
    # Order matters: the three-character header prefixes must be tested
    # before the single-character "+" / "-" prefixes they also match.
    styles = (
        (("+++", "---"), _BOLD),
        ("@@", _CYAN),
        ("+", _GREEN),
        ("-", _RED),
    )
    for prefix, colour in styles:
        if line.startswith(prefix):
            return f"{colour}{line}{_RESET}"
    return line


 def get_current_template(path: Path) -> str | None:
    """Return the chat template stored in ``path``, or ``None`` on ``KeyError``.

    ``KeyError`` is what ``extract_template_from_gguf`` raises when the file
    has no ``tokenizer.chat_template`` entry; every other exception
    propagates to the caller.
    """
    try:
        found = extract_template_from_gguf(path)
    except KeyError:
        found = None
    return found


 def show_diff(name: str, current: str | None, new: str, max_lines: int = 500) -> bool:
    """Print a unified diff. Returns True if there's a change, False if identical."""
    current_str = current if current is not None else ""
    if current_str == new:
        print(f"\n=== {name}: no change ===")
        return False

    print(f"\n=== diff for {name} ===")
    if current is None:
        print("(no existing tokenizer.chat_template — adding new)")

    diff = list(difflib.unified_diff(
        current_str.splitlines(),
        new.splitlines(),
        fromfile=f"current: {name}",
        tofile="new",
        lineterm="",
    ))
    shown = diff[:max_lines]
    print("\n".join(_paint(line) for line in shown))
    if len(diff) > max_lines:
        print(f"... ({len(diff) - max_lines} more diff lines truncated) ...")
    return True


 # ---- GGUF rewrite ----------------------------------------------------------

 def free_bytes(p: Path) -> int:
    """Return the free space, in bytes, reported for ``p``'s parent directory."""
    usage = shutil.disk_usage(p.parent)
    return usage.free


 def update_gguf(input_path: Path, template_file: Path) -> None:
    """Rewrite ``input_path`` in place with the template in ``template_file``.

    Runs ``gguf.scripts.gguf_new_metadata`` in a subprocess (using the
    current interpreter) to write ``<name>.new`` next to the original, then
    renames that file over the original. Before starting, the free space in
    the file's directory is checked against the file size plus 64 MiB of
    headroom.

    Exits the process (``sys.exit``) when there is not enough free space.

    Raises:
        subprocess.CalledProcessError: the rewrite command failed. The
            partial ``.new`` file is removed and the original is left
            untouched.
    """
    out_path = input_path.with_suffix(input_path.suffix + ".new")
    needed = input_path.stat().st_size + 64 * 1024 * 1024
    if free_bytes(input_path) < needed:
        sys.exit(
            f"Not enough free space on {input_path.parent} for {input_path.name} "
            f"(need ~{needed / 1024**3:.2f} GB)."
        )
    print(f"\n→ {input_path.name}: writing {out_path.name}")
    cmd = [
        sys.executable, "-m", "gguf.scripts.gguf_new_metadata",
        "--chat-template-file", str(template_file),
        "--force",
        str(input_path), str(out_path),
    ]
    try:
        subprocess.run(cmd, check=True)
    except BaseException:
        # Covers a failed command as well as Ctrl-C: do not leave a
        # potentially multi-GB partial output file behind.
        out_path.unlink(missing_ok=True)
        raise
    print(f"→ {input_path.name}: replacing original")
    out_path.replace(input_path)


 def confirm(prompt: str) -> bool:
    """Ask a yes/no question on stdin and report whether the answer was yes.

    Only ``y`` or ``yes`` (any case, surrounding whitespace ignored) count
    as yes; anything else, including an empty answer, is a no.
    """
    answer = input(f"{prompt} [y/N] ")
    normalised = answer.strip().lower()
    return normalised == "y" or normalised == "yes"


 def _prompt_for_template() -> str:
    """Ask which template source to use and return the template text."""
    print("\nTemplate source:")
    print("  A) HuggingFace URL to a GGUF file (extract its chat_template)")
    print("  B) Path to a local jinja file")
    print("  C) Paste the template content directly")
    choice = input("Choose [A/B/C]: ").strip().upper()

    if choice == "A":
        return fetch_remote_template(input("HuggingFace GGUF URL: ").strip())
    if choice == "B":
        return read_local_template(input("Path to jinja file: ").strip())
    if choice == "C":
        return paste_template()
    sys.exit("Invalid choice.")


 def main() -> int:
    """Run the interactive update workflow and return the process exit code.

    Steps: check that the ``gguf`` package is importable, let the user pick
    GGUF files from the current directory, obtain and validate the new
    template, show a diff per file, and after confirmation rewrite every
    file whose template differs.

    Returns 0 on success, when nothing needs updating, and when the user
    declines. Error conditions exit through ``sys.exit`` with a message.
    """
    import importlib.util
    if importlib.util.find_spec("gguf") is None:
        sys.exit(
            "gguf package not available. Run via 'uv run update_gguf_chat_template.py' "
            "or install gguf into the active environment."
        )

    selected = select_files(list_gguf_files())
    print(f"\nSelected {len(selected)} file(s):")
    for f in selected:
        print(f"  - {f.name}")

    template = _prompt_for_template()
    if not template.strip():
        sys.exit("Empty template, aborting.")

    validate_template(template)

    preview = template[:300].replace("\n", "\\n")
    print(f"\n--- Template preview ({len(template)} chars) ---")
    print(preview + ("..." if len(template) > 300 else ""))
    print("---")

    to_update: list[Path] = []
    for f in selected:
        current = get_current_template(f)
        if show_diff(f.name, current, template):
            to_update.append(f)

    if not to_update:
        print("\nAll selected files already have this template, nothing to do.")
        return 0

    msg = (f"\nOverwrite {len(to_update)} file(s) with this template? "
           f"(each will be rewritten to a .new file then renamed over the original)")
    if not confirm(msg):
        print("Aborted.")
        return 0

    # The temp file is created only once the user has confirmed, and the
    # write happens inside the try so the file is removed even if writing
    # or any of the rewrites fails.
    fd, tmpl_name = tempfile.mkstemp(suffix=".jinja", prefix="chat_template_")
    template_file = Path(tmpl_name)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(template)
        for f in to_update:
            update_gguf(f, template_file)
        print("\nDone.")
    finally:
        template_file.unlink(missing_ok=True)
    return 0


 # Script entry point: main()'s return value becomes the process exit status.
 if __name__ == "__main__":
    sys.exit(main())
	#!/usr/bin/env -S uv run --script
	# /// script
	# requires-python = ">=3.11"
	# dependencies = [
	# "gguf>=0.10",
	# "jinja2>=3.1",
	# "numpy",
	# "tqdm",
	# ]
	# ///
	"""Update tokenizer.chat_template in GGUF files in the current directory.

	Run from a directory containing GGUF files. Rewrites each file with the new
	template via gguf.scripts.gguf_new_metadata (ships with the gguf package).

	uv run update_gguf_chat_template.py # uv handles deps
	python update_gguf_chat_template.py # if gguf+jinja2 already installed
	"""
	from __future__ import annotations

	import difflib
	import os
	import shutil
	import struct
	import subprocess
	import sys
	import tempfile
	import urllib.request
	from pathlib import Path

	# Initial range-request size for remote GGUF headers. Tokenizer vocab can be
	# tens of MB; bump if extraction fails on a particular model.
	REMOTE_CHUNK_BYTES = 64 * 1024 * 1024


	def list_gguf_files() -> list[Path]:
	return sorted(p for p in Path.cwd().glob("*.gguf") if p.is_file())


	def select_files(files: list[Path]) -> list[Path]:
	if not files:
	sys.exit("No .gguf files found in current directory.")
	print("\nGGUF files in current directory:")
	for i, f in enumerate(files, 1):
	size_gb = f.stat().st_size / 1024**3
	print(f" [{i:>2}] {f.name} ({size_gb:.2f} GB)")
	print("\nSelect file(s) by number (comma or space separated; 'a' for all):")
	raw = input("> ").strip()
	if raw.lower() in ("a", "all"):
	return files
	chosen = []
	for tok in raw.replace(",", " ").split():
	idx = int(tok) - 1
	if not 0 <= idx < len(files):
	sys.exit(f"Index out of range: {tok}")
	chosen.append(files[idx])
	if not chosen:
	sys.exit("No files selected.")
	return chosen


	# ---- GGUF header parser (just enough to find tokenizer.chat_template) -----

	# GGUFValueType enum (gguf/constants.py)
	_UINT8, _INT8, _UINT16, _INT16, _UINT32, _INT32, _FLOAT32, _BOOL, _STRING, \
	_ARRAY, _UINT64, _INT64, _FLOAT64 = range(13)

	_SCALAR_FMT = {
	_UINT8: ("<B", 1), _INT8: ("<b", 1),
	_UINT16: ("<H", 2), _INT16: ("<h", 2),
	_UINT32: ("<I", 4), _INT32: ("<i", 4),
	_FLOAT32: ("<f", 4), _BOOL: ("<?", 1),
	_UINT64: ("<Q", 8), _INT64: ("<q", 8), _FLOAT64: ("<d", 8),
	}


	def _read_gguf_string(f) -> str:
	(n,) = struct.unpack("<Q", f.read(8))
	return f.read(n).decode("utf-8")


	def _read_gguf_value(f, vtype: int):
	if vtype == _STRING:
	return _read_gguf_string(f)
	if vtype == _ARRAY:
	(etype,) = struct.unpack("<I", f.read(4))
	(n,) = struct.unpack("<Q", f.read(8))
	return [_read_gguf_value(f, etype) for _ in range(n)]
	fmt, size = _SCALAR_FMT[vtype]
	return struct.unpack(fmt, f.read(size))[0]


	def extract_template_from_gguf(path: Path) -> str:
	"""Parse GGUF header from a (possibly truncated) file, return the value of
	tokenizer.chat_template. Works on partial downloads as long as the KV
	section fits inside the truncated portion."""
	with open(path, "rb") as f:
	if f.read(4) != b"GGUF":
	raise ValueError(f"Not a GGUF file: {path}")
	f.read(4) # version
	f.read(8) # tensor count
	(kv_count,) = struct.unpack("<Q", f.read(8))

	for _ in range(kv_count):
	key = _read_gguf_string(f)
	(vtype,) = struct.unpack("<I", f.read(4))
	value = _read_gguf_value(f, vtype)
	if key == "tokenizer.chat_template":
	if not isinstance(value, str):
	raise TypeError(
	f"tokenizer.chat_template is {type(value).__name__}, expected str. "
	"Multi-template GGUFs are not supported by this script."
	)
	return value
	raise KeyError(
	"tokenizer.chat_template not found. The KV section may extend past the "
	"downloaded chunk; try increasing REMOTE_CHUNK_BYTES."
	)


	# ---- Template sources ------------------------------------------------------

	def fetch_remote_template(url: str) -> str:
	if "huggingface.co" in url and "/blob/" in url:
	url = url.replace("/blob/", "/resolve/")
	print(f"\nFetching up to {REMOTE_CHUNK_BYTES // 1024 // 1024} MB from:\n {url}")
	fd, tmp_name = tempfile.mkstemp(suffix=".gguf.partial")
	os.close(fd)
	tmp_path = Path(tmp_name)
	try:
	req = urllib.request.Request(
	url, headers={"Range": f"bytes=0-{REMOTE_CHUNK_BYTES - 1}"}
	)
	with urllib.request.urlopen(req) as resp, open(tmp_path, "wb") as out:
	shutil.copyfileobj(resp, out)
	return extract_template_from_gguf(tmp_path)
	finally:
	tmp_path.unlink(missing_ok=True)


	def read_local_template(path_str: str) -> str:
	p = Path(path_str).expanduser()
	if not p.is_file():
	sys.exit(f"File not found: {p}")
	return p.read_text(encoding="utf-8")


	def paste_template() -> str:
	print("\nPaste the template. End with a line containing only 'EOF':")
	lines: list[str] = []
	while True:
	try:
	line = input()
	except EOFError:
	break
	if line.strip() == "EOF":
	break
	lines.append(line)
	return "\n".join(lines)


	# ---- Template validation ---------------------------------------------------

	def validate_template(template: str) -> None:
	"""Parse the template with Jinja2 and try a dry render. Aborts on failure."""
	try:
	import jinja2
	except ImportError:
	print("WARNING: jinja2 not available, skipping validation.", file=sys.stderr)
	return

	env = jinja2.Environment(
	trim_blocks=True, lstrip_blocks=True,
	extensions=["jinja2.ext.loopcontrols"],
	)
	try:
	compiled = env.from_string(template)
	except jinja2.TemplateSyntaxError as e:
	sys.exit(f"Jinja2 syntax error at line {e.lineno}: {e.message}")

	sample = {
	"messages": [
	{"role": "user", "content": "hello"},
	{"role": "assistant", "content": "hi"},
	],
	"add_generation_prompt": True,
	"bos_token": "<bos>", "eos_token": "<eos>",
	"tools": None, "tool_choice": None,
	}
	try:
	rendered = compiled.render(**sample)
	except Exception as e:
	print(f"WARNING: template parsed but failed to render with sample messages: "
	f"{type(e).__name__}: {e}", file=sys.stderr)
	if input("Continue anyway? [y/N] ").strip().lower() not in ("y", "yes"):
	sys.exit("Aborted.")
	return
	print(f"Template validated (renders to {len(rendered)} chars on sample input).")


	# ---- Diff display ----------------------------------------------------------

	_USE_COLOUR = sys.stdout.isatty() and os.environ.get("NO_COLOR") is None
	_RESET = "\033[0m"
	_RED = "\033[31m"
	_GREEN = "\033[32m"
	_CYAN = "\033[36m"
	_BOLD = "\033[1m"


	def _paint(line: str) -> str:
	if not _USE_COLOUR:
	return line
	if line.startswith("+++") or line.startswith("---"):
	return f"{_BOLD}{line}{_RESET}"
	if line.startswith("@@"):
	return f"{_CYAN}{line}{_RESET}"
	if line.startswith("+"):
	return f"{_GREEN}{line}{_RESET}"
	if line.startswith("-"):
	return f"{_RED}{line}{_RESET}"
	return line


	def get_current_template(path: Path) -> str \| None:
	try:
	return extract_template_from_gguf(path)
	except KeyError:
	return None


	def show_diff(name: str, current: str \| None, new: str, max_lines: int = 500) -> bool:
	"""Print a unified diff. Returns True if there's a change, False if identical."""
	current_str = current if current is not None else ""
	if current_str == new:
	print(f"\n=== {name}: no change ===")
	return False

	print(f"\n=== diff for {name} ===")
	if current is None:
	print("(no existing tokenizer.chat_template — adding new)")

	diff = list(difflib.unified_diff(
	current_str.splitlines(),
	new.splitlines(),
	fromfile=f"current: {name}",
	tofile="new",
	lineterm="",
	))
	shown = diff[:max_lines]
	print("\n".join(_paint(line) for line in shown))
	if len(diff) > max_lines:
	print(f"... ({len(diff) - max_lines} more diff lines truncated) ...")
	return True


	# ---- GGUF rewrite ----------------------------------------------------------

	def free_bytes(p: Path) -> int:
	return shutil.disk_usage(p.parent).free


	def update_gguf(input_path: Path, template_file: Path) -> None:
	out_path = input_path.with_suffix(input_path.suffix + ".new")
	needed = input_path.stat().st_size + 64 * 1024 * 1024
	if free_bytes(input_path) < needed:
	sys.exit(
	f"Not enough free space on {input_path.parent} for {input_path.name} "
	f"(need ~{needed // 1024**3} GB)."
	)
	print(f"\n→ {input_path.name}: writing {out_path.name}")
	cmd = [
	sys.executable, "-m", "gguf.scripts.gguf_new_metadata",
	"--chat-template-file", str(template_file),
	"--force",
	str(input_path), str(out_path),
	]
	subprocess.run(cmd, check=True)
	print(f"→ {input_path.name}: replacing original")
	out_path.replace(input_path)


	def confirm(prompt: str) -> bool:
	return input(f"{prompt} [y/N] ").strip().lower() in ("y", "yes")


	def main() -> int:
	import importlib.util
	if importlib.util.find_spec("gguf") is None:
	sys.exit(
	"gguf package not available. Run via 'uv run update_gguf_chat_template.py' "
	"or install gguf into the active environment."
	)

	selected = select_files(list_gguf_files())
	print(f"\nSelected {len(selected)} file(s):")
	for f in selected:
	print(f" - {f.name}")

	print("\nTemplate source:")
	print(" A) HuggingFace URL to a GGUF file (extract its chat_template)")
	print(" B) Path to a local jinja file")
	print(" C) Paste the template content directly")
	choice = input("Choose [A/B/C]: ").strip().upper()

	if choice == "A":
	template = fetch_remote_template(input("HuggingFace GGUF URL: ").strip())
	elif choice == "B":
	template = read_local_template(input("Path to jinja file: ").strip())
	elif choice == "C":
	template = paste_template()
	else:
	sys.exit("Invalid choice.")

	if not template.strip():
	sys.exit("Empty template, aborting.")

	validate_template(template)

	preview = template[:300].replace("\n", "\\n")
	print(f"\n--- Template preview ({len(template)} chars) ---")
	print(preview + ("..." if len(template) > 300 else ""))
	print("---")

	to_update: list[Path] = []
	for f in selected:
	current = get_current_template(f)
	if show_diff(f.name, current, template):
	to_update.append(f)

	if not to_update:
	print("\nAll selected files already have this template, nothing to do.")
	return 0

	fd, tmpl_name = tempfile.mkstemp(suffix=".jinja", prefix="chat_template_")
	with os.fdopen(fd, "w", encoding="utf-8") as f:
	f.write(template)
	template_file = Path(tmpl_name)

	try:
	msg = (f"\nOverwrite {len(to_update)} file(s) with this template? "
	f"(each will be rewritten to a .new file then renamed over the original)")
	if not confirm(msg):
	print("Aborted.")
	return 0
	for f in to_update:
	update_gguf(f, template_file)
	print("\nDone.")
	finally:
	template_file.unlink(missing_ok=True)
	return 0


	if __name__ == "__main__":
	sys.exit(main())
No results found