Last active
May 12, 2026 07:34
-
-
Save xqm32/0cc9f4156f266b5029bc95b71ba3fb71 to your computer and use it in GitHub Desktop.
Confluencer - The Confluence Search Tool for LLM Agents
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = ">=3.14" | |
| # dependencies = [ | |
| # "atlassian-python-api>=4.0.7", | |
| # "loguru>=0.7.3", | |
| # "markitdown>=0.1.5", | |
| # "platformdirs>=4.9.6", | |
| # "python-dotenv>=1.2.2", | |
| # "pyyaml>=6.0.3", | |
| # "typer>=0.25.1", | |
| # ] | |
| # /// | |
| from io import BytesIO | |
| from itertools import count | |
| from os import environ, getenv | |
| from pathlib import Path | |
| from time import sleep | |
| from typing import Any, cast | |
| from atlassian import Confluence | |
| from dotenv import load_dotenv | |
| from loguru import logger | |
| from markitdown import MarkItDown, StreamInfo | |
| from platformdirs import PlatformDirs | |
| from rich.prompt import Prompt | |
| from typer import Typer | |
| from yaml import safe_dump | |
# Per-user application directories for this tool.
platform_dirs = PlatformDirs("confluencer", ensure_exists=True)
user_data_dir = Path(platform_dirs.user_data_dir)

# Credentials written by the auth_login command live in a dotenv file inside
# the user data directory. It is loaded before the client below is built so
# that the client can pick up the stored URL and token from the environment.
dotenv_path = user_data_dir.joinpath(".env")
load_dotenv(dotenv_path)

app = Typer()

# Unset settings fall back to empty strings instead of None.
confluence = Confluence(
    url=environ.get("CONFLUENCE_URL", ""),
    token=environ.get("CONFLUENCE_TOKEN", ""),
)

# Shared converter used to turn Confluence storage markup into Markdown.
mid = MarkItDown()
def get_markdown_by_id(page_id: str) -> str:
    """Fetch a page and render it as Markdown preceded by YAML front matter.

    The page is requested with its ``body.storage`` expansion. The storage
    markup is removed from the page data before that data is dumped as YAML,
    so the front matter holds everything returned except the markup itself,
    which is converted to Markdown and placed after the front matter.
    """
    page: Any = confluence.get_page_by_id(page_id, expand="body.storage")
    # Detach the markup so it is not duplicated inside the front matter.
    storage = page["body"]["storage"].pop("value")
    metadata = safe_dump(page, allow_unicode=True).strip()
    body = convert_storage_to_markdown(storage).strip()
    document = "\n".join(("---", metadata, "---", "", body))
    # A page with an empty body would otherwise end in blank lines.
    return document.strip()
def convert_storage_to_markdown(storage: str) -> str:
    """Convert Confluence storage markup to Markdown.

    Blank or whitespace-only input is returned unchanged without invoking the
    converter. Anything else is encoded as UTF-8 and converted as an ``.html``
    stream.
    """
    if not storage.strip():
        return storage
    html_stream = BytesIO(storage.encode("utf-8"))
    hint = StreamInfo(extension=".html")
    result = mid.convert_stream(html_stream, stream_info=hint)
    return result.markdown
def _collect_child_ids(page_id: str, page_size: int = 200) -> list[str]:
    """Return the ids of every direct child page of *page_id*.

    Children are requested in batches of *page_size*; a batch shorter than
    *page_size* is taken to be the last one.
    """
    child_ids: list[str] = []
    for start in count(0, page_size):
        batch = confluence.get_child_id_list(page_id, start=start, limit=page_size)
        child_ids.extend(batch)
        if len(batch) < page_size:
            break
    return child_ids


def save_markdown_by_id(page_id: str, parent: Path) -> None:
    """Save a page and, recursively, all of its descendants as Markdown files.

    The page is written to ``<parent>/<page_id>.md`` (creating *parent* if
    needed), then each child page is saved the same way.

    Environment variables:
        CONFLUENCE_TREE: when set (any value), children are written into a
            ``<parent>/<page_id>/`` subdirectory, mirroring the page
            hierarchy; otherwise every page lands directly in *parent*.
        CONFLUENCE_RATE_LIMIT: when set, the number of seconds (float) to
            pause before fetching each child page.

    Raises:
        ValueError: if CONFLUENCE_RATE_LIMIT is set but is not a valid float.
    """
    logger.info(f"Saving page {page_id} to {parent}")
    markdown = get_markdown_by_id(page_id)
    parent.mkdir(parents=True, exist_ok=True)
    path = parent / f"{page_id}.md"
    # The front matter is dumped with allow_unicode=True, so the text can
    # contain any character; write UTF-8 explicitly rather than relying on the
    # platform's locale encoding.
    path.write_text(markdown, encoding="utf-8")
    logger.info(f"Saved page {page_id} to {path}")

    child_ids = _collect_child_ids(page_id)
    if not child_ids:
        return
    logger.info(f"Found {len(child_ids)} child pages of page {page_id}")

    child_dir = parent / page_id if "CONFLUENCE_TREE" in environ else parent
    delay = (
        float(environ["CONFLUENCE_RATE_LIMIT"])
        if "CONFLUENCE_RATE_LIMIT" in environ
        else None
    )
    for child_id in child_ids:
        # Throttle every child fetch; a single pause per parent would leave
        # pages with many leaf children effectively unthrottled.
        if delay is not None:
            sleep(delay)
        save_markdown_by_id(child_id, child_dir)
@app.command()
def auth_login():
    """Prompt for the Confluence URL and token and store them for later runs.

    The values are written to the dotenv file in the user data directory,
    replacing any previous contents. The token prompt does not echo input.
    """
    envs = [
        f"CONFLUENCE_URL={Prompt.ask('CONFLUENCE_URL')}",
        f"CONFLUENCE_TOKEN={Prompt.ask('CONFLUENCE_TOKEN', password=True)}",
    ]
    # The file holds an access token: make sure it exists with owner-only
    # permissions before the secret is written. touch() covers a new file,
    # chmod() tightens one that already exists with looser permissions.
    dotenv_path.touch(mode=0o600, exist_ok=True)
    dotenv_path.chmod(0o600)
    # Explicit UTF-8 so the stored values do not depend on the locale encoding.
    dotenv_path.write_text("\n".join(envs), encoding="utf-8")
@app.command()
def auth_status():
    """Print the username of the currently authenticated Confluence user."""
    response = confluence.get("/rest/api/user/current")
    # The client call is untyped; the response is treated as a JSON object.
    user = cast(dict[str, Any], response)
    print(user["username"])
@app.command()
def auth_logout():
    """Delete the stored credentials file, if there is one."""
    try:
        dotenv_path.unlink()
    except FileNotFoundError:
        # Nothing stored, so there is nothing to remove.
        pass
@app.command()
def cql(query: str) -> None:
    """Run a CQL query and print the response as YAML."""
    response = confluence.cql(query)
    rendered = safe_dump(response, allow_unicode=True)
    print(rendered)
@app.command()
def get(page_id: str) -> None:
    """Print a page as Markdown preceded by YAML front matter."""
    document = get_markdown_by_id(page_id)
    print(document)
@app.command()
def get_child_pages(page_id: str) -> None:
    """Print the child pages of a page as YAML."""
    children: Any = confluence.get_child_pages(page_id)
    # Materialise the result so the YAML dumper receives a plain list.
    rendered = safe_dump([*children], allow_unicode=True)
    print(rendered)
@app.command(hidden=True)
def save(page_id: str) -> None:
    """Save a page and its descendants as Markdown files.

    Files go to the directory named by CONFLUENCE_OUTPUT_DIR, or "site" when
    that variable is unset.
    """
    output_dir = Path(environ.get("CONFLUENCE_OUTPUT_DIR", "site"))
    save_markdown_by_id(page_id, output_dir)
# Dispatch to the Typer CLI only when run as a script, not when imported.
if __name__ == "__main__":
    app()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment