Skip to content

Instantly share code, notes, and snippets.

@xqm32
Last active May 12, 2026 07:34
Show Gist options
  • Select an option

  • Save xqm32/0cc9f4156f266b5029bc95b71ba3fb71 to your computer and use it in GitHub Desktop.

Select an option

Save xqm32/0cc9f4156f266b5029bc95b71ba3fb71 to your computer and use it in GitHub Desktop.
Confluencer - The Confluence Search Tool for LLM Agents
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.14"
# dependencies = [
# "atlassian-python-api>=4.0.7",
# "loguru>=0.7.3",
# "markitdown>=0.1.5",
# "platformdirs>=4.9.6",
# "python-dotenv>=1.2.2",
# "pyyaml>=6.0.3",
# "typer>=0.25.1",
# ]
# ///
from io import BytesIO
from itertools import count
from os import environ, getenv
from pathlib import Path
from time import sleep
from typing import Any, cast
from atlassian import Confluence
from dotenv import load_dotenv
from loguru import logger
from markitdown import MarkItDown, StreamInfo
from platformdirs import PlatformDirs
from rich.prompt import Prompt
from typer import Typer
from yaml import safe_dump
# Per-user application directories for "confluencer"; ensure_exists=True asks
# platformdirs to create them as needed.
platform_dirs = PlatformDirs("confluencer", ensure_exists=True)
user_data_dir = Path(platform_dirs.user_data_dir)
# Credentials file: written by `auth_login`, removed by `auth_logout`.
dotenv_path = user_data_dir / ".env"
# Must run before the Confluence client is constructed below, because the
# client arguments are read from the environment with getenv at that point.
load_dotenv(dotenv_path)
app = Typer()
# Unset variables fall back to "" — presumably so the module still loads and
# `auth_login` can run before any credentials have been stored.
confluence = Confluence(
    url=getenv("CONFLUENCE_URL", ""),
    token=getenv("CONFLUENCE_TOKEN", ""),
)
# Single MarkItDown converter instance shared by every conversion.
mid = MarkItDown()
def get_markdown_by_id(page_id: str) -> str:
    """Fetch a Confluence page and render it as Markdown with YAML front matter.

    The page is requested with its ``body.storage`` expansion. The storage
    markup is removed from the page data and converted to Markdown; the
    remaining page data is dumped as YAML between ``---`` fences.

    Args:
        page_id: Identifier of the page to fetch.

    Returns:
        The front matter, a blank line, and the converted body, with
        surrounding whitespace stripped.
    """
    page: Any = confluence.get_page_by_id(page_id, expand="body.storage")
    # Take the raw markup out of the page data so it is not repeated inside
    # the front matter.
    raw_markup = page["body"]["storage"].pop("value")
    metadata_yaml = safe_dump(page, allow_unicode=True).strip()
    body_markdown = convert_storage_to_markdown(raw_markup).strip()
    document = "\n\n".join((f"---\n{metadata_yaml}\n---", body_markdown))
    return document.strip()
def convert_storage_to_markdown(storage: str) -> str:
    """Convert Confluence storage-format markup to Markdown.

    Args:
        storage: Page body markup; it is handed to MarkItDown as HTML.

    Returns:
        The Markdown produced by MarkItDown, or ``storage`` unchanged when it
        is empty or contains only whitespace.
    """
    # Nothing to convert: skip the converter entirely for blank input.
    if not storage.strip():
        return storage
    html_stream = BytesIO(storage.encode("utf-8"))
    # The bytes were encoded as UTF-8 just above, so declare the charset
    # instead of leaving it to the converter's detection.
    result = mid.convert_stream(
        html_stream,
        stream_info=StreamInfo(extension=".html", charset="utf-8"),
    )
    return result.markdown
def save_markdown_by_id(page_id: str, parent: Path) -> None:
    """Save a page, and recursively all of its descendants, as Markdown files.

    The page is written to ``parent / f"{page_id}.md"``. Its child page ids
    are then listed in batches and each child is saved the same way.

    Environment variables:
        CONFLUENCE_TREE: when set, children are written into a sub-directory
            named after their parent page id instead of into ``parent``.
        CONFLUENCE_RATE_LIMIT: when set, its value is parsed as a number of
            seconds to sleep before the children of a page are processed.

    Args:
        page_id: Identifier of the page to save.
        parent: Directory the page file is written into; created if missing.
    """
    logger.info(f"Saving page {page_id} to {parent}")
    markdown = get_markdown_by_id(page_id)
    parent.mkdir(parents=True, exist_ok=True)
    path = parent / f"{page_id}.md"
    # Explicit UTF-8: the front matter is dumped with allow_unicode=True, so
    # the text may contain characters the locale's default encoding rejects.
    path.write_text(markdown, encoding="utf-8")
    logger.info(f"Saved page {page_id} to {path}")

    page_size = 200
    child_ids = []
    for start in count(0, page_size):
        batch = confluence.get_child_id_list(page_id, start=start, limit=page_size)
        child_ids.extend(batch)
        # A batch shorter than the requested size is treated as the last one.
        if len(batch) < page_size:
            break
    if not child_ids:
        return
    logger.info(f"Found {len(child_ids)} child pages of page {page_id}")

    child_parent = parent / page_id if "CONFLUENCE_TREE" in environ else parent
    if "CONFLUENCE_RATE_LIMIT" in environ:
        sleep(float(environ["CONFLUENCE_RATE_LIMIT"]))
    # A distinct loop name keeps the `page_id` parameter from being rebound.
    for child_id in child_ids:
        save_markdown_by_id(child_id, child_parent)
@app.command()
def auth_login() -> None:
    """Prompt for the Confluence URL and token and store them for later runs."""
    envs = [
        f"CONFLUENCE_URL={Prompt.ask('CONFLUENCE_URL')}",
        f"CONFLUENCE_TOKEN={Prompt.ask('CONFLUENCE_TOKEN', password=True)}",
    ]
    # The file holds a secret token: make it owner-only before any content is
    # written. touch() covers a fresh file, chmod() tightens an existing one.
    dotenv_path.touch(mode=0o600, exist_ok=True)
    dotenv_path.chmod(0o600)
    dotenv_path.write_text("\n".join(envs), encoding="utf-8")
@app.command()
def auth_status() -> None:
    """Print the username of the account the stored credentials belong to."""
    # The client's return type is broader than a dict; narrow it for indexing.
    current = cast(dict[str, Any], confluence.get("/rest/api/user/current"))
    print(current["username"])
@app.command()
def auth_logout() -> None:
    """Delete the stored Confluence credentials, if there are any."""
    # missing_ok=True: logging out without stored credentials is not an error.
    dotenv_path.unlink(missing_ok=True)
@app.command()
def cql(query: str) -> None:
    """Run the CQL QUERY against Confluence and print the result as YAML."""
    search_result = confluence.cql(query)
    print(safe_dump(search_result, allow_unicode=True))
@app.command()
def get(page_id: str) -> None:
    """Print the page PAGE_ID as Markdown preceded by YAML front matter."""
    print(get_markdown_by_id(page_id))
@app.command()
def get_child_pages(page_id: str) -> None:
    """Print the child pages of the page PAGE_ID as YAML."""
    child_pages: Any = confluence.get_child_pages(page_id)
    # Materialize the result into a list before dumping it.
    print(safe_dump(list(child_pages), allow_unicode=True))
@app.command(hidden=True)
def save(page_id: str) -> None:
    """Save the page PAGE_ID and all of its descendants as Markdown files.

    Files are written under the directory named by CONFLUENCE_OUTPUT_DIR
    (default: "site").
    """
    output_dir = Path(getenv("CONFLUENCE_OUTPUT_DIR", "site"))
    save_markdown_by_id(page_id, output_dir)
# Run the Typer application only when this file is executed as a script.
if __name__ == "__main__":
    app()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment