Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions openkb/agent/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,13 +797,18 @@ def _sanitize_concept_name(name: str) -> str:
_parse_yaml_list_value = frontmatter.parse_list_value


def _write_concept(wiki_dir: Path, name: str, content: str, source_file: str, is_update: bool, brief: str = "") -> None:
"""Write or update a concept page, managing the sources frontmatter."""
concepts_dir = wiki_dir / "concepts"
concepts_dir.mkdir(parents=True, exist_ok=True)
def _write_concept(wiki_dir: Path, name: str, content: str, source_file: str, is_update: bool, brief: str = "", topic_dir: Path | None = None) -> None:
"""Write or update a concept page, managing the sources frontmatter.

When ``topic_dir`` is given (topic-tree mode) the page is written there
instead of the flat ``concepts/`` directory; the basename is unchanged so
name-based wikilinks still resolve.
"""
base_dir = topic_dir if topic_dir is not None else (wiki_dir / "concepts")
base_dir.mkdir(parents=True, exist_ok=True)
safe_name = _sanitize_concept_name(name)
path = (concepts_dir / f"{safe_name}.md").resolve()
if not path.is_relative_to(concepts_dir.resolve()):
path = (base_dir / f"{safe_name}.md").resolve()
if not path.is_relative_to(base_dir.resolve()):
logger.warning("Concept name escapes concepts dir: %s", name)
return

Expand Down
55 changes: 53 additions & 2 deletions openkb/agent/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from openkb.config import get_extra_headers, get_timeout_extra_args
from openkb.agent.tools import (
get_wiki_page_content,
read_topic_node,
read_wiki_file,
read_wiki_image,
write_kb_file,
Expand Down Expand Up @@ -48,10 +49,45 @@
"""


# System-prompt template for the query agent in topic-tree mode.
# build_query_agent picks this template (instead of
# _QUERY_INSTRUCTIONS_TEMPLATE) when the `topic_tree` config flag is truthy,
# then fills the single `{schema_md}` placeholder with str.format — so any
# literal braces added to this text in future must be doubled.
_QUERY_INSTRUCTIONS_TREE = """\
You are OpenKB, a knowledge-base Q&A agent. You answer questions by searching the wiki.

{schema_md}

## Search strategy (topic tree)
The concepts/ wiki is a TOPIC TREE — descend it, do not enumerate everything.
1. Call read_topic("") to see the root summary, its child topics, and any concepts there.
2. Pick the child topic(s) most relevant to the question; call read_topic("<name>")
to descend (paths nest, e.g. "attention/multi-head").
3. Repeat until you reach the relevant concept leaves (listed under "concepts here").
4. read_file the relevant concept pages. For "who/what is X" about a named person,
organization, place, or product, read the matching entities/ page.
5. For detailed source content, follow a summary page's `full_text` frontmatter:
short docs → read_file that path; pageindex docs → get_page_content(doc_name, pages)
with tight page ranges. Never fetch a whole document.
6. Source content may reference images; use get_image when needed.
7. If a branch has nothing useful, back up and try a sibling. Synthesize a clear,
concise, well-cited answer grounded in wiki content.

Answer based only on wiki content. Be concise.
Before each tool call, output one short sentence explaining the reason.

If you cannot find relevant information, say so clearly.
"""


def build_query_agent(wiki_root: str, model: str, language: str = "en") -> Agent:
"""Build and return the Q&A agent."""
schema_md = get_agents_md(Path(wiki_root))
instructions = _QUERY_INSTRUCTIONS_TEMPLATE.format(schema_md=schema_md)
from openkb.config import load_config

tree_on = bool(
load_config(Path(wiki_root).parent / ".openkb" / "config.yaml").get(
"topic_tree", False
)
)
template = _QUERY_INSTRUCTIONS_TREE if tree_on else _QUERY_INSTRUCTIONS_TEMPLATE
instructions = template.format(schema_md=schema_md)
instructions += f"\n\nIMPORTANT: Answer in {language} language."

@function_tool
Expand Down Expand Up @@ -88,12 +124,27 @@ def get_image(image_path: str) -> ToolOutputImage | ToolOutputText:
return ToolOutputImage(image_url=result["image_url"])
return ToolOutputText(text=result["text"])

@function_tool
def read_topic(rel: str = "") -> str:
"""Navigate the concept topic tree top-down.

Start at "" (root); the result lists child topics and the concepts at
this node. Descend by calling again with a child topic's path (e.g.
"attention" or "attention/multi-head"); read concept leaves with
read_file. Do not enumerate the whole tree.
"""
return read_topic_node(rel, wiki_root)

from agents.model_settings import ModelSettings

tools = [read_file, get_page_content, get_image]
if tree_on:
tools.append(read_topic)

return Agent(
name="wiki-query",
instructions=instructions,
tools=[read_file, get_page_content, get_image],
tools=tools,
model=f"litellm/{model}",
model_settings=ModelSettings(
parallel_tool_calls=False,
Expand Down
26 changes: 26 additions & 0 deletions openkb/agent/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,32 @@ def read_wiki_file(path: str, wiki_root: str) -> str:
return full_path.read_text(encoding="utf-8")


def read_topic_node(rel: str, wiki_root: str) -> str:
    """Render a topic node: its summary, child topics, and concept briefs.

    Use to navigate the concept topic tree top-down: start at ``""`` (root),
    pick a child topic, call again with its path, until you reach the concept
    leaves you need (then read them with read_wiki_file).

    Args:
        rel: Topic path relative to ``concepts/`` (``""`` for root,
            ``"attention"``, ``"attention/multi-head"``).
        wiki_root: Absolute path to the wiki root directory.

    Returns:
        A newline-joined text block: a ``# topic: ...`` heading, the node
        summary, then optional ``## child topics`` and ``## concepts here``
        bullet lists (concepts are rendered as ``[[stem]]`` wikilinks).
        If ``rel`` resolves outside ``concepts/`` an ``Access denied`` message
        is returned instead and nothing is read.
    """
    concepts_root = Path(wiki_root) / "concepts"

    # ``rel`` is chosen by the agent, so treat it as untrusted: refuse
    # ``..`` segments or absolute paths that would leave concepts/. This is
    # the same resolve + is_relative_to containment check _write_concept uses
    # for concept names; it is defence in depth in case read_topic does not
    # validate the path itself.
    resolved_root = concepts_root.resolve()
    if not (resolved_root / rel).resolve().is_relative_to(resolved_root):
        return f"Access denied: topic path escapes concepts/: {rel}"

    # Imported lazily, as in the original, so the module loads without
    # pulling in the topic-tree code until a topic is actually read.
    from openkb.topic_tree import read_topic

    view = read_topic(concepts_root, rel)
    lines = [f"# topic: {rel or '(root)'}", "", view.summary, ""]
    if view.child_topics:
        lines.append("## child topics")
        lines += [f"- {n}: {s}" for n, s in view.child_topics]
    if view.child_concepts:
        lines.append("## concepts here")
        lines += [f"- [[{stem}]]: {brief}" for stem, brief in view.child_concepts]
    return "\n".join(lines)


def parse_pages(pages: str) -> list[int]:
"""Parse a page specification string into a sorted, deduplicated list of page numbers.

Expand Down
33 changes: 33 additions & 0 deletions openkb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def filter(self, record: logging.LogRecord) -> bool:
from openkb.locks import atomic_write_json, atomic_write_text, kb_ingest_lock, kb_read_lock
from openkb.log import append_log
from openkb.schema import AGENTS_MD, INDEX_SEED, PAGE_CONTENT_DIRS
from openkb.topic_tree import bootstrap as tt_bootstrap

# Suppress warnings after all imports — markitdown overrides filters at import time
import warnings
Expand Down Expand Up @@ -1691,6 +1692,38 @@ def lint(ctx, fix):
asyncio.run(run_lint(kb_dir))


@cli.command()
@click.pass_context
def reindex(ctx):
    """Build the concept topic tree from the existing flat wiki/concepts/ (experimental).

    No-op unless `topic_tree: true` is set in .openkb/config.yaml.
    """
    # NOTE: the docstring above doubles as the command's --help text, so it is
    # user-facing output rather than internal documentation.
    kb_root = _find_kb_dir(ctx.obj.get("kb_dir_override"))
    if kb_root is None:
        click.echo("No knowledge base found. Run `openkb init` first.")
        return

    # The .openkb directory holds the config file and is also the lock target.
    state_dir = kb_root / ".openkb"
    settings = load_config(state_dir / "config.yaml")
    tree_enabled = bool(settings.get("topic_tree", False))
    if not tree_enabled:
        # Feature flag is off: tell the user how to opt in and do nothing.
        click.echo(
            "topic_tree is not enabled. Set `topic_tree: true` in "
            ".openkb/config.yaml first."
        )
        return

    _setup_llm_key(kb_root)
    llm_model = settings.get("model", DEFAULT_CONFIG["model"])
    # Deferred import: the LLM-backed helpers are only needed on this path.
    from openkb.topic_tree_llm import make_cluster, make_summarize

    # Bootstrap runs while holding the ingest lock on the .openkb directory.
    with kb_ingest_lock(state_dir):
        reindexed = tt_bootstrap(
            kb_root / "wiki" / "concepts",
            cluster=make_cluster(llm_model),
            summarize=make_summarize(llm_model),
        )
    click.echo(f"Reindexed {reindexed} concept(s) into the topic tree.")


@cli.command()
@click.option("--open/--no-open", "open_browser", default=True,
help="Open the graph in your browser after generating (default: on; --no-open for headless).")
Expand Down
13 changes: 12 additions & 1 deletion openkb/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,23 @@ def list_existing_wiki_targets(wiki_dir: Path) -> set[str]:
Used to seed the whitelist passed to :func:`strip_ghost_wikilinks` from
both the compile pipeline and any other code path that writes
LLM-generated content to the wiki (e.g. ``openkb query --save``).

Concepts may be nested under a topic tree, so they are indexed
recursively by BOTH their relative path (``concepts/<...>/<stem>``) and
their bare ``<stem>`` (Obsidian-style, path-independent) — the bare stem
is what lets a link survive a topic split that moves the file.
"""
targets: set[str] = set()
concepts_dir = wiki_dir / "concepts"
summaries_dir = wiki_dir / "summaries"
if concepts_dir.is_dir():
targets.update(f"concepts/{p.stem}" for p in concepts_dir.glob("*.md"))
for p in concepts_dir.rglob("*.md"):
if p.name == "_topic.md":
continue
rel = p.relative_to(wiki_dir).with_suffix("")
targets.add(str(rel).replace("\\", "/")) # concepts/<...>/<stem>
targets.add(f"concepts/{p.stem}") # path-independent concepts/<stem>
targets.add(p.stem) # bare <stem>
if summaries_dir.is_dir():
targets.update(f"summaries/{p.stem}" for p in summaries_dir.glob("*.md"))
entities_dir = wiki_dir / "entities"
Expand Down
Loading