VectifyAI · HaiyangPeng · May 25, 2026
diff --git a/examples/agentic_vectorless_rag_demo.py b/examples/agentic_vectorless_rag_demo.py
@@ -12,7 +12,7 @@
   - get_page_content()       — retrieve text content of specific pages
 
 Steps:
-  1 — Index a PDF and view its tree structure index
+  1 — Index a PDF or Markdown file and view its tree structure index
   2 — View document metadata
   3 — Ask a question (agent reasons over the index and auto-calls tools)
 
@@ -44,9 +44,16 @@
 AGENT_SYSTEM_PROMPT = """
 You are PageIndex, a document QA assistant.
 TOOL USE:
-- Call get_document() first to confirm status and page/line count.
-- Call get_document_structure() to identify relevant page ranges.
-- Call get_page_content(pages="5-7") with tight ranges; never fetch the whole document.
+- Call get_document() first to confirm status, type (pdf/md), and
+  page/line count.
+- Call get_document_structure() to identify relevant sections.
+- Call get_page_content(pages=...) to fetch content:
+  - For PDF documents: use page number ranges like "5-7" or "3,8".
+  - For Markdown documents: use ONLY the exact line_num values from the
+    structure, separated by commas. Example: "3,53,69". Do NOT use
+    ranges.
+    Each line_num represents a complete section node — you will receive
+    the full content of that section.
 - Before each tool call, output one short sentence explaining the reason.
 Answer based only on tool output. Be concise.
 """
@@ -72,9 +79,12 @@ def get_document_structure() -> str:
     @function_tool
     def get_page_content(pages: str) -> str:
         """
-        Get the text content of specific pages or line numbers.
-        Use tight ranges: e.g. '5-7' for pages 5 to 7, '3,8' for pages 3 and 8, '12' for page 12.
-        For Markdown documents, use line numbers from the structure's line_num field.
+        Get the text content of specific pages (PDF) or sections (Markdown).
+        For PDF: use page ranges, e.g. '5-7', '3,8', '12'.
+        For Markdown: use EXACT line_num values from get_document_structure(),
+        comma-separated, e.g. '3,53,69'. Each line_num returns the FULL text
+        of that section node. Do NOT guess or interpolate line numbers —
+        only use values that appear in the structure.
         """
         return client.get_page_content(doc_id, pages)
 

diff --git a/pageindex/retrieve.py b/pageindex/retrieve.py
@@ -55,17 +55,19 @@ def _get_pdf_page_content(doc_info: dict, page_nums: list[int]) -> list[dict]:
 
 def _get_md_page_content(doc_info: dict, page_nums: list[int]) -> list[dict]:
     """
-    For Markdown documents, 'pages' are line numbers.
-    Find nodes whose line_num falls within [min(page_nums), max(page_nums)] and return their text.
+    For Markdown documents, page_nums are line_num values of section
+    nodes.
+    Return the text of nodes whose line_num exactly matches one of the
+    requested values.
     """
-    min_line, max_line = min(page_nums), max(page_nums)
+    requested = set(page_nums)
     results = []
     seen = set()
 
     def _traverse(nodes):
         for node in nodes:
             ln = node.get('line_num')
-            if ln and min_line <= ln <= max_line and ln not in seen:
+            if ln and ln in requested and ln not in seen:
                 seen.add(ln)
                 results.append({'page': ln, 'content': node.get('text', '')})
             if node.get('nodes'):