Skip to content

Commit e8e5974

Browse files
Caral, Hsiyuan.wang, and Wang-Daoji
authored
feat: adjust skill prompt and fix some bugs (#988)
* feat: skill with history (#986) * feat: skill with history * feat: pass chat-history into skill * feat: modify chat-history passing in skills * feat: modify code * fix: we don't need to pass history in part B * fix: process skill memory * feat: we do not return None with few history now * feat: update skill * feat: modify _split_task_chunk_by_llm * feat: filter by embedding * feat: modify skill --------- Co-authored-by: yuan.wang <yuan.wang@yuanwangdebijibendiannao.local> Co-authored-by: Wang Daoji <75928131+Wang-Daoji@users.noreply.github.com> * feat: skill with history (#987) * feat: skill with history * feat: pass chat-history into skill * feat: modify chat-history passing in skills * feat: modify code * fix: we don't need to pass history in part B * fix: process skill memory * feat: we do not return None with few history now * feat: update skill * feat: modify _split_task_chunk_by_llm * feat: filter by embedding * feat: modify skill * feat: reinforce update rule --------- Co-authored-by: yuan.wang <yuan.wang@yuanwangdebijibendiannao.local> Co-authored-by: Wang Daoji <75928131+Wang-Daoji@users.noreply.github.com> --------- Co-authored-by: yuan.wang <yuan.wang@yuanwangdebijibendiannao.local> Co-authored-by: Wang Daoji <75928131+Wang-Daoji@users.noreply.github.com>
1 parent fd25c3d commit e8e5974

9 files changed

Lines changed: 117 additions & 28 deletions

File tree

src/memos/api/handlers/search_handler.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -377,16 +377,13 @@ def _extract_embeddings(memories: list[dict[str, Any]]) -> list[list[float]] | N
377377

378378
@staticmethod
379379
def _strip_embeddings(results: dict[str, Any]) -> None:
380-
for bucket in results.get("text_mem", []):
381-
for mem in bucket.get("memories", []):
382-
metadata = mem.get("metadata", {})
383-
if "embedding" in metadata:
384-
metadata["embedding"] = []
385-
for bucket in results.get("tool_mem", []):
386-
for mem in bucket.get("memories", []):
387-
metadata = mem.get("metadata", {})
388-
if "embedding" in metadata:
389-
metadata["embedding"] = []
380+
for _mem_type, mem_results in results.items():
381+
if isinstance(mem_results, list):
382+
for bucket in mem_results:
383+
for mem in bucket.get("memories", []):
384+
metadata = mem.get("metadata", {})
385+
if "embedding" in metadata:
386+
metadata["embedding"] = []
390387

391388
@staticmethod
392389
def _dice_similarity(text1: str, text2: str) -> float:

src/memos/mem_reader/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def set_searcher(self, searcher: "Searcher | None") -> None:
4242

4343
@abstractmethod
4444
def get_memory(
45-
self, scene_data: list, type: str, info: dict[str, Any], mode: str = "fast"
45+
self, scene_data: list, type: str, info: dict[str, Any], mode: str = "fast", **kwargs
4646
) -> list[list[TextualMemoryItem]]:
4747
"""Various types of memories extracted from scene_data"""
4848

src/memos/mem_reader/multi_modal_struct.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -862,7 +862,7 @@ def _process_multi_modal_data(
862862
# Part A: call llm in parallel using thread pool
863863
fine_memory_items = []
864864

865-
with ContextThreadPoolExecutor(max_workers=2) as executor:
865+
with ContextThreadPoolExecutor(max_workers=3) as executor:
866866
future_string = executor.submit(
867867
self._process_string_fine, fast_memory_items, info, custom_tags, **kwargs
868868
)

src/memos/mem_reader/read_skill_memory/process_skill_memory.py

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,17 @@ def _reconstruct_messages_from_memory_items(memory_items: list[TextualMemoryItem
105105
return reconstructed_messages
106106

107107

108+
def _preprocess_extract_messages(
109+
history: MessageList, messages: MessageList
110+
) -> (MessageList, MessageList):
111+
"""Process data and check whether to extract skill memory"""
112+
history = history[-20:]
113+
if (len(history) + len(messages)) < 10:
114+
# TODO: maybe directly return []
115+
logger.warning("[PROCESS_SKILLS] Not enough messages to extract skill memory")
116+
return history, messages
117+
118+
108119
def _add_index_to_message(messages: MessageList) -> MessageList:
109120
for i, message in enumerate(messages):
110121
message["idx"] = i
@@ -144,18 +155,27 @@ def _split_task_chunk_by_llm(llm: BaseLLM, messages: MessageList) -> dict[str, M
144155
message_indices = item["message_indices"]
145156
for indices in message_indices:
146157
# Validate that indices is a list/tuple with exactly 2 elements
147-
if not isinstance(indices, list | tuple) or len(indices) != 2:
158+
if isinstance(indices, list) and len(indices) == 1:
159+
start, end = indices[0], indices[0] + 1
160+
elif isinstance(indices, int):
161+
start, end = indices, indices + 1
162+
elif isinstance(indices, list) and len(indices) == 2:
163+
start, end = indices[0], indices[1] + 1
164+
else:
148165
logger.warning(
149166
f"[PROCESS_SKILLS] Invalid message indices format for task '{task_name}': {indices}, skipping"
150167
)
151168
continue
152-
start, end = indices
153-
task_chunks.setdefault(task_name, []).extend(messages[start : end + 1])
169+
task_chunks.setdefault(task_name, []).extend(messages[start:end])
154170
return task_chunks
155171

156172

157173
def _extract_skill_memory_by_llm(
158-
messages: MessageList, old_memories: list[TextualMemoryItem], llm: BaseLLM
174+
messages: MessageList,
175+
old_memories: list[TextualMemoryItem],
176+
llm: BaseLLM,
177+
chat_history: MessageList,
178+
chat_history_max_length: int = 5000,
159179
) -> dict[str, Any]:
160180
old_memories_dict = [skill_memory.model_dump() for skill_memory in old_memories]
161181
old_mem_references = [
@@ -169,7 +189,7 @@ def _extract_skill_memory_by_llm(
169189
"examples": mem["metadata"]["examples"],
170190
"tags": mem["metadata"]["tags"],
171191
"scripts": mem["metadata"].get("scripts"),
172-
"others": mem["metadata"]["others"],
192+
"others": mem["metadata"].get("others"),
173193
}
174194
for mem in old_memories_dict
175195
]
@@ -179,17 +199,26 @@ def _extract_skill_memory_by_llm(
179199
[f"{message['role']}: {message['content']}" for message in messages]
180200
)
181201

202+
# Prepare history context
203+
chat_history_context = "\n".join(
204+
[f"{history['role']}: {history['content']}" for history in chat_history]
205+
)
206+
chat_history_context = chat_history_context[-chat_history_max_length:]
207+
182208
# Prepare old memories context
183209
old_memories_context = json.dumps(old_mem_references, ensure_ascii=False, indent=2)
184210

185211
# Prepare prompt
186212
lang = detect_lang(messages_context)
187213
template = SKILL_MEMORY_EXTRACTION_PROMPT_ZH if lang == "zh" else SKILL_MEMORY_EXTRACTION_PROMPT
188-
prompt_content = template.replace("{old_memories}", old_memories_context).replace(
189-
"{messages}", messages_context
214+
prompt_content = (
215+
template.replace("{old_memories}", old_memories_context)
216+
.replace("{messages}", messages_context)
217+
.replace("{chat_history}", chat_history_context)
190218
)
191219

192220
prompt = [{"role": "user", "content": prompt_content}]
221+
logger.info(f"[Skill Memory]: Prompt {prompt_content}")
193222

194223
# Call LLM to extract skill memory with retry logic
195224
for attempt in range(3):
@@ -198,7 +227,8 @@ def _extract_skill_memory_by_llm(
198227
skills_llm = os.getenv("SKILLS_LLM", None)
199228
llm_kwargs = {"model_name_or_path": skills_llm} if skills_llm else {}
200229
response_text = llm.generate(prompt, **llm_kwargs)
201-
# Clean up response (remove markdown code blocks if present)
230+
# Clean up response (remove Markdown code blocks if present)
231+
logger.info(f"[Skill Memory]: response_text {response_text}")
202232
response_text = response_text.strip()
203233
response_text = response_text.replace("```json", "").replace("```", "").strip()
204234

@@ -537,6 +567,11 @@ def process_skill_memory_fine(
537567
)
538568
return []
539569

570+
chat_history = kwargs.get("chat_history")
571+
if not chat_history or not isinstance(chat_history, list):
572+
chat_history = []
573+
logger.warning("[PROCESS_SKILLS] History is None in Skills")
574+
540575
# Validate skills_dir has required keys
541576
required_keys = ["skills_local_dir", "skills_oss_dir"]
542577
missing_keys = [key for key in required_keys if key not in skills_dir_config]
@@ -552,7 +587,13 @@ def process_skill_memory_fine(
552587
return []
553588

554589
messages = _reconstruct_messages_from_memory_items(fast_memory_items)
590+
591+
chat_history, messages = _preprocess_extract_messages(chat_history, messages)
592+
if not messages:
593+
return []
594+
555595
messages = _add_index_to_message(messages)
596+
chat_history = _add_index_to_message(chat_history)
556597

557598
task_chunks = _split_task_chunk_by_llm(llm, messages)
558599
if not task_chunks:
@@ -594,6 +635,7 @@ def process_skill_memory_fine(
594635
messages,
595636
related_skill_memories_by_task.get(task_type, []),
596637
llm,
638+
chat_history,
597639
): task_type
598640
for task_type, messages in task_chunks.items()
599641
}
@@ -608,7 +650,7 @@ def process_skill_memory_fine(
608650

609651
# write skills to file and get zip paths
610652
skill_memory_with_paths = []
611-
with ContextThreadPoolExecutor(max_workers=min(len(skill_memories), 5)) as executor:
653+
with ContextThreadPoolExecutor(max_workers=5) as executor:
612654
futures = {
613655
executor.submit(
614656
_write_skills_to_file, skill_memory, info, skills_dir_config
@@ -713,9 +755,11 @@ def process_skill_memory_fine(
713755
continue
714756

715757
# TODO: deprecate this funtion and call
716-
for skill_memory in skill_memory_items:
758+
for skill_memory, skill_memory_item in zip(skill_memories, skill_memory_items, strict=False):
759+
if skill_memory.get("update", False) and skill_memory.get("old_memory_id", ""):
760+
continue
717761
add_id_to_mysql(
718-
memory_id=skill_memory.id, mem_cube_id=kwargs.get("user_name", info.get("user_id", ""))
762+
memory_id=skill_memory_item.id,
763+
mem_cube_id=kwargs.get("user_name", info.get("user_id", "")),
719764
)
720-
721765
return skill_memory_items

src/memos/mem_reader/simple_struct.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,7 @@ def get_memory(
428428
info: dict[str, Any],
429429
mode: str = "fine",
430430
user_name: str | None = None,
431+
**kwargs,
431432
) -> list[list[TextualMemoryItem]]:
432433
"""
433434
Extract and classify memory content from scene_data.
@@ -471,7 +472,9 @@ def get_memory(
471472
# Backward compatibility, after coercing scene_data, we only tackle
472473
# with standard scene_data type: MessagesType
473474
standard_scene_data = coerce_scene_data(scene_data, type)
474-
return self._read_memory(standard_scene_data, type, info, mode, user_name=user_name)
475+
return self._read_memory(
476+
standard_scene_data, type, info, mode, user_name=user_name, **kwargs
477+
)
475478

476479
def rewrite_memories(
477480
self, messages: list[dict], memory_list: list[TextualMemoryItem], user_only: bool = True

src/memos/mem_scheduler/general_scheduler.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,7 @@ def process_message(message: ScheduleMessageItem):
764764
content = message.content
765765
user_name = message.user_name
766766
info = message.info or {}
767+
chat_history = message.chat_history
767768

768769
# Parse the memory IDs from content
769770
mem_ids = json.loads(content) if isinstance(content, str) else content
@@ -790,6 +791,7 @@ def process_message(message: ScheduleMessageItem):
790791
custom_tags=info.get("custom_tags", None),
791792
task_id=message.task_id,
792793
info=info,
794+
chat_history=chat_history,
793795
)
794796

795797
logger.info(
@@ -817,6 +819,7 @@ def _process_memories_with_reader(
817819
custom_tags: list[str] | None = None,
818820
task_id: str | None = None,
819821
info: dict | None = None,
822+
chat_history: list | None = None,
820823
) -> None:
821824
logger.info(
822825
f"[DIAGNOSTIC] general_scheduler._process_memories_with_reader called. mem_ids: {mem_ids}, user_id: {user_id}, mem_cube_id: {mem_cube_id}, task_id: {task_id}"
@@ -878,6 +881,7 @@ def _process_memories_with_reader(
878881
type="chat",
879882
custom_tags=custom_tags,
880883
user_name=user_name,
884+
chat_history=chat_history,
881885
)
882886
except Exception as e:
883887
logger.warning(f"{e}: Fail to transfer mem: {memory_items}")

src/memos/mem_scheduler/schemas/message_schemas.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class ScheduleMessageItem(BaseModel, DictConversionMixin):
5454
default=None,
5555
description="Optional business-level task ID. Multiple items can share the same task_id.",
5656
)
57+
chat_history: list | None = Field(default=None, description="user chat history")
5758

5859
# Pydantic V2 model configuration
5960
model_config = ConfigDict(
@@ -89,6 +90,7 @@ def to_dict(self) -> dict:
8990
"timestamp": self.timestamp.isoformat(),
9091
"user_name": self.user_name,
9192
"task_id": self.task_id if self.task_id is not None else "",
93+
"chat_history": self.chat_history if self.chat_history is not None else [],
9294
}
9395

9496
@classmethod
@@ -104,6 +106,7 @@ def from_dict(cls, data: dict) -> "ScheduleMessageItem":
104106
timestamp=datetime.fromisoformat(data["timestamp"]),
105107
user_name=data.get("user_name"),
106108
task_id=data.get("task_id"),
109+
chat_history=data.get("chat_history"),
107110
)
108111

109112

@@ -158,6 +161,7 @@ class ScheduleLogForWebItem(BaseModel, DictConversionMixin):
158161
default=None, description="Completion status of the task (e.g., 'completed', 'failed')"
159162
)
160163
source_doc_id: str | None = Field(default=None, description="Source document ID")
164+
chat_history: list | None = Field(default=None, description="user chat history")
161165

162166
def debug_info(self) -> dict[str, Any]:
163167
"""Return structured debug information for logging purposes."""

src/memos/multi_mem_cube/single_cube.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,7 @@ def _schedule_memory_tasks(
553553
timestamp=datetime.utcnow(),
554554
user_name=self.cube_id,
555555
info=add_req.info,
556+
chat_history=add_req.chat_history,
556557
)
557558
self.mem_scheduler.submit_messages(messages=[message_item_read])
558559
self.logger.info(
@@ -807,6 +808,7 @@ def _process_text_mem(
807808
},
808809
mode=extract_mode,
809810
user_name=user_context.mem_cube_id,
811+
chat_history=add_req.chat_history,
810812
)
811813
self.logger.info(
812814
f"Time for get_memory in extract mode {extract_mode}: {time.time() - init_time}"

src/memos/templates/skill_mem_prompt.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@
7878
# Existing Skill Memories
7979
{old_memories}
8080
81+
# Chat_history
82+
{chat_history}
83+
8184
# Conversation Messages
8285
{messages}
8386
@@ -86,6 +89,11 @@
8689
2. **Universality**: All fields except "example" must remain general and scenario-independent.
8790
3. **Similarity Check**: If similar skill exists, set "update": true with "old_memory_id". Otherwise, set "update": false and leave "old_memory_id" empty.
8891
4. **Language Consistency**: Match the conversation language.
92+
5. **History Usage Constraints**:
93+
- `chat_history` serves only as auxiliary context to supplement stable preferences or methodologies that are not explicitly stated in `messages` but may affect skill abstraction.
94+
- `chat_history` may be considered only when it provides information **missing from `messages`** and **relevant to the current task’s goals, execution approach, or constraints**.
95+
- `chat_history` must not be the primary source of a skill, and may only be used to enrich auxiliary fields such as `preference` or `experience`.
96+
- If `chat_history` does not provide any valid information beyond what already exists in `messages`, or contains only greetings or background content, it must be completely ignored.
8997
9098
# Output Format
9199
```json
@@ -100,7 +108,9 @@
100108
"scripts": {"script_name.py": "# Python code here\nprint('Hello')", "another_script.py": "# More code\nimport os"},
101109
"others": {"Section Title": "Content here", "reference.md": "# Reference content for this skill"},
102110
"update": false,
103-
"old_memory_id": ""
111+
"old_memory_id": "",
112+
"whether_use_chat_history": false,
113+
"content_of_related_chat_history": ""
104114
}
105115
```
106116
@@ -119,6 +129,10 @@
119129
- **examples**: Complete output templates showing the final deliverable format and structure. Should demonstrate how the task result looks when this skill is applied, including format, sections, and content organization. Content can be abbreviated but must show the complete structure. Use markdown format for better readability
120130
- **update**: true if updating existing skill, false if new
121131
- **old_memory_id**: ID of skill being updated, or empty string if new
132+
- **whether_use_chat_history**: Indicates whether information from chat_history that does not appear in messages was incorporated into the skill
133+
- **content_of_related_chat_history**:
134+
If whether_use_chat_history is true, provide a high-level summary of the type of historical information used (e.g., “long-term preference: prioritizes cultural attractions”); do not quote the original dialogue verbatim
135+
If not used, leave this field as an empty string
122136
123137
# Critical Guidelines
124138
- Keep all fields general except "examples"
@@ -141,14 +155,23 @@
141155
# 现有技能记忆
142156
{old_memories}
143157
144-
# 对话消息
158+
# 对话消息的上下文chat_history
159+
{chat_history}
160+
161+
# 当前对话消息
145162
{messages}
146163
147164
# 核心原则
148165
1. **通用化**:提取可跨场景应用的抽象方法论。避免具体细节(如"旅行规划"而非"北京旅行规划")。
149166
2. **普适性**:除"examples"外,所有字段必须保持通用,与具体场景无关。
150167
3. **相似性检查**:如存在相似技能,设置"update": true 及"old_memory_id"。否则设置"update": false 并将"old_memory_id"留空。
151168
4. **语言一致性**:与对话语言保持一致。
169+
5. **历史使用约束**:
170+
- chat_history 仅作为辅助上下文,用于补充 messages 中未明确出现的、但会影响技能抽象的稳定偏好或方法论。
171+
- 当 chat_history 能提供 messages 中缺失、且与当前任务目标、执行方式或约束相关的信息增量时,可以纳入考虑。
172+
- chat_history 不得作为技能的主要来源,仅可用于完善 preference、experience 等辅助字段。
173+
- 若 chat_history 未提供任何 messages 中不存在的有效信息,或仅包含寒暄、背景性内容,应完全忽略。
174+
6. 如果你提取的抽象方法论和已有的技能记忆描述的是同一个主题(比如同一个生活场景),请务必使用更新操作,不要新建一个方法论,注意合理的追加到已有的技能记忆上,保证通顺且不丢失已有方法论的信息。
152175
153176
# 输出格式
154177
```json
@@ -163,7 +186,10 @@
163186
"scripts": {"script_name.py": "# Python 代码\nprint('Hello')", "another_script.py": "# 更多代码\nimport os"},
164187
"others": {"章节标题": "这里的内容", "reference.md": "# 此技能的参考内容"},
165188
"update": false,
166-
"old_memory_id": ""
189+
"old_memory_id": "",
190+
"content_of_current_message": "",
191+
"whether_use_chat_history": false,
192+
"content_of_related_chat_history": "",
167193
}
168194
```
169195
@@ -182,12 +208,21 @@
182208
- **examples**:展示最终任务成果的输出模板,包括格式、章节和内容组织结构。应展示应用此技能后任务结果的样子,包含所有必要的部分。内容可以省略但必须展示完整结构。使用 markdown 格式以提高可读性
183209
- **update**:更新现有技能为true,新建为false
184210
- **old_memory_id**:被更新技能的ID,新建则为空字符串
211+
- **content_of_current_message**: 从当前对话消息中提取的核心内容(简写但必填),
212+
- **whether_use_chat_history**:是否从 chat_history 中引用了 messages 中没有的内容并提取到skill中
213+
- **content_of_related_chat_history**:若 whether_use_chat_history 为 true,
214+
仅需概括性说明所使用的历史信息类型(如“长期偏好:文化类景点优先”),
215+
不要求逐字引用原始对话内容;
216+
若未使用,则置为空字符串。
185217
186218
# 关键指导
187219
- 除"examples"外保持所有字段通用
188220
- "examples"应展示完整的最终输出格式和结构,包含所有必要章节
189221
- "others"包含补充说明或扩展信息
190222
- 无法提取技能时返回null
223+
- 注意区分chat_history与当前对话消息,如果能提出skill,必须有一部分来自于当前对话消息
224+
- 一定仅在必要时才新建方法论,同样的场景尽量合并("update": true),
225+
如饮食规划合并为一条,不要已有“饮食规划”的情况下,再新增一个“生酮饮食规划”。
191226
192227
# 输出格式
193228
仅输出JSON对象。

0 commit comments

Comments (0)