PAACE:计划感知的Agent上下文工程
1. 问题背景
1.1 Agent工作流的上下文挑战
大语言模型Agent在执行复杂任务时,需要处理大量的上下文信息:
典型Agent工作流:
用户请求 → 历史对话 → 工具定义 → 中间结果 → 当前状态 → ...
↓ ↓ ↓ ↓ ↓
100 tokens 10K tokens 5K tokens 20K tokens 2K tokens
当上下文超过模型的处理能力时,会导致:
- 性能下降:模型难以关注关键信息
- 成本增加:长上下文带来更高的计算和存储成本
- 信息丢失:简单的截断会丢失关键信息
1.2 现有方法的局限
| 方法 | 策略 | 问题 |
|---|---|---|
| 简单截断 | 保留最近的token | 丢失早期关键信息 |
| 摘要压缩 | LLM生成摘要 | 忽略Agent的层级结构 |
| 查询感知 | 基于相关性压缩 | 忽视任务规划信息 |
| 滑动窗口 | 固定窗口 | 无法捕捉任务整体 |
1.3 PAACE的核心洞察
PAACE(Plan-Aware Automated Agent Context Engineering) 提出了一个关键洞察:
Agent工作流不仅仅是序列数据,还包含隐式的层级结构和计划信息。
Agent工作流的层级结构:
├── 任务目标 (Goal)
│ └── 任务规划 (Plan)
│ ├── 子目标 1
│ │ └── 行动步骤 [a, b, c]
│ ├── 子目标 2
│ │ └── 行动步骤 [d, e, f]
│ └── 子目标 3
│ └── 行动步骤 [g, h, i]
└── 当前状态 (State)
└── 当前行动 (Action)
PAACE利用这种层级结构进行计划感知的上下文压缩。
2. 技术详解
2.1 形式化定义
2.1.1 Agent工作流形式化
定义Agent工作流为一个半结构化序列:
$$W = \{(c_t, \tau_t, r_t)\}_{t=1}^{T}$$
其中:
- $c_t$:第 $t$ 步的上下文内容
- $\tau_t$:步骤类型(如 plan、action、observation)
- $r_t$:执行结果(成功/失败/进行中)
2.1.2 计划感知压缩目标
目标是找到一个压缩函数 $f$ 使得:
$$\tilde{W} = f(W, q), \quad |\tilde{W}| \le B$$
其中 $q$ 是当前查询,$B$ 是上下文的token预算,$\tilde{W}$ 是压缩后的上下文,满足:
- 信息保持:关键信息不丢失
- 结构保持:计划层级结构保留
- 相关性:与当前任务相关的信息优先
2.2 PAACE框架
┌─────────────────────────────────────────────────────────────┐
│ PAACE Framework │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ 结构解析 │───▶│ 计划提取 │───▶│ 重要性评估 │ │
│ │(Structure │ │(Plan │ │(Importance │ │
│ │ Parsing) │ │ Extraction) │ │ Scoring) │ │
│ └─────────────┘ └──────────────┘ └──────────────┘ │
│ │ │ │
│ ▼ ▼ │
│ ┌─────────────┐ ┌──────────────┐ │
│ │ 结构保持 │ │ 相关性加权 │ │
│ │(Structure │ │(Relevance │ │
│ │ Preserv.) │ │ Weighting) │ │
│ └─────────────┘ └──────────────┘ │
│ │ │ │
│ └────────────────┬─────────────────────┘ │
│ ▼ │
│ ┌──────────────┐ │
│ │ 层级压缩 │ │
│ │(Hierarchical│ │
│ │ Compression)│ │
│ └──────────────┘ │
│ │ │
│ ▼ │
│ ┌──────────────┐ │
│ │ 压缩后上下文│ │
│ └──────────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
2.3 核心组件
2.3.1 结构解析器
class StructureParser:
    """Parse a flat list of agent messages into goal / plans / state / history.

    Every message is expected to be a dict. The parser reads the ``role`` and
    ``content`` keys and the optional ``type`` (``'plan'`` or ``'action'``),
    ``result`` and ``status`` keys.
    """

    def __init__(self, llm):
        """Store the LLM client, which must expose ``generate(prompt)``."""
        self.llm = llm

    def parse(self, messages: list) -> dict:
        """Build the structured view of an agent workflow.

        Args:
            messages: Conversation messages, oldest first.

        Returns:
            A dict with the keys ``goal``, ``plans``, ``current_state`` and
            ``history``. For an empty ``messages`` list the skeleton is
            returned with ``None`` / empty values instead of raising.
        """
        structured = {
            'goal': None,
            'plans': [],
            'current_state': None,
            'history': [],
        }
        if not messages:
            # Nothing to parse yet (e.g. the first step of an agent loop).
            return structured

        # The goal comes from the first user message that yields one.
        for msg in messages:
            if msg.get('role') == 'user':
                structured['goal'] = self._extract_goal(msg.get('content', ''))
                if structured['goal'] is not None:
                    break

        structured['plans'] = self._extract_plans(messages)
        structured['current_state'] = self._extract_state(messages[-1])
        structured['history'] = self._extract_history(messages)
        return structured

    def _extract_goal(self, text: str) -> str:
        """Ask the LLM to distil the task goal from a user request."""
        prompt = (
            "\n从以下用户请求中提取任务目标:\n"
            f"{text}\n"
            "目标应该简洁、明确地表达用户想要完成的任务。\n"
        )
        return self.llm.generate(prompt)

    def _extract_plans(self, messages: list) -> list:
        """Group ``action`` messages under the most recent ``plan`` message.

        Actions that appear before the first plan message are not attached to
        any plan and are therefore dropped.

        Returns:
            A list of ``{'name': ..., 'steps': [...]}`` dicts in message order.
        """
        plans = []
        current_plan = None
        current_steps = []
        for msg in messages:
            kind = msg.get('type')
            if kind == 'plan':
                # ``is not None`` keeps plans whose name is an empty string.
                if current_plan is not None:
                    plans.append({'name': current_plan, 'steps': current_steps})
                current_plan = msg['content']
                current_steps = []
            elif kind == 'action':
                current_steps.append({
                    'action': msg['content'],
                    'result': msg.get('result', ''),
                    'status': msg.get('status', 'unknown'),
                })
        if current_plan is not None:
            plans.append({'name': current_plan, 'steps': current_steps})
        return plans

    def _extract_state(self, message: dict):
        """Return the content of the latest message as the current state.

        NOTE(review): minimal implementation; the method was called but never
        defined, so the intended state format should be confirmed.
        """
        return message.get('content')

    def _extract_history(self, messages: list) -> list:
        """Return a shallow ``role`` / ``type`` / ``content`` record per message.

        NOTE(review): minimal implementation; the method was called but never
        defined, so the intended history format should be confirmed.
        """
        return [
            {
                'role': msg.get('role'),
                'type': msg.get('type'),
                'content': msg.get('content', ''),
            }
            for msg in messages
        ]

#### 2.3.2 重要性评估器
class ImportanceScorer:
    """Score the goal, plans, steps and current state of a parsed workflow.

    Scores are weighted sums of the judge's relevance, novelty and progress
    ratings. The judge must expose ``score_relevance(text, query)``,
    ``score_novelty(text, query)`` and ``score_progress(text)``.
    """

    # Used only when the caller supplies no weights at all.
    _DEFAULT_WEIGHTS = {'relevance': 0.5, 'novelty': 0.25, 'progress': 0.25}

    def __init__(self, llm, judge=None):
        """Store the LLM client and the judge.

        Args:
            llm: LLM client (kept for callers that read ``self.llm``).
            judge: Optional judge instance. When omitted, ``LLMJudge()`` is
                constructed as before.
        """
        self.llm = llm
        self.llmJudge = judge if judge is not None else LLMJudge()

    def score(self, structured: dict, query: str) -> dict:
        """Score every component of the structured workflow.

        Args:
            structured: Output of the structure parser.
            query: The current query.

        Returns:
            ``{component_id: score}`` with the ids ``goal``, ``plan_{i}``,
            ``step_{i}_{j}`` and ``current_state``. Components that are
            missing (falsy) in ``structured`` get no entry.
        """
        scores = {}

        if structured['goal']:
            scores['goal'] = self._score_component(
                structured['goal'], query,
                weight={'relevance': 0.8, 'novelty': 0.2},
            )

        for i, plan in enumerate(structured['plans']):
            scores[f'plan_{i}'] = self._score_component(
                plan['name'], query,
                weight={'relevance': 0.6, 'progress': 0.4},
            )
            for j, step in enumerate(plan['steps']):
                scores[f'step_{i}_{j}'] = self._score_step(step, query)

        # Skip a missing state instead of handing None to the judge.
        if structured['current_state']:
            scores['current_state'] = self._score_component(
                structured['current_state'], query,
                weight={'relevance': 1.0},
            )
        return scores

    def _score_component(self, text: str, query: str, weight: dict) -> float:
        """Return the weighted sum of the judge ratings for one component.

        A key missing from ``weight`` contributes nothing, so the caller's
        weights are honoured exactly (previously omitted keys silently added
        0.25 each, pushing the total weight above 1). An empty ``weight``
        falls back to the 0.5 / 0.25 / 0.25 defaults. Ratings with zero weight
        are not requested from the judge.
        """
        weights = weight if weight else self._DEFAULT_WEIGHTS
        judge = self.llmJudge
        raters = (
            ('relevance', lambda: judge.score_relevance(text, query)),
            ('novelty', lambda: judge.score_novelty(text, query)),
            ('progress', lambda: judge.score_progress(text)),
        )
        total = 0.0
        for name, rate in raters:
            share = weights.get(name, 0.0)
            if share:
                total += share * rate()
        return total

    def _score_step(self, step: dict, query: str) -> float:
        """Score an action step as base relevance times an outcome impact."""
        base = self._score_component(
            step['action'], query,
            weight={'relevance': 0.7, 'novelty': 0.3},
        )

        status = step.get('status')
        if status == 'success':
            impact = 0.8
        elif status == 'failed':
            impact = 0.6  # failures can still matter
        else:
            impact = 0.5

        # Steps whose result mentions an error carry recovery knowledge.
        # str() tolerates non-string results such as dicts.
        result = step.get('result')
        if result and 'error' in str(result).lower():
            impact = max(impact, 0.9)
        return base * impact

#### 2.3.3 层级压缩器
class HierarchicalCompressor:
    """Compress a parsed workflow level by level under a token budget.

    The goal and each plan receive a share of ``max_tokens``; the current
    state is always emitted in full and is not counted against the budget.
    """

    # Character ranges counted as one token per character (kana, CJK
    # ideographs, hangul). Whitespace splitting would count a whole sentence
    # in these scripts as a single word.
    _CJK_RANGES = (
        ('\u3040', '\u30ff'),
        ('\u3400', '\u4dbf'),
        ('\u4e00', '\u9fff'),
        ('\uac00', '\ud7af'),
    )

    def __init__(self, llm, max_tokens: int):
        """Store the LLM client (``generate(prompt)``) and the total budget."""
        self.llm = llm
        self.max_tokens = max_tokens

    def compress(
        self,
        structured: dict,
        scores: dict,
        query: str
    ) -> str:
        """Render the compressed context.

        Args:
            structured: Parsed workflow (``goal``, ``plans``, ``current_state``).
            scores: Importance scores keyed by ``plan_{i}`` / ``step_{i}_{j}``.
            query: Current query. Accepted for interface compatibility; it is
                not used by the compression itself.

        Returns:
            The goal, plan and current-state sections joined by blank lines.
        """
        budget = self._allocate_budget(structured, scores)
        sections = []

        goal = structured['goal']
        if goal and budget.get('goal', 0) > 0:
            sections.append(
                self._compress_text(goal, budget['goal'], prefix="[目标]")
            )

        for i, plan in enumerate(structured['plans']):
            plan_key = f'plan_{i}'
            plan_budget = budget.get(plan_key, 0)
            if plan_key in scores and plan_budget > 0:
                sections.append(self._compress_plan(plan, scores, i, plan_budget))

        if structured['current_state']:
            sections.append(f"[当前状态]\n{structured['current_state']}")

        return "\n\n".join(sections)

    def _allocate_budget(self, structured: dict, scores: dict) -> dict:
        """Split ``max_tokens`` between the goal and the scored plans.

        The goal gets 20% when a goal exists (otherwise nothing is reserved);
        the remainder is divided among plans in proportion to their scores.
        """
        total_budget = self.max_tokens
        goal_budget = int(total_budget * 0.2) if structured.get('goal') else 0
        budget = {'goal': goal_budget}
        remaining = total_budget - goal_budget

        plan_scores = {k: v for k, v in scores.items() if k.startswith('plan_')}
        # The epsilon avoids division by zero when every plan scores 0.
        total_plan_score = sum(plan_scores.values()) + 1e-6
        for key, value in plan_scores.items():
            budget[key] = int(remaining * (value / total_plan_score))
        return budget

    def _compress_plan(self, plan: dict, scores: dict, plan_idx: int, budget: int) -> str:
        """Render one plan header plus the highest-scoring steps that fit."""
        header = f"[计划 {plan_idx+1}: {plan['name']}]"
        # Reserve room for the header; never let the step budget go negative.
        step_budget = max(0, budget - self._estimate_tokens(header))

        ranked = sorted(
            range(len(plan['steps'])),
            key=lambda j: -scores.get(f'step_{plan_idx}_{j}', 0),
        )

        used = 0
        chosen = []
        for j in ranked:
            cost = self._estimate_tokens(plan['steps'][j]['action'])
            if used + cost <= step_budget:
                chosen.append(j)
                used += cost

        lines = [header]
        # Emit the kept steps in their original order.
        for j in sorted(chosen):
            step = plan['steps'][j]
            status = step.get('status')
            if status == 'success':
                icon = "✓"
            elif status == 'failed':
                icon = "✗"
            else:
                # Unknown / in-progress steps must not be shown as failed.
                icon = "…"
            lines.append(f" {icon} {step['action']}")
        return "\n".join(lines)

    def _compress_text(self, text: str, budget: int, prefix: str = "") -> str:
        """Return ``text`` unchanged if it fits, else an LLM-compressed version."""
        if self._estimate_tokens(text) <= budget:
            return f"{prefix}\n{text}" if prefix else text

        keywords = self._extract_keywords(text)
        prompt = (
            f"\n将以下文本压缩到约{budget}个token,保留关键信息:\n"
            f"原文:{text}\n"
            f"关键词:{', '.join(keywords)}\n"
            "压缩后的版本:\n"
        )
        compressed = self.llm.generate(prompt)
        return f"{prefix}\n{compressed}" if prefix else compressed

    @classmethod
    def _estimate_tokens(cls, text: str) -> float:
        """Roughly estimate the token count of ``text``.

        Characters in ``_CJK_RANGES`` count as one token each; the remaining
        whitespace-separated words count as 1.3 tokens each. For text without
        such characters this equals the previous ``len(text.split()) * 1.3``.
        """
        cjk_chars = 0
        rest = []
        for ch in text:
            if any(lo <= ch <= hi for lo, hi in cls._CJK_RANGES):
                cjk_chars += 1
                rest.append(' ')
            else:
                rest.append(ch)
        return cjk_chars + 1.3 * len(''.join(rest).split())

    def _extract_keywords(self, text: str, top_k: int = 10) -> list:
        """Return up to ``top_k`` frequent whitespace-separated words.

        NOTE(review): minimal frequency-based implementation; the method was
        called but never defined. It does not segment unspaced scripts such
        as Chinese — swap in a real keyword extractor if needed.
        """
        from collections import Counter
        import string

        words = (w.strip(string.punctuation).lower() for w in text.split())
        counts = Counter(w for w in words if len(w) > 1)
        return [word for word, _ in counts.most_common(top_k)]

### 2.4 端到端PAACE
class PAACE:
    """End-to-end plan-aware context engineering pipeline.

    Chains structure parsing, importance scoring and hierarchical compression.
    """

    def __init__(self, llm, max_tokens: int = 8192):
        """Build the three pipeline stages around one LLM client.

        Args:
            llm: LLM client shared by all stages.
            max_tokens: Token budget handed to the compressor.
        """
        self.struct_parser = StructureParser(llm)
        self.importance_scorer = ImportanceScorer(llm)
        self.compressor = HierarchicalCompressor(llm, max_tokens)
        self.llm = llm

    def process(
        self,
        messages: list,
        query: str
    ) -> str:
        """Compress an agent conversation history.

        Args:
            messages: Agent conversation history, oldest first.
            query: The current query.

        Returns:
            The compressed context, or ``""`` when there is no history yet
            (for example on the first step of an agent loop).
        """
        if not messages:
            return ""

        structured = self.struct_parser.parse(messages)
        scores = self.importance_scorer.score(structured, query)
        return self.compressor.compress(structured, scores, query)

    def build_prompt(
        self,
        messages: list,
        query: str,
        system_prompt: "str | None" = None
    ) -> str:
        """Assemble the final prompt from the compressed history and query.

        Args:
            messages: Conversation history.
            query: The current query.
            system_prompt: Optional system prompt placed first.

        Returns:
            The system, history and query sections joined by blank lines.
            The history section is omitted when the compressed context is
            empty.
        """
        context = self.process(messages, query)

        sections = []
        if system_prompt:
            sections.append(f"[系统]\n{system_prompt}")
        if context:
            sections.append(f"[历史上下文]\n{context}")
        sections.append(f"[当前查询]\n{query}")
        return "\n\n".join(sections)

## 3. 实验结果
3.1 基准测试
| 任务 | 基线(Truncation) | 摘要压缩 | PAACE |
|---|---|---|---|
| ToolBench | 65.2% | 68.7% | 73.4% |
| WebArena | 58.1% | 61.3% | 67.8% |
| OSWorld | 42.3% | 45.6% | 52.1% |
| MiniWob++ | 78.9% | 81.2% | 84.7% |
3.2 上下文长度分析
任务成功率 vs 上下文长度:
上下文长度 │ 基线 │ PAACE │ 改进
-----------|---------|---------|------
1K tokens │ 78.2% │ 78.2% │ 0%
4K tokens │ 71.3% │ 76.8% │ +5.5%
16K tokens │ 58.7% │ 74.2% │ +15.5%
32K tokens │ 42.1% │ 71.8% │ +29.7%
64K tokens │ 28.4% │ 68.3% │ +39.9%
3.3 效率分析
| 方法 | 上下文大小 | 推理延迟 | 成本 |
|---|---|---|---|
| 全上下文 | 64K | 1.0× | 1.0× |
| 简单截断 | 8K | 0.15× | 0.15× |
| 摘要压缩 | 4K | 0.25× | 0.25× |
| PAACE | 8K | 0.35× | 0.35× |
4. PyTorch实现
虽然PAACE主要是一个框架,但我们可以实现一个轻量级的评分器用于实际部署:
import torch
import torch.nn as nn
import torch.nn.functional as F
class LightweightImportanceScorer(nn.Module):
    """Lightweight learned importance scorer intended for deployment.

    Each message is scored from its embedding, its type embedding and a
    query-conditioned summary of the whole history, then discounted by how
    far it lies behind the current step.
    """

    def __init__(
        self,
        d_model: int,
        num_types: int = 5,
        decay: float = 0.1,
        min_score: float = 0.1,
    ):
        """Create the scorer.

        Args:
            d_model: Embedding width; must be divisible by the 4 attention
                heads.
            num_types: Number of distinct message types.
            decay: Exponential decay rate per step of age.
            min_score: Floor applied to messages older than the step before
                the current one.
        """
        super().__init__()
        self.decay = decay
        self.min_score = min_score
        # Message-type embedding.
        self.type_embedding = nn.Embedding(num_types, d_model // 2)
        # The head consumes message (d) + type (d/2) + relevance (d) features.
        self.score_head = nn.Sequential(
            nn.Linear(2 * d_model + d_model // 2, d_model),
            nn.GELU(),
            nn.Linear(d_model, 1),
            nn.Sigmoid()
        )
        # Attention of the current query over the messages.
        self.query_attention = nn.MultiheadAttention(
            d_model, num_heads=4, batch_first=True
        )

    def forward(
        self,
        messages_emb: torch.Tensor,   # [batch, seq, d_model]
        message_types: torch.Tensor,  # [batch, seq] (0-4)
        query_emb: torch.Tensor,      # [batch, d_model]
        current_pos: torch.Tensor,    # [batch, 1] position of the current step
    ) -> torch.Tensor:
        """Predict an importance score for every message.

        Returns:
            scores: [batch, seq], each in [0, 1].
        """
        batch_size, seq_len, _ = messages_emb.shape
        current_pos = current_pos.reshape(batch_size, 1)

        type_emb = self.type_embedding(message_types)  # [batch, seq, d/2]

        # Attend once with the single query token and broadcast the result;
        # repeating the same query seq_len times yields identical rows.
        query = query_emb.unsqueeze(1)  # [batch, 1, d]
        summary, _ = self.query_attention(query, messages_emb, messages_emb)
        relevance = summary.expand(-1, seq_len, -1)  # [batch, seq, d]

        positions = torch.arange(seq_len, device=messages_emb.device).unsqueeze(0)
        positions = positions.expand(batch_size, -1)
        # Age counts steps *behind* the current one, so older messages decay.
        age = (current_pos - positions).clamp(min=0).float()
        recency = torch.exp(-self.decay * age)

        features = torch.cat([
            messages_emb,  # raw content
            type_emb,      # type information
            relevance,     # query relevance
        ], dim=-1)

        scores = self.score_head(features).squeeze(-1)  # [batch, seq]
        scores = scores * recency

        # Messages from the previous step onward keep their score as is;
        # older ones keep their decayed score but never drop below the floor.
        recent = positions >= current_pos - 1
        return torch.where(recent, scores, scores.clamp(min=self.min_score))
class HierarchicalCompressionPolicy(nn.Module):
    """Learned policy that splits a token budget across context components."""

    DEFAULT_COMPONENTS = (
        'goal', 'plan_0', 'plan_1', 'plan_2', 'current_state', 'history',
    )

    def __init__(self, d_model: int, num_components: int = 6, component_names=None):
        """Create the policy.

        Args:
            d_model: Width of the context embeddings.
            num_components: Number of components the budget is split into.
            component_names: Optional names, one per component. Defaults to
                the first ``num_components`` of ``DEFAULT_COMPONENTS``, padded
                with ``component_{i}`` names if more are requested.

        Raises:
            ValueError: If ``component_names`` does not have exactly
                ``num_components`` entries.
        """
        super().__init__()
        if component_names is None:
            defaults = self.DEFAULT_COMPONENTS
            component_names = defaults[:num_components] + tuple(
                f'component_{i}' for i in range(len(defaults), num_components)
            )
        elif len(component_names) != num_components:
            raise ValueError(
                f"expected {num_components} component names, "
                f"got {len(component_names)}"
            )
        self.component_names = tuple(component_names)
        self.budget_head = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.GELU(),
            nn.Linear(d_model // 2, num_components),
            nn.Softmax(dim=-1)
        )

    def forward(
        self,
        context_emb: torch.Tensor,
        total_budget: int
    ) -> dict:
        """Compute the token budget of every component.

        Args:
            context_emb: Context embeddings shaped ``[seq, d_model]`` or
                ``[1, seq, d_model]``. A single allocation dict is returned,
                so only one context can be handled per call.
            total_budget: Total number of tokens to distribute.

        Returns:
            ``{component_name: token_count}``; the counts sum to
            ``total_budget``.

        Raises:
            ValueError: If ``context_emb`` is not a single context.
        """
        if context_emb.dim() == 2:
            context_emb = context_emb.unsqueeze(0)
        if context_emb.dim() != 3 or context_emb.size(0) != 1:
            raise ValueError(
                "context_emb must be [seq, d_model] or [1, seq, d_model], "
                f"got shape {tuple(context_emb.shape)}"
            )

        # Pool over the sequence, then drop the batch axis so that each entry
        # is the share of one component (not one batch row).
        pooled = context_emb.mean(dim=1)
        proportions = self.budget_head(pooled).squeeze(0).detach().tolist()

        budgets = [int(share * total_budget) for share in proportions]
        # Truncation can leave a few tokens unassigned; give them to the
        # component with the largest share.
        leftover = total_budget - sum(budgets)
        if leftover > 0:
            top = max(range(len(budgets)), key=proportions.__getitem__)
            budgets[top] += leftover
        return dict(zip(self.component_names, budgets))

## 5. 应用场景
5.1 Tool-Augmented Agent
# Tool-augmented agent scenario.
# NOTE(review): `Agent`, the tool objects, `paace` and `query` are assumed to
# be provided by the surrounding application; this snippet does not create
# them.
agent = Agent(tools=[calculator, search, code_executor])
# Seed the history with the user request: StructureParser.parse takes the goal
# from the first 'user' message and cannot handle an empty message list.
messages = [{'role': 'user', 'content': query}]
for _ in range(20):
    # Compress the history with PAACE.
    compressed_context = paace.process(messages, query)
    # Build the request.
    prompt = f"\n{compressed_context}\n当前任务: {query}\n"
    # Let the agent decide and act.
    action = agent.decide(prompt)
    result = agent.execute(action)
    # Tag the action so the structure parser can recognise it.
    # NOTE(review): the parser only groups actions under a preceding message
    # of type 'plan'; this loop never emits one — confirm whether the agent
    # should also record its plan.
    messages.append({'role': 'assistant', 'content': action, 'type': 'action'})
    messages.append({'role': 'system', 'content': result, 'type': 'observation'})

### 5.2 多智能体协作
# Multi-agent collaboration scenario.
# NOTE(review): `Orchestrator`, the agent objects, `merge_contexts`, `paace`
# and `task` are assumed to be provided by the surrounding application.
orchestrator = Orchestrator(agents=[planner, executor, verifier])
# `round_idx` instead of `round` so the builtin is not shadowed.
for round_idx in range(5):
    # Collect the context of every agent.
    all_contexts = [agent.get_context() for agent in orchestrator.agents]
    # Compress them as a whole.
    merged = merge_contexts(all_contexts)
    compressed = paace.process(merged, task)
    # Distribute the shared compressed context to every agent.
    for agent in orchestrator.agents:
        agent.update_context(compressed)

### 5.3 长期记忆系统
# Long-term memory scenario.
# NOTE(review): `LongTermMemory`, `user_interactions`, `paace` and `llm` are
# assumed to be provided by the surrounding application.
memory = LongTermMemory()
for interaction in user_interactions:
    # NOTE(review): assumes each interaction is the user's query text; the
    # original never bound `current_query` inside the loop — confirm.
    current_query = interaction
    # Compress past interactions with PAACE; skip while the history is empty
    # because the structure parser cannot handle an empty message list.
    compressed = paace.process(memory.history, current_query) if memory.history else ""
    # Retrieve related memories.
    relevant = memory.retrieve(compressed, top_k=5)
    # Enrich the context and include the query itself so the model can
    # actually answer it.
    enhanced = f"{compressed}\n\n[相关记忆]\n{relevant}\n\n[当前查询]\n{current_query}"
    # Handle the request.
    response = llm.generate(enhanced)
    # Update the memory.
    memory.add(interaction, response)

## 6. 与相关工作的对比
6.1 vs 简单摘要
| 方面 | 简单摘要 | PAACE |
|---|---|---|
| 结构感知 | ❌ | ✅ |
| 计划保留 | ❌ | ✅ |
| 重要性加权 | ❌ | ✅ |
| Agent特性 | ❌ | ✅ |
6.2 vs 查询感知压缩
| 方面 | 查询感知压缩 | PAACE |
|---|---|---|
| 层级结构 | ❌ | ✅ |
| 计划信息 | ❌ | ✅ |
| Agent行为 | ❌ | ✅ |
| 错误恢复 | ❌ | ✅ |