AutoGPT核心代码分析——核心是ReAct
最核心的部分:
agent.py
# agent.py -- the main AutoGPT agent: wires together forge components and
# implements the ReAct-style loop of propose_action() -> execute()/do_not_execute().
# NOTE: no module docstring is added on purpose: `Agent.default_settings` below
# reads the module's `__doc__` for its description.
from __future__ import annotations

import inspect
import logging
from datetime import datetime
from typing import TYPE_CHECKING, Optional

import sentry_sdk
from forge.agent.base import BaseAgent, BaseAgentConfiguration, BaseAgentSettings
from forge.agent.protocols import (
    AfterExecute,
    AfterParse,
    CommandProvider,
    DirectiveProvider,
    MessageProvider,
)
from forge.command.command import Command, CommandOutput
from forge.components.action_history import (
    ActionHistoryComponent,
    EpisodicActionHistory,
)
from forge.components.code_executor.code_executor import CodeExecutorComponent
from forge.components.context.context import AgentContext, ContextComponent
from forge.components.file_manager import FileManagerComponent
from forge.components.git_operations import GitOperationsComponent
from forge.components.image_gen import ImageGeneratorComponent
from forge.components.system import SystemComponent
from forge.components.user_interaction import UserInteractionComponent
from forge.components.watchdog import WatchdogComponent
from forge.components.web import WebSearchComponent, WebSeleniumComponent
from forge.file_storage.base import FileStorage
from forge.llm.prompting.schema import ChatPrompt
from forge.llm.prompting.utils import dump_prompt
from forge.llm.providers import (
    AssistantFunctionCall,
    ChatMessage,
    ChatModelProvider,
    ChatModelResponse,
)
from forge.llm.providers.utils import function_specs_from_commands
from forge.models.action import (
    ActionErrorResult,
    ActionInterruptedByHuman,
    ActionResult,
    ActionSuccessResult,
)
from forge.models.config import Configurable
from forge.utils.exceptions import (
    AgentException,
    AgentTerminated,
    CommandExecutionError,
    UnknownCommandError,
)
from pydantic import Field

from autogpt.app.log_cycle import (
    CURRENT_CONTEXT_FILE_NAME,
    NEXT_ACTION_FILE_NAME,
    USER_INPUT_FILE_NAME,
    LogCycleHandler,
)

from .prompt_strategies.one_shot import (
    OneShotAgentActionProposal,
    OneShotAgentPromptStrategy,
)

if TYPE_CHECKING:
    from forge.config.config import Config

logger = logging.getLogger(__name__)


class AgentConfiguration(BaseAgentConfiguration):
    """Configuration for the default Agent; adds nothing over the base class."""

    pass


class AgentSettings(BaseAgentSettings):
    """Persistable settings and state for an `Agent` instance."""

    config: AgentConfiguration = Field(default_factory=AgentConfiguration)
    history: EpisodicActionHistory[OneShotAgentActionProposal] = Field(
        default_factory=EpisodicActionHistory[OneShotAgentActionProposal]
    )
    """(STATE) The action history of the agent."""

    # (STATE) Context items (e.g. open files/folders) managed by ContextComponent.
    context: AgentContext = Field(default_factory=AgentContext)


class Agent(BaseAgent, Configurable[AgentSettings]):
    """The default AutoGPT agent: proposes one command per cycle via an LLM
    (one-shot prompt strategy) and executes it through registered components."""

    default_settings: AgentSettings = AgentSettings(
        name="Agent",
        description=__doc__ if __doc__ else "",
    )

    def __init__(
        self,
        settings: AgentSettings,
        llm_provider: ChatModelProvider,
        file_storage: FileStorage,
        legacy_config: Config,
    ):
        """Set up the prompt strategy and register all agent components.

        Args:
            settings: Settings/state to (re)initialize the agent with.
            llm_provider: Provider used for chat completions and token counting.
            file_storage: Backing storage for the agent's workspace.
            legacy_config: Global application config, passed to most components.
        """
        super().__init__(settings)

        self.llm_provider = llm_provider
        self.ai_profile = settings.ai_profile
        self.directives = settings.directives
        # Deep-copy so per-agent tweaks don't mutate the shared class default.
        prompt_config = OneShotAgentPromptStrategy.default_configuration.copy(deep=True)
        prompt_config.use_functions_api = (
            settings.config.use_functions_api
            # Anthropic currently doesn't support tools + prefilling :(
            and self.llm.provider_name != "anthropic"
        )
        self.prompt_strategy = OneShotAgentPromptStrategy(prompt_config, logger)
        self.commands: list[Command] = []

        # Components
        self.system = SystemComponent(legacy_config, settings.ai_profile)
        # run_after(WatchdogComponent): history must be appended after the
        # watchdog has had a chance to inspect/adjust the cycle.
        self.history = ActionHistoryComponent(
            settings.history,
            self.send_token_limit,
            lambda x: self.llm_provider.count_tokens(x, self.llm.name),
            legacy_config,
            llm_provider,
        ).run_after(WatchdogComponent)
        self.user_interaction = UserInteractionComponent(legacy_config)
        self.file_manager = FileManagerComponent(settings, file_storage)
        self.code_executor = CodeExecutorComponent(
            self.file_manager.workspace,
            settings,
            legacy_config,
        )
        self.git_ops = GitOperationsComponent(legacy_config)
        self.image_gen = ImageGeneratorComponent(
            self.file_manager.workspace, legacy_config
        )
        self.web_search = WebSearchComponent(legacy_config)
        self.web_selenium = WebSeleniumComponent(legacy_config, llm_provider, self.llm)
        self.context = ContextComponent(self.file_manager.workspace, settings.context)
        self.watchdog = WatchdogComponent(settings.config, settings.history).run_after(
            ContextComponent
        )

        self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S")
        """Timestamp the agent was created; only used for structured debug logging."""

        self.log_cycle_handler = LogCycleHandler()
        """LogCycleHandler for structured debug logging."""

        self.event_history = settings.history
        self.legacy_config = legacy_config

    async def propose_action(self) -> OneShotAgentActionProposal:
        """Proposes the next action to execute, based on the task and current state.

        Gathers directives, commands, and messages from all components via the
        run_pipeline protocol, builds the one-shot prompt, queries the LLM, and
        parses the reply into an action proposal.

        Returns:
            The command name and arguments, if any, and the agent's thoughts.
        """
        self.reset_trace()

        # Get directives
        resources = await self.run_pipeline(DirectiveProvider.get_resources)
        constraints = await self.run_pipeline(DirectiveProvider.get_constraints)
        best_practices = await self.run_pipeline(DirectiveProvider.get_best_practices)

        # Deep-copy again: component-provided directives are appended per-cycle
        # and must not accumulate on the persisted state.
        directives = self.state.directives.copy(deep=True)
        directives.resources += resources
        directives.constraints += constraints
        directives.best_practices += best_practices

        # Get commands
        self.commands = await self.run_pipeline(CommandProvider.get_commands)
        self._remove_disabled_commands()

        # Get messages
        messages = await self.run_pipeline(MessageProvider.get_messages)

        prompt: ChatPrompt = self.prompt_strategy.build_prompt(
            messages=messages,
            task=self.state.task,
            ai_profile=self.state.ai_profile,
            ai_directives=directives,
            commands=function_specs_from_commands(self.commands),
            include_os_info=self.legacy_config.execute_local_commands,
        )

        self.log_cycle_handler.log_count_within_cycle = 0
        self.log_cycle_handler.log_cycle(
            self.state.ai_profile.ai_name,
            self.created_at,
            self.config.cycle_count,
            prompt.raw(),
            CURRENT_CONTEXT_FILE_NAME,
        )
        logger.debug(f"Executing prompt:\n{dump_prompt(prompt)}")

        output = await self.complete_and_parse(prompt)
        self.config.cycle_count += 1

        return output

    async def complete_and_parse(
        self, prompt: ChatPrompt, exception: Optional[Exception] = None
    ) -> OneShotAgentActionProposal:
        """Send `prompt` to the LLM and parse the reply into an action proposal.

        Args:
            prompt: The fully built chat prompt.
            exception: If given, appended to the prompt as a system message of
                the form "Error: ..." (e.g. when re-prompting after a parse
                failure).

        Returns:
            The parsed action proposal, after running AfterParse hooks on it.
        """
        if exception:
            prompt.messages.append(ChatMessage.system(f"Error: {exception}"))

        response: ChatModelResponse[
            OneShotAgentActionProposal
        ] = await self.llm_provider.create_chat_completion(
            prompt.messages,
            model_name=self.llm.name,
            completion_parser=self.prompt_strategy.parse_response_content,
            functions=prompt.functions,
            prefill_response=prompt.prefill_response,
        )
        result = response.parsed_result

        self.log_cycle_handler.log_cycle(
            self.state.ai_profile.ai_name,
            self.created_at,
            self.config.cycle_count,
            result.thoughts.dict(),
            NEXT_ACTION_FILE_NAME,
        )

        await self.run_pipeline(AfterParse.after_parse, result)

        return result

    async def execute(
        self,
        proposal: OneShotAgentActionProposal,
        user_feedback: str = "",  # accepted for interface compatibility; unused here
    ) -> ActionResult:
        """Execute the tool call in `proposal` and return the resulting ActionResult.

        `AgentTerminated` propagates to the caller; any other `AgentException`
        is converted to an `ActionErrorResult` and reported to Sentry.
        """
        tool = proposal.use_tool

        # Get commands
        self.commands = await self.run_pipeline(CommandProvider.get_commands)
        self._remove_disabled_commands()

        try:
            return_value = await self._execute_tool(tool)

            result = ActionSuccessResult(outputs=return_value)
        except AgentTerminated:
            raise
        except AgentException as e:
            result = ActionErrorResult.from_exception(e)
            logger.warning(f"{tool} raised an error: {e}")
            sentry_sdk.capture_exception(e)

        # Discard output that would eat more than a third of the token budget:
        # it would crowd everything else out of the next prompt.
        result_tlength = self.llm_provider.count_tokens(str(result), self.llm.name)
        if result_tlength > self.send_token_limit // 3:
            result = ActionErrorResult(
                reason=f"Command {tool.name} returned too much output. "
                "Do not execute this command again with the same arguments."
            )

        await self.run_pipeline(AfterExecute.after_execute, result)

        logger.debug("\n".join(self.trace))

        return result

    async def do_not_execute(
        self, denied_proposal: OneShotAgentActionProposal, user_feedback: str
    ) -> ActionResult:
        """Handle the user denying `denied_proposal`.

        Logs the user's feedback to the debug cycle log and runs the same
        AfterExecute hooks as `execute`, with an ActionInterruptedByHuman result.
        """
        result = ActionInterruptedByHuman(feedback=user_feedback)
        self.log_cycle_handler.log_cycle(
            self.state.ai_profile.ai_name,
            self.created_at,
            self.config.cycle_count,
            user_feedback,
            USER_INPUT_FILE_NAME,
        )

        await self.run_pipeline(AfterExecute.after_execute, result)

        logger.debug("\n".join(self.trace))

        return result

    async def _execute_tool(self, tool_call: AssistantFunctionCall) -> CommandOutput:
        """Execute the command and return the result

        Args:
            tool_call (AssistantFunctionCall): The tool call to execute

        Returns:
            CommandOutput: The execution result

        Raises:
            AgentException: Re-raised as-is from the command.
            CommandExecutionError: Wraps any non-Agent exception the command raises.
        """
        # Execute a native command with the same name or alias, if it exists
        command = self._get_command(tool_call.name)
        try:
            result = command(**tool_call.arguments)
            # Commands may be sync or async; await only if needed.
            if inspect.isawaitable(result):
                return await result
            return result
        except AgentException:
            raise
        except Exception as e:
            raise CommandExecutionError(str(e))

    def _get_command(self, command_name: str) -> Command:
        """Look up a registered command by any of its names.

        Searches from the end of `self.commands`, so commands registered later
        shadow earlier ones with the same name.

        Raises:
            UnknownCommandError: If no registered command has `command_name`.
        """
        for command in reversed(self.commands):
            if command_name in command.names:
                return command

        raise UnknownCommandError(
            f"Cannot execute command '{command_name}': unknown command."
        )

    def _remove_disabled_commands(self) -> None:
        """Drop commands where any name/alias appears in
        `legacy_config.disabled_commands`."""
        self.commands = [
            command
            for command in self.commands
            if not any(
                name in self.legacy_config.disabled_commands for name in command.names
            )
        ]

    def find_obscured_commands(self) -> list[Command]:
        """Return commands whose every name is shadowed by a later command
        (mirrors the reversed-lookup precedence used in `_get_command`)."""
        seen_names = set()
        obscured_commands = []
        for command in reversed(self.commands):
            # If all of the command's names have been seen, it's obscured
            if seen_names.issuperset(command.names):
                obscured_commands.append(command)
            else:
                seen_names.update(command.names)
        return list(reversed(obscured_commands))
agent.py 实现了一个助理AI代理系统。以下是代码的功能概述:
- 导入必要的模块和类,包括日志记录、时间处理、类型检查等。
- 定义了AgentConfiguration、AgentSettings和Agent类,用于配置助理代理的设置和行为。
- 实现了用于处理指令、命令、消息等的Provider类。
- 实现了用于执行代码、文件管理、图片生成、web搜索等功能的组件。
- 定义了"propose_action"方法,用于提出下一步的操作建议。
- 包含了对话提示、日志处理、异常处理等模块。
- 实现了"execute"和"do_not_execute"方法,分别用于执行操作建议,或在用户否决建议时记录人工干预的反馈。
- 包含了辅助方法用于执行工具调用、找到可执行命令等功能。
- 使用Pydantic进行数据验证和模型定义。
- 包含了一些辅助方法和变量,用于监控操作、管理上下文等。
总体而言,这些代码构建了一个助理AI系统,用于提出决策建议、执行操作,并与用户进行交互。
另一个核心部分是利用GPT-4等LLM进行任务编排的提示策略(prompt strategy),代码在:
one_shot.py
# one_shot.py -- the one-shot prompt strategy for the AutoGPT agent: builds
# the system/task prompt in a single shot and parses the LLM's JSON reply
# into an OneShotAgentActionProposal.
from __future__ import annotations

import json
import platform
import re
from logging import Logger

import distro
from forge.config.ai_directives import AIDirectives
from forge.config.ai_profile import AIProfile
from forge.json.parsing import extract_dict_from_json
from forge.llm.prompting import ChatPrompt, LanguageModelClassification, PromptStrategy
from forge.llm.prompting.utils import format_numbered_list
from forge.llm.providers.schema import (
    AssistantChatMessage,
    ChatMessage,
    CompletionModelFunction,
)
from forge.models.action import ActionProposal
from forge.models.config import SystemConfiguration, UserConfigurable
from forge.models.json_schema import JSONSchema
from forge.models.utils import ModelWithSummary
from forge.utils.exceptions import InvalidAgentResponseError
from pydantic import Field

# Name of the TypeScript-style interface shown to the LLM as the response schema.
_RESPONSE_INTERFACE_NAME = "AssistantResponse"


class AssistantThoughts(ModelWithSummary):
    """Structured 'thoughts' the LLM must produce alongside its chosen action.

    The Field descriptions below are rendered into the response schema shown
    to the LLM, so they are written in second person.
    """

    observations: str = Field(
        ..., description="Relevant observations from your last action (if any)"
    )
    text: str = Field(..., description="Thoughts")
    reasoning: str = Field(..., description="Reasoning behind the thoughts")
    self_criticism: str = Field(..., description="Constructive self-criticism")
    plan: list[str] = Field(
        ..., description="Short list that conveys the long-term plan"
    )
    speak: str = Field(..., description="Summary of thoughts, to say to user")

    def summary(self) -> str:
        """Return the main `text` field as this model's summary."""
        return self.text


class OneShotAgentActionProposal(ActionProposal):
    """An action proposal (tool call) augmented with structured thoughts."""

    thoughts: AssistantThoughts


class OneShotAgentPromptConfiguration(SystemConfiguration):
    """User-configurable templates and flags for the one-shot prompt."""

    DEFAULT_BODY_TEMPLATE: str = (
        "## Constraints\n"
        "You operate within the following constraints:\n"
        "{constraints}\n"
        "\n"
        "## Resources\n"
        "You can leverage access to the following resources:\n"
        "{resources}\n"
        "\n"
        "## Commands\n"
        "These are the ONLY commands you can use."
        " Any action you perform must be possible through one of these commands:\n"
        "{commands}\n"
        "\n"
        "## Best practices\n"
        "{best_practices}"
    )

    DEFAULT_CHOOSE_ACTION_INSTRUCTION: str = (
        "Determine exactly one command to use next based on the given goals "
        "and the progress you have made so far, "
        "and respond using the JSON schema specified previously:"
    )

    body_template: str = UserConfigurable(default=DEFAULT_BODY_TEMPLATE)
    choose_action_instruction: str = UserConfigurable(
        default=DEFAULT_CHOOSE_ACTION_INSTRUCTION
    )
    # When True, the tool call is delivered via the provider's functions API
    # instead of being embedded in the JSON response body.
    use_functions_api: bool = UserConfigurable(default=False)

    #########
    # State #
    #########
    # progress_summaries: dict[tuple[int, int], str] = Field(
    #     default_factory=lambda: {(0, 0): ""}
    # )


class OneShotAgentPromptStrategy(PromptStrategy):
    """Builds the full prompt in one shot and parses the LLM's JSON answer."""

    default_configuration: OneShotAgentPromptConfiguration = (
        OneShotAgentPromptConfiguration()
    )

    def __init__(
        self,
        configuration: OneShotAgentPromptConfiguration,
        logger: Logger,
    ):
        self.config = configuration
        # JSON schema derived from the proposal model; shown to the LLM as the
        # required response format.
        self.response_schema = JSONSchema.from_dict(OneShotAgentActionProposal.schema())
        self.logger = logger

    @property
    def model_classification(self) -> LanguageModelClassification:
        return LanguageModelClassification.FAST_MODEL  # FIXME: dynamic switching

    def build_prompt(
        self,
        *,
        messages: list[ChatMessage],
        task: str,
        ai_profile: AIProfile,
        ai_directives: AIDirectives,
        commands: list[CompletionModelFunction],
        include_os_info: bool,
        **extras,
    ) -> ChatPrompt:
        """Constructs and returns a prompt with the following structure:
        1. System prompt
        2. The task, triple-quoted, in a user message
        3. `messages` provided by the agent's components
        4. `choose_action_instruction` as the final user message
        """
        system_prompt, response_prefill = self.build_system_prompt(
            ai_profile=ai_profile,
            ai_directives=ai_directives,
            commands=commands,
            include_os_info=include_os_info,
        )

        final_instruction_msg = ChatMessage.user(self.config.choose_action_instruction)

        return ChatPrompt(
            messages=[
                ChatMessage.system(system_prompt),
                ChatMessage.user(f'"""{task}"""'),
                *messages,
                final_instruction_msg,
            ],
            prefill_response=response_prefill,
            functions=commands if self.config.use_functions_api else [],
        )

    def build_system_prompt(
        self,
        ai_profile: AIProfile,
        ai_directives: AIDirectives,
        commands: list[CompletionModelFunction],
        include_os_info: bool,
    ) -> tuple[str, str]:
        """
        Builds the system prompt.

        Returns:
            str: The system prompt body
            str: The desired start for the LLM's response; used to steer the output
        """
        response_fmt_instruction, response_prefill = self.response_format_instruction(
            self.config.use_functions_api
        )
        system_prompt_parts = (
            self._generate_intro_prompt(ai_profile)
            + (self._generate_os_info() if include_os_info else [])
            + [
                self.config.body_template.format(
                    constraints=format_numbered_list(
                        ai_directives.constraints
                        + self._generate_budget_constraint(ai_profile.api_budget)
                    ),
                    resources=format_numbered_list(ai_directives.resources),
                    commands=self._generate_commands_list(commands),
                    best_practices=format_numbered_list(ai_directives.best_practices),
                )
            ]
            + [
                "## Your Task\n"
                "The user will specify a task for you to execute, in triple quotes,"
                " in the next message. Your job is to complete the task while following"
                " your directives as given above, and terminate when your task is done."
            ]
            + ["## RESPONSE FORMAT\n" + response_fmt_instruction]
        )

        # Join non-empty parts together into paragraph format
        return (
            "\n\n".join(filter(None, system_prompt_parts)).strip("\n"),
            response_prefill,
        )

    def response_format_instruction(self, use_functions_api: bool) -> tuple[str, str]:
        """Render the JSON response-format instruction and the response prefill.

        Returns:
            str: Instruction text containing the response schema.
            str: Prefill (the opening of the expected JSON object) used to
                steer the model's output.
        """
        response_schema = self.response_schema.copy(deep=True)
        # When the functions API carries the tool call, `use_tool` must not
        # also appear in the JSON response body.
        if (
            use_functions_api
            and response_schema.properties
            and "use_tool" in response_schema.properties
        ):
            del response_schema.properties["use_tool"]

        # Unindent for performance
        response_format = re.sub(
            r"\n\s+",
            "\n",
            response_schema.to_typescript_object_interface(_RESPONSE_INTERFACE_NAME),
        )
        # Prefill up to the first property key, to push the model straight
        # into emitting the expected JSON object.
        response_prefill = f'{{\n    "{list(response_schema.properties.keys())[0]}":'

        return (
            (
                f"YOU MUST ALWAYS RESPOND WITH A JSON OBJECT OF THE FOLLOWING TYPE:\n"
                f"{response_format}"
                + ("\n\nYOU MUST ALSO INVOKE A TOOL!" if use_functions_api else "")
            ),
            response_prefill,
        )

    def _generate_intro_prompt(self, ai_profile: AIProfile) -> list[str]:
        """Generates the introduction part of the prompt.

        Returns:
            list[str]: A list of strings forming the introduction part of the prompt.
        """
        return [
            f"You are {ai_profile.ai_name}, {ai_profile.ai_role.rstrip('.')}.",
            "Your decisions must always be made independently without seeking "
            "user assistance. Play to your strengths as an LLM and pursue "
            "simple strategies with no legal complications.",
        ]

    def _generate_os_info(self) -> list[str]:
        """Generates the OS information part of the prompt.

        Returns:
            list[str]: The OS information part of the prompt.
        """
        os_name = platform.system()
        # distro gives a prettier name on Linux than platform.platform().
        os_info = (
            platform.platform(terse=True)
            if os_name != "Linux"
            else distro.name(pretty=True)
        )
        return [f"The OS you are running on is: {os_info}"]

    def _generate_budget_constraint(self, api_budget: float) -> list[str]:
        """Generates the budget information part of the prompt.

        Returns:
            list[str]: The budget information part of the prompt, or an empty list.
        """
        if api_budget > 0.0:
            return [
                f"It takes money to let you run. "
                f"Your API budget is ${api_budget:.3f}"
            ]
        return []

    def _generate_commands_list(self, commands: list[CompletionModelFunction]) -> str:
        """Lists the commands available to the agent.

        Params:
            commands: The command specs to render.

        Returns:
            str: A string containing a numbered list of commands.
        """
        try:
            return format_numbered_list([cmd.fmt_line() for cmd in commands])
        except AttributeError:
            self.logger.warning(f"Formatting commands failed. {commands}")
            raise

    def parse_response_content(
        self,
        response: AssistantChatMessage,
    ) -> OneShotAgentActionProposal:
        """Parse the LLM's reply into a validated OneShotAgentActionProposal.

        When the functions API is in use, the tool call is taken from
        `response.tool_calls` rather than the JSON body.

        Raises:
            InvalidAgentResponseError: If the reply has no text content, or if
                the functions API is in use and no tool was invoked.
        """
        if not response.content:
            raise InvalidAgentResponseError("Assistant response has no text content")

        self.logger.debug(
            "LLM response content:"
            + (
                f"\n{response.content}"
                if "\n" in response.content
                else f" '{response.content}'"
            )
        )
        assistant_reply_dict = extract_dict_from_json(response.content)
        self.logger.debug(
            "Parsing object extracted from LLM response:\n"
            f"{json.dumps(assistant_reply_dict, indent=4)}"
        )

        parsed_response = OneShotAgentActionProposal.parse_obj(assistant_reply_dict)
        if self.config.use_functions_api:
            if not response.tool_calls:
                raise InvalidAgentResponseError("Assistant did not use a tool")
            parsed_response.use_tool = response.tool_calls[0].function
        return parsed_response
这段代码实现了一个助理AI系统,本质上是ReAct框架,主要包括以下功能:
- 定义了助理AI的思考模型AssistantThoughts,包括观察、思考、推理、自我批评、计划和口头表达等属性。
- 定义了OneShotAgentActionProposal类,用于存储助理AI提出的行动建议和思考。
- 实现了OneShotAgentPromptConfiguration和OneShotAgentPromptStrategy类,用于配置和构建助理AI的提示信息和操作策略。
- 使用Pydantic库进行数据验证和模型定义。
- 实现了各种辅助方法,用于生成系统提示、解析回复内容等功能。
- 包含了与JSON数据交互、日志记录、异常处理等相关模块和函数。
总体而言,这段代码涵盖了助理AI系统中与提示信息、响应解析、操作建议等方面相关的关键功能。