AutoGPT核心代码分析——核心是ReAct
最核心的部分:
agent.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 | from __future__ import annotations import inspect import logging from datetime import datetime from typing import TYPE_CHECKING, Optional import sentry_sdk from forge.agent.base import BaseAgent, BaseAgentConfiguration, BaseAgentSettings from forge.agent.protocols import ( AfterExecute, AfterParse, CommandProvider, DirectiveProvider, MessageProvider, ) from forge.command.command import Command, CommandOutput from forge.components.action_history import ( ActionHistoryComponent, EpisodicActionHistory, ) from forge.components.code_executor.code_executor import CodeExecutorComponent from forge.components.context.context import AgentContext, ContextComponent from forge.components.file_manager import FileManagerComponent from forge.components.git_operations import 
GitOperationsComponent from forge.components.image_gen import ImageGeneratorComponent from forge.components.system import SystemComponent from forge.components.user_interaction import UserInteractionComponent from forge.components.watchdog import WatchdogComponent from forge.components.web import WebSearchComponent, WebSeleniumComponent from forge.file_storage.base import FileStorage from forge.llm.prompting.schema import ChatPrompt from forge.llm.prompting.utils import dump_prompt from forge.llm.providers import ( AssistantFunctionCall, ChatMessage, ChatModelProvider, ChatModelResponse, ) from forge.llm.providers.utils import function_specs_from_commands from forge.models.action import ( ActionErrorResult, ActionInterruptedByHuman, ActionResult, ActionSuccessResult, ) from forge.models.config import Configurable from forge.utils.exceptions import ( AgentException, AgentTerminated, CommandExecutionError, UnknownCommandError, ) from pydantic import Field from autogpt.app.log_cycle import ( CURRENT_CONTEXT_FILE_NAME, NEXT_ACTION_FILE_NAME, USER_INPUT_FILE_NAME, LogCycleHandler, ) from .prompt_strategies.one_shot import ( OneShotAgentActionProposal, OneShotAgentPromptStrategy, ) if TYPE_CHECKING: from forge.config.config import Config logger = logging.getLogger(__name__) class AgentConfiguration(BaseAgentConfiguration): pass class AgentSettings(BaseAgentSettings): config: AgentConfiguration = Field(default_factory = AgentConfiguration) history: EpisodicActionHistory[OneShotAgentActionProposal] = Field( default_factory = EpisodicActionHistory[OneShotAgentActionProposal] ) """(STATE) The action history of the agent.""" context: AgentContext = Field(default_factory = AgentContext) class Agent(BaseAgent, Configurable[AgentSettings]): default_settings: AgentSettings = AgentSettings( name = "Agent" , description = __doc__ if __doc__ else "", ) def __init__( self , settings: AgentSettings, llm_provider: ChatModelProvider, file_storage: FileStorage, legacy_config: Config, ): 
super ().__init__(settings) self .llm_provider = llm_provider self .ai_profile = settings.ai_profile self .directives = settings.directives prompt_config = OneShotAgentPromptStrategy.default_configuration.copy(deep = True ) prompt_config.use_functions_api = ( settings.config.use_functions_api # Anthropic currently doesn't support tools + prefilling :( and self .llm.provider_name ! = "anthropic" ) self .prompt_strategy = OneShotAgentPromptStrategy(prompt_config, logger) self .commands: list [Command] = [] # Components self .system = SystemComponent(legacy_config, settings.ai_profile) self .history = ActionHistoryComponent( settings.history, self .send_token_limit, lambda x: self .llm_provider.count_tokens(x, self .llm.name), legacy_config, llm_provider, ).run_after(WatchdogComponent) self .user_interaction = UserInteractionComponent(legacy_config) self .file_manager = FileManagerComponent(settings, file_storage) self .code_executor = CodeExecutorComponent( self .file_manager.workspace, settings, legacy_config, ) self .git_ops = GitOperationsComponent(legacy_config) self .image_gen = ImageGeneratorComponent( self .file_manager.workspace, legacy_config ) self .web_search = WebSearchComponent(legacy_config) self .web_selenium = WebSeleniumComponent(legacy_config, llm_provider, self .llm) self .context = ContextComponent( self .file_manager.workspace, settings.context) self .watchdog = WatchdogComponent(settings.config, settings.history).run_after( ContextComponent ) self .created_at = datetime.now().strftime( "%Y%m%d_%H%M%S" ) """Timestamp the agent was created; only used for structured debug logging.""" self .log_cycle_handler = LogCycleHandler() """LogCycleHandler for structured debug logging.""" self .event_history = settings.history self .legacy_config = legacy_config async def propose_action( self ) - > OneShotAgentActionProposal: """Proposes the next action to execute, based on the task and current state. 
Returns: The command name and arguments, if any, and the agent's thoughts. """ self .reset_trace() # Get directives resources = await self .run_pipeline(DirectiveProvider.get_resources) constraints = await self .run_pipeline(DirectiveProvider.get_constraints) best_practices = await self .run_pipeline(DirectiveProvider.get_best_practices) directives = self .state.directives.copy(deep = True ) directives.resources + = resources directives.constraints + = constraints directives.best_practices + = best_practices # Get commands self .commands = await self .run_pipeline(CommandProvider.get_commands) self ._remove_disabled_commands() # Get messages messages = await self .run_pipeline(MessageProvider.get_messages) prompt: ChatPrompt = self .prompt_strategy.build_prompt( messages = messages, task = self .state.task, ai_profile = self .state.ai_profile, ai_directives = directives, commands = function_specs_from_commands( self .commands), include_os_info = self .legacy_config.execute_local_commands, ) self .log_cycle_handler.log_count_within_cycle = 0 self .log_cycle_handler.log_cycle( self .state.ai_profile.ai_name, self .created_at, self .config.cycle_count, prompt.raw(), CURRENT_CONTEXT_FILE_NAME, ) logger.debug(f "Executing prompt:\n{dump_prompt(prompt)}" ) output = await self .complete_and_parse(prompt) self .config.cycle_count + = 1 return output async def complete_and_parse( self , prompt: ChatPrompt, exception: Optional[Exception] = None ) - > OneShotAgentActionProposal: if exception: prompt.messages.append(ChatMessage.system(f "Error: {exception}" )) response: ChatModelResponse[ OneShotAgentActionProposal ] = await self .llm_provider.create_chat_completion( prompt.messages, model_name = self .llm.name, completion_parser = self .prompt_strategy.parse_response_content, functions = prompt.functions, prefill_response = prompt.prefill_response, ) result = response.parsed_result self .log_cycle_handler.log_cycle( self .state.ai_profile.ai_name, self .created_at, self 
.config.cycle_count, result.thoughts. dict (), NEXT_ACTION_FILE_NAME, ) await self .run_pipeline(AfterParse.after_parse, result) return result async def execute( self , proposal: OneShotAgentActionProposal, user_feedback: str = "", ) - > ActionResult: tool = proposal.use_tool # Get commands self .commands = await self .run_pipeline(CommandProvider.get_commands) self ._remove_disabled_commands() try : return_value = await self ._execute_tool(tool) result = ActionSuccessResult(outputs = return_value) except AgentTerminated: raise except AgentException as e: result = ActionErrorResult.from_exception(e) logger.warning(f "{tool} raised an error: {e}" ) sentry_sdk.capture_exception(e) result_tlength = self .llm_provider.count_tokens( str (result), self .llm.name) if result_tlength > self .send_token_limit / / 3 : result = ActionErrorResult( reason = f "Command {tool.name} returned too much output. " "Do not execute this command again with the same arguments." ) await self .run_pipeline(AfterExecute.after_execute, result) logger.debug( "\n" .join( self .trace)) return result async def do_not_execute( self , denied_proposal: OneShotAgentActionProposal, user_feedback: str ) - > ActionResult: result = ActionInterruptedByHuman(feedback = user_feedback) self .log_cycle_handler.log_cycle( self .state.ai_profile.ai_name, self .created_at, self .config.cycle_count, user_feedback, USER_INPUT_FILE_NAME, ) await self .run_pipeline(AfterExecute.after_execute, result) logger.debug( "\n" .join( self .trace)) return result async def _execute_tool( self , tool_call: AssistantFunctionCall) - > CommandOutput: """Execute the command and return the result Args: tool_call (AssistantFunctionCall): The tool call to execute Returns: str: The execution result """ # Execute a native command with the same name or alias, if it exists command = self ._get_command(tool_call.name) try : result = command( * * tool_call.arguments) if inspect.isawaitable(result): return await result return result except 
AgentException: raise except Exception as e: raise CommandExecutionError( str (e)) def _get_command( self , command_name: str ) - > Command: for command in reversed ( self .commands): if command_name in command.names: return command raise UnknownCommandError( f "Cannot execute command '{command_name}': unknown command." ) def _remove_disabled_commands( self ) - > None : self .commands = [ command for command in self .commands if not any ( name in self .legacy_config.disabled_commands for name in command.names ) ] def find_obscured_commands( self ) - > list [Command]: seen_names = set () obscured_commands = [] for command in reversed ( self .commands): # If all of the command's names have been seen, it's obscured if seen_names.issuperset(command.names): obscured_commands.append(command) else : seen_names.update(command.names) return list ( reversed (obscured_commands)) |
这段 agent.py 代码是一个助理AI系统的核心实现。以下是代码的功能概述:
- 导入必要的模块和类,包括日志记录、时间处理、类型检查等。
- 定义了AgentConfiguration、AgentSettings和Agent类,用于配置助理代理的设置和行为。
- 引入并使用了 forge 框架中用于提供指令、命令、消息等的 Provider 协议类(DirectiveProvider、CommandProvider、MessageProvider 等)。
- 组装了用于执行代码、文件管理、图片生成、web搜索等功能的组件(这些组件由 forge 框架提供,在 Agent 初始化时装配)。
- 定义了"propose_action"方法,用于提出下一步的操作建议。
- 包含了对话提示、日志处理、异常处理等模块。
- 实现了"execute"和"do_not_execute"方法,用于执行或终止操作建议。
- 包含了辅助方法用于执行工具调用、找到可执行命令等功能。
- 使用Pydantic进行数据验证和模型定义。
- 包含了一些辅助方法和变量,用于监控操作、管理上下文等。
总体而言,这些代码构建了一个助理AI系统,用于提出决策建议、执行操作,并与用户进行交互。
另外一个就是利用gpt4 类似的LLM进行任务编排,代码在:
one_shot.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 | from __future__ import annotations import json import platform import re from logging import Logger import distro from forge.config.ai_directives import AIDirectives from forge.config.ai_profile import AIProfile from forge.json.parsing import extract_dict_from_json from forge.llm.prompting import ChatPrompt, LanguageModelClassification, PromptStrategy from forge.llm.prompting.utils import format_numbered_list from forge.llm.providers.schema import ( AssistantChatMessage, ChatMessage, CompletionModelFunction, ) from forge.models.action import ActionProposal from forge.models.config import SystemConfiguration, UserConfigurable from forge.models.json_schema import JSONSchema from forge.models.utils import ModelWithSummary from forge.utils.exceptions import InvalidAgentResponseError from pydantic import Field _RESPONSE_INTERFACE_NAME = "AssistantResponse" class AssistantThoughts(ModelWithSummary): 
observations: str = Field( ..., description = "Relevant observations from your last action (if any)" ) text: str = Field(..., description = "Thoughts" ) reasoning: str = Field(..., description = "Reasoning behind the thoughts" ) self_criticism: str = Field(..., description = "Constructive self-criticism" ) plan: list [ str ] = Field( ..., description = "Short list that conveys the long-term plan" ) speak: str = Field(..., description = "Summary of thoughts, to say to user" ) def summary( self ) - > str : return self .text class OneShotAgentActionProposal(ActionProposal): thoughts: AssistantThoughts class OneShotAgentPromptConfiguration(SystemConfiguration): DEFAULT_BODY_TEMPLATE: str = ( "## Constraints\n" "You operate within the following constraints:\n" "{constraints}\n" "\n" "## Resources\n" "You can leverage access to the following resources:\n" "{resources}\n" "\n" "## Commands\n" "These are the ONLY commands you can use." " Any action you perform must be possible through one of these commands:\n" "{commands}\n" "\n" "## Best practices\n" "{best_practices}" ) DEFAULT_CHOOSE_ACTION_INSTRUCTION: str = ( "Determine exactly one command to use next based on the given goals " "and the progress you have made so far, " "and respond using the JSON schema specified previously:" ) body_template: str = UserConfigurable(default = DEFAULT_BODY_TEMPLATE) choose_action_instruction: str = UserConfigurable( default = DEFAULT_CHOOSE_ACTION_INSTRUCTION ) use_functions_api: bool = UserConfigurable(default = False ) ######### # State # ######### # progress_summaries: dict[tuple[int, int], str] = Field( # default_factory=lambda: {(0, 0): ""} # ) class OneShotAgentPromptStrategy(PromptStrategy): default_configuration: OneShotAgentPromptConfiguration = ( OneShotAgentPromptConfiguration() ) def __init__( self , configuration: OneShotAgentPromptConfiguration, logger: Logger, ): self .config = configuration self .response_schema = JSONSchema.from_dict(OneShotAgentActionProposal.schema()) 
self .logger = logger @property def model_classification( self ) - > LanguageModelClassification: return LanguageModelClassification.FAST_MODEL # FIXME: dynamic switching def build_prompt( self , * , messages: list [ChatMessage], task: str , ai_profile: AIProfile, ai_directives: AIDirectives, commands: list [CompletionModelFunction], include_os_info: bool , * * extras, ) - > ChatPrompt: """Constructs and returns a prompt with the following structure: 1. System prompt 3. `cycle_instruction` """ system_prompt, response_prefill = self .build_system_prompt( ai_profile = ai_profile, ai_directives = ai_directives, commands = commands, include_os_info = include_os_info, ) final_instruction_msg = ChatMessage.user( self .config.choose_action_instruction) return ChatPrompt( messages = [ ChatMessage.system(system_prompt), ChatMessage.user(f '"""{task}"""' ), * messages, final_instruction_msg, ], prefill_response = response_prefill, functions = commands if self .config.use_functions_api else [], ) def build_system_prompt( self , ai_profile: AIProfile, ai_directives: AIDirectives, commands: list [CompletionModelFunction], include_os_info: bool , ) - > tuple [ str , str ]: """ Builds the system prompt. Returns: str: The system prompt body str: The desired start for the LLM's response; used to steer the output """ response_fmt_instruction, response_prefill = self .response_format_instruction( self .config.use_functions_api ) system_prompt_parts = ( self ._generate_intro_prompt(ai_profile) + ( self ._generate_os_info() if include_os_info else []) + [ self .config.body_template. 
format ( constraints = format_numbered_list( ai_directives.constraints + self ._generate_budget_constraint(ai_profile.api_budget) ), resources = format_numbered_list(ai_directives.resources), commands = self ._generate_commands_list(commands), best_practices = format_numbered_list(ai_directives.best_practices), ) ] + [ "## Your Task\n" "The user will specify a task for you to execute, in triple quotes," " in the next message. Your job is to complete the task while following" " your directives as given above, and terminate when your task is done." ] + [ "## RESPONSE FORMAT\n" + response_fmt_instruction] ) # Join non-empty parts together into paragraph format return ( "\n\n" .join( filter ( None , system_prompt_parts)).strip( "\n" ), response_prefill, ) def response_format_instruction( self , use_functions_api: bool ) - > tuple [ str , str ]: response_schema = self .response_schema.copy(deep = True ) if ( use_functions_api and response_schema.properties and "use_tool" in response_schema.properties ): del response_schema.properties[ "use_tool" ] # Unindent for performance response_format = re.sub( r "\n\s+" , "\n" , response_schema.to_typescript_object_interface(_RESPONSE_INTERFACE_NAME), ) response_prefill = f '{{\n "{list(response_schema.properties.keys())[0]}":' return ( ( f "YOU MUST ALWAYS RESPOND WITH A JSON OBJECT OF THE FOLLOWING TYPE:\n" f "{response_format}" + ( "\n\nYOU MUST ALSO INVOKE A TOOL!" if use_functions_api else "") ), response_prefill, ) def _generate_intro_prompt( self , ai_profile: AIProfile) - > list [ str ]: """Generates the introduction part of the prompt. Returns: list[str]: A list of strings forming the introduction part of the prompt. """ return [ f "You are {ai_profile.ai_name}, {ai_profile.ai_role.rstrip('.')}." , "Your decisions must always be made independently without seeking " "user assistance. Play to your strengths as an LLM and pursue " "simple strategies with no legal complications." 
, ] def _generate_os_info( self ) - > list [ str ]: """Generates the OS information part of the prompt. Params: config (Config): The configuration object. Returns: str: The OS information part of the prompt. """ os_name = platform.system() os_info = ( platform.platform(terse = True ) if os_name ! = "Linux" else distro.name(pretty = True ) ) return [f "The OS you are running on is: {os_info}" ] def _generate_budget_constraint( self , api_budget: float ) - > list [ str ]: """Generates the budget information part of the prompt. Returns: list[str]: The budget information part of the prompt, or an empty list. """ if api_budget > 0.0 : return [ f "It takes money to let you run. " f "Your API budget is ${api_budget:.3f}" ] return [] def _generate_commands_list( self , commands: list [CompletionModelFunction]) - > str : """Lists the commands available to the agent. Params: agent: The agent for which the commands are being listed. Returns: str: A string containing a numbered list of commands. """ try : return format_numbered_list([cmd.fmt_line() for cmd in commands]) except AttributeError: self .logger.warning(f "Formatting commands failed. {commands}" ) raise def parse_response_content( self , response: AssistantChatMessage, ) - > OneShotAgentActionProposal: if not response.content: raise InvalidAgentResponseError( "Assistant response has no text content" ) self .logger.debug( "LLM response content:" + ( f "\n{response.content}" if "\n" in response.content else f " '{response.content}'" ) ) assistant_reply_dict = extract_dict_from_json(response.content) self .logger.debug( "Parsing object extracted from LLM response:\n" f "{json.dumps(assistant_reply_dict, indent=4)}" ) parsed_response = OneShotAgentActionProposal.parse_obj(assistant_reply_dict) if self .config.use_functions_api: if not response.tool_calls: raise InvalidAgentResponseError( "Assistant did not use a tool" ) parsed_response.use_tool = response.tool_calls[ 0 ].function return parsed_response |
这段代码实现了一个助理AI系统,本质上是ReAct框架,主要包括以下功能:
- 定义了助理AI的思考模型AssistantThoughts,包括观察、思考、推理、自我批评、计划和口头表达等属性。
- 定义了OneShotAgentActionProposal类,用于存储助理AI提出的行动建议和思考。
- 实现了OneShotAgentPromptConfiguration和OneShotAgentPromptStrategy类,用于配置和构建助理AI的提示信息和操作策略。
- 使用Pydantic库进行数据验证和模型定义。
- 实现了各种辅助方法,用于生成系统提示、解析回复内容等功能。
- 包含了与JSON数据交互、日志记录、异常处理等相关模块和函数。
总体而言,这段代码涵盖了助理AI系统中与提示信息、响应解析、操作建议等方面相关的关键功能。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
· DeepSeek 开源周回顾「GitHub 热点速览」
2022-05-23 Nebula入门学习——day2 初次使用实践
2022-05-23 nebula入门学习——day1 nebula基本概念、原理和架构
2021-05-23 靶场推荐——pikachu SQL注入示例
2021-05-23 高精度IP定位——可以使用opengps 效果还不错
2021-05-23 Telegram 社工库泄露用户隐私
2021-05-23 监控github上新增的cve编号项目漏洞,推送钉钉或者server酱——可以直接利用poc进行漏洞测试利用
2021-05-23 robots.txt 信息收集 信息泄露 可以看到后台服务器的一些信息 目录信息