AnsibleAPI源码剖析(1)-Runner类的 初始化
#ansible版本说明:ansible1.9.1
1.简单使用例子
# -*- coding=utf-8 -*- import ansible.runner ########################## runner = ansible.runner.Runner( host_list = 'ip.txt', #指定主机文件 remote_user = 'admin', #指定远程执行用户 module_name = 'shell', #使用ansible的shell module_args = 'echo 111;sleep 5', #模块参数 pattern = 'test', #主机文件中生效的组 forks = 5, #多进程并发数量 remote_pass = '123456', #远程执行的密码 #is_playbook= True, ) datastructure = runner.run() print datastructure
ip.txt
[test]
127.0.0.1
2.ansible.runner模块中的Runner类
(1)初始化函数学习
# -*- coding=utf-8 -*- class Runner(object): ''' core API interface to ansible ''' # see bin/ansible for how this is used... def __init__(self, host_list=C.DEFAULT_HOST_LIST, # ex: /etc/ansible/hosts, legacy usage #初始化默认参数,如果没指定走/etc/ansible/hosts文件 module_path=None, # ex: /usr/share/ansible #Ansible的路径一般不用写 module_name=C.DEFAULT_MODULE_NAME, # ex: copy #模块名字必须指定 module_args=C.DEFAULT_MODULE_ARGS, # ex: "src=/tmp/a dest=/tmp/b" #模块的参数,对应了ansible命令行中的-a后面带的内容 forks=C.DEFAULT_FORKS, # parallelism level #进程的数目,如果你填入了20,它会判断你的list_hosts里面是否有20个IP,没有的话根据主机的数量来派生进程,如果多的话,就用multiprocess的pool进程池来调用 timeout=C.DEFAULT_TIMEOUT, # SSH timeout #SSH的超时时间 pattern=C.DEFAULT_PATTERN, # which hosts? ex: 'all', 'acme.example.org'#指定hostfile文件中执行的分组 remote_user=C.DEFAULT_REMOTE_USER, # ex: 'username' #远程执行的用户 remote_pass=C.DEFAULT_REMOTE_PASS, # ex: 'password123' or None if using key #远程执行的密码 remote_port=None, # if SSH on different ports #远程执行的端口,如果ssh端口被变更过的话 private_key_file=C.DEFAULT_PRIVATE_KEY_FILE, # if not using keys/passwords #自钥地址,用来秘钥验证的 background=0, # async poll every X seconds, else 0 for non-async #0代表不异步,其余数代表多少秒后根据任务id去取数据 basedir=None, # directory of playbook, if applicable #指定playbook的存放目录 setup_cache=None, # used to share fact data w/ other tasks #用于与其他任务共享实时数据 vars_cache=None, # used to store variables about hosts #存储有关主机的变量 transport=C.DEFAULT_TRANSPORT, # 'ssh', 'paramiko', 'local' #3种传输模式,ssh,paramiko,local conditional='True', # run only if this fact expression evals to true #状态的判断, callbacks=None, # used for output #指定回调输出 module_vars=None, # a playbooks internals thing #playbooks内部的东西 play_vars=None, # #和playbook相关的变量 play_file_vars=None, # role_vars=None, # role_params=None, # default_vars=None, # #默认变量 extra_vars=None, # extra vars specified with he playbook(s)#playbook指定额外的变量 is_playbook=False, # running from playbook or not? #是否以playbook运行 inventory=None, # reference to Inventory object #引用inventory对象 subset=None, # subset pattern #子集模式 check=False, # don't make any changes, just try to probe for potential changes #不做任何改变,仅仅尝试 diff=False, # whether to show diffs for template files that change #是否显示更改的模板文件差异 environment=None, # environment variables (as dict) to use inside the command #环境变量以字典方式传递进来 complex_args=None, # structured data in addition to module_args, must be a dict #结构化数据,参数 error_on_undefined_vars=C.DEFAULT_UNDEFINED_VAR_BEHAVIOR, # ex. False accelerate=False, # use accelerated connection #使用加速ssh连接 accelerate_ipv6=False, # accelerated connection w/ IPv6 accelerate_port=None, # port to use with accelerated connection #使用加速连接使用的端口 vault_pass=None, run_hosts=None, # an optional list of pre-calculated hosts to run on #要运行的预计算的主机列表 no_log=False, # option to enable/disable logging for a given task #是否开启任务日志 run_once=False, # option to enable/disable host bypass loop for a given task become=False, # whether to run privelege escalation or not #是否运行特权升级sudo become_method=C.DEFAULT_BECOME_METHOD, become_user=C.DEFAULT_BECOME_USER, # ex: 'root' #sudo用户 become_pass=C.DEFAULT_BECOME_PASS, # ex: 'password123' or None #sudo密码 become_exe=C.DEFAULT_BECOME_EXE, # ex: /usr/local/bin/sudo #sudo命令行 ): # used to lock multiprocess inputs and outputs at various levels self.output_lockfile = OUTPUT_LOCKFILE #输出文件锁 self.process_lockfile = PROCESS_LOCKFILE #进程锁 if not complex_args: #为空的话初始化为空字典 complex_args = {} # storage & defaults self.check = check self.diff = diff self.setup_cache = utils.default(setup_cache, lambda: ansible.cache.FactCache()) self.vars_cache = utils.default(vars_cache, lambda: collections.defaultdict(dict)) self.basedir = utils.default(basedir, lambda: os.getcwd()) self.callbacks = utils.default(callbacks, lambda: DefaultRunnerCallbacks()) self.generated_jid = str(random.randint(0, 999999999999)) self.transport = transport self.inventory = utils.default(inventory, lambda: ansible.inventory.Inventory(host_list)) # utils.default详细见下方的备注执行lambda函数,self.inventory就是Inventory对象,初始化通过host_list来执行,这个对象下包含了主机信息以及各种变量 self.module_vars = utils.default(module_vars, lambda: {}) self.play_vars = utils.default(play_vars, lambda: {}) self.play_file_vars = utils.default(play_file_vars, lambda: {}) self.role_vars = utils.default(role_vars, lambda: {}) self.role_params = utils.default(role_params, lambda: {}) self.default_vars = utils.default(default_vars, lambda: {}) self.extra_vars = utils.default(extra_vars, lambda: {}) self.always_run = None self.connector = connection.Connector(self) self.conditional = conditional self.delegate_to = None self.module_name = module_name self.forks = int(forks) self.pattern = pattern self.module_args = module_args self.timeout = timeout self.remote_user = remote_user self.remote_pass = remote_pass self.remote_port = remote_port self.private_key_file = private_key_file self.background = background self.become = become self.become_method = become_method self.become_user_var = become_user self.become_user = None self.become_pass = become_pass self.become_exe = become_exe self.is_playbook = is_playbook self.environment = environment self.complex_args = complex_args self.error_on_undefined_vars = error_on_undefined_vars self.accelerate = accelerate self.accelerate_port = accelerate_port self.accelerate_ipv6 = accelerate_ipv6 self.callbacks.runner = self self.omit_token = '__omit_place_holder__%s' % sha1(os.urandom(64)).hexdigest() self.vault_pass = vault_pass self.no_log = no_log self.run_once = run_once if self.transport == 'smart': # 判断传输模式,确定最后是否使用paramiko,ansible1.2.1/1.3版本里面有smart的东西,前向兼容 # If the transport is 'smart', check to see if certain conditions # would prevent us from using ssh, and fallback to paramiko. # 'smart' is the default since 1.2.1/1.3 self.transport = "ssh" #判断系统平台和远程执行密码来确定传输模式 if sys.platform.startswith('darwin') and self.remote_pass: # due to a current bug in sshpass on OSX, which can trigger # a kernel panic even for non-privileged users, we revert to # paramiko on that OS when a SSH password is specified self.transport = "paramiko" else: # see if SSH can support ControlPersist if not use paramiko #执行命令ssh -o ControlPersist加速模式,确定传输模式 cmd = subprocess.Popen(['ssh','-o','ControlPersist'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) (out, err) = cmd.communicate() if "Bad configuration option" in err: self.transport = "paramiko" # save the original transport, in case it gets # changed later via options like accelerate self.original_transport = self.transport # misc housekeeping if subset and self.inventory._subset is None: # don't override subset when passed from playbook self.inventory.subset(subset) # If we get a pre-built list of hosts to run on, from say a playbook, use them. # Also where we will store the hosts to run on once discovered self.run_hosts = run_hosts if self.transport == 'local': #传输模式本地的话,pwd和os模块联合方法取出当前系统执行用户,linux下的方法 self.remote_user = pwd.getpwuid(os.geteuid())[0] #模块路径指定的话,切割模块路径 if module_path is not None: for i in module_path.split(os.pathsep): utils.plugins.module_finder.add_directory(i) utils.plugins.push_basedir(self.basedir) # ensure we are using unique tmp paths # 初始化基本随机数生成器 random.seed() # *****************************************************
备注:utils.default
def default(value, function): ''' syntactic sugar around lazy evaluation of defaults ''' if value is None: return function() return value
(2) ansible.inventory.Inventory(host_list)原理剖析(假设都按照最上面的例子来传递,则host_list=ip.txt)
ansible/inventory/__init__.py
class Inventory(object): """ Host inventory for ansible. """ __slots__ = [ 'host_list', 'groups', '_restriction', '_also_restriction', '_subset', 'parser', '_vars_per_host', '_vars_per_group', '_hosts_cache', '_groups_list', '_pattern_cache', '_vault_password', '_vars_plugins', '_playbook_basedir'] def __init__(self, host_list=C.DEFAULT_HOST_LIST, vault_password=None): # the host file file, or script path, or list of hosts # if a list, inventory data will NOT be loaded self.host_list = host_list # 文件名传递进去 self._vault_password=vault_password # caching to avoid repeated calculations, particularly with # external inventory scripts. self._vars_per_host = {} self._vars_per_group = {} self._hosts_cache = {} self._groups_list = {} self._pattern_cache = {} # to be set by calling set_playbook_basedir by playbook code self._playbook_basedir = None # the inventory object holds a list of groups self.groups = [] # a list of host(names) to contain current inquiries to self._restriction = None self._also_restriction = None self._subset = None # 判断host_list是不是字符串类型 if isinstance(host_list, basestring): if "," in host_list: host_list = host_list.split(",") host_list = [ h for h in host_list if h and h.strip() ] if host_list is None: self.parser = None elif isinstance(host_list, list): self.parser = None all = Group('all') self.groups = [ all ] ipv6_re = re.compile('\[([a-f:A-F0-9]*[%[0-z]+]?)\](?::(\d+))?') for x in host_list: m = ipv6_re.match(x) if m: all.add_host(Host(m.groups()[0], m.groups()[1])) else: if ":" in x: tokens = x.rsplit(":", 1) # if there is ':' in the address, then this is an ipv6 if ':' in tokens[0]: all.add_host(Host(x)) else: all.add_host(Host(tokens[0], tokens[1])) else: all.add_host(Host(x)) # 判断host_list文件是否存在,将IP筛选出来 elif os.path.exists(host_list): if os.path.isdir(host_list): # Ensure basedir is inside the directory判断是否是目录 self.host_list = os.path.join(self.host_list, "") self.parser = InventoryDirectory(filename=host_list) self.groups = self.parser.groups.values() else: # check to see if the specified file starts with a 如果是文件的话执行如下流程 # shebang (#!/), so if an error is raised by the parser # class we can show a more apropos error shebang_present = False try: inv_file = open(host_list) first_line = inv_file.readlines()[0] inv_file.close() if first_line.startswith('#!'): shebang_present = True
#判断文件第一行是不是shebang except: pass #实际的逻辑执行 if utils.is_executable(host_list): try: self.parser = InventoryScript(filename=host_list) self.groups = self.parser.groups.values() except: if not shebang_present: raise errors.AnsibleError("The file %s is marked as executable, but failed to execute correctly. " % host_list + \ "If this is not supposed to be an executable script, correct this with `chmod -x %s`." % host_list) else: raise else: try: self.parser = InventoryParser(filename=host_list) self.groups = self.parser.groups.values() except: if shebang_present: raise errors.AnsibleError("The file %s looks like it should be an executable inventory script, but is not marked executable. " % host_list + \ "Perhaps you want to correct this with `chmod +x %s`?" % host_list) else: raise utils.plugins.vars_loader.add_directory(self.basedir(), with_subdir=True) else: raise errors.AnsibleError("Unable to find an inventory file, specify one with -i ?") # 获取插件变量 self._vars_plugins = [ x for x in utils.plugins.vars_loader.all(self) ] # get group vars from group_vars/ files and vars plugins获取组变量 for group in self.groups: group.vars = utils.combine_vars(group.vars, self.get_group_variables(group.name, vault_password=self._vault_password)) # get host vars from host_vars/ files and vars plugins获取主机变量 for host in self.get_hosts(): host.vars = utils.combine_vars(host.vars, self.get_host_variables(host.name, vault_password=self._vault_password))
(3)ansible/inventory/script.py
class InventoryScript(object): ''' Host inventory parser for ansible using external inventory scripts. ''' def __init__(self, filename=C.DEFAULT_HOST_LIST): # Support inventory scripts that are not prefixed with some # path information but happen to be in the current working # directory when '.' is not in PATH. self.filename = os.path.abspath(filename) cmd = [ self.filename, "--list" ] try: # 检测主机文件是否存在 sp = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) except OSError, e: raise errors.AnsibleError("problem running %s (%s)" % (' '.join(cmd), e)) (stdout, stderr) = sp.communicate() if sp.returncode != 0: raise errors.AnsibleError("Inventory script (%s) had an execution error: %s " % (filename,stderr)) self.data = stdout # see comment about _meta below self.host_vars_from_top = None # 错误信息处理 self.groups = self._parse(stderr) def _parse(self, err): all_hosts = {} # not passing from_remote because data from CMDB is trusted self.raw = utils.parse_json(self.data) self.raw = json_dict_bytes_to_unicode(self.raw) all = Group('all') groups = dict(all=all) group = None if 'failed' in self.raw: sys.stderr.write(err + "\n") raise errors.AnsibleError("failed to parse executable inventory script results: %s" % self.raw) for (group_name, data) in self.raw.items(): # in Ansible 1.3 and later, a "_meta" subelement may contain # a variable "hostvars" which contains a hash for each host # if this "hostvars" exists at all then do not call --host for each # host. This is for efficiency and scripts should still return data # if called with --host for backwards compat with 1.2 and earlier. if group_name == '_meta': if 'hostvars' in data: self.host_vars_from_top = data['hostvars'] continue if group_name != all.name: group = groups[group_name] = Group(group_name) else: group = all host = None if not isinstance(data, dict): data = {'hosts': data} # is not those subkeys, then simplified syntax, host with vars elif not any(k in data for k in ('hosts','vars','children')): data = {'hosts': [group_name], 'vars': data} if 'hosts' in data: if not isinstance(data['hosts'], list): raise errors.AnsibleError("You defined a group \"%s\" with bad " "data for the host list:\n %s" % (group_name, data)) for hostname in data['hosts']: if not hostname in all_hosts: all_hosts[hostname] = Host(hostname) host = all_hosts[hostname] group.add_host(host) if 'vars' in data: if not isinstance(data['vars'], dict): raise errors.AnsibleError("You defined a group \"%s\" with bad " "data for variables:\n %s" % (group_name, data)) for k, v in data['vars'].iteritems(): if group.name == all.name: all.set_variable(k, v) else: group.set_variable(k, v) # Separate loop to ensure all groups are defined for (group_name, data) in self.raw.items(): if group_name == '_meta': continue if isinstance(data, dict) and 'children' in data: for child_name in data['children']: if child_name in groups: groups[group_name].add_child_group(groups[child_name]) for group in groups.values(): if group.depth == 0 and group.name != 'all': all.add_child_group(group) return groups
总归一句话就是,通过以上代码片段,json化输出IP列表
(4)run函数,实际执行
def run(self): ''' xfer & run module on all matched hosts ''' # find hosts that match the pattern # 通过pattern找到host_list中的主机信息 if not self.run_hosts: self.run_hosts = self.inventory.list_hosts(self.pattern) # 通过self.pattern找到list_hosts的json字段,获取内容 hosts = self.run_hosts # 如果主机数量是0的话,callback回调,内容是空字典 if len(hosts) == 0: self.callbacks.on_no_hosts() return dict(contacted={}, dark={}) # 把实例赋值给全局变量multiprocessing_runner global multiprocessing_runner multiprocessing_runner = self results = None # Check if this is an action plugin. Some of them are designed # to be ran once per group of hosts. Example module: pause, # run once per hostgroup, rather than pausing once per each # host.
# p呢就是根据module_name找出要执行的相应模块插件
p = utils.plugins.action_loader.get(self.module_name, self)
# 进程数量优化 if self.forks == 0 or self.forks > len(hosts): self.forks = len(hosts) if (p and (getattr(p, 'BYPASS_HOST_LOOP', None)) or self.run_once): # Expose the current hostgroup to the bypassing plugins self.host_set = hosts # We aren't iterating over all the hosts in this # group. So, just choose the "delegate_to" host if that is defined and is # one of the targeted hosts, otherwise pick the first host in our group to # construct the conn object with. if self.delegate_to is not None and self.delegate_to in hosts: host = self.delegate_to else: host = hosts[0] result_data = self._executor(host, None).result # Create a ResultData item for each host in this group # using the returned result. If we didn't do this we would # get false reports of dark hosts. results = [ ReturnData(host=h, result=result_data, comm_ok=True) \ for h in hosts ] del self.host_set elif self.forks > 1: try:
# 调用_parallel_exec函数去跑结果 results = self._parallel_exec(hosts) except IOError, ie: print ie.errno if ie.errno == 32: # broken pipe from Ctrl+C raise errors.AnsibleError("interrupted") raise else: results = [ self._executor(h, None) for h in hosts ] return self._partition_results(results)
(5)_parallel_exec函数
def _parallel_exec(self, hosts): ''' handles mulitprocessing when more than 1 fork is required ''' manager = multiprocessing.Manager() job_queue = manager.Queue() #任务队列 for host in hosts: job_queue.put(host) #结果队列 result_queue = manager.Queue() try: fileno = sys.stdin.fileno() except ValueError: fileno = None workers = [] '''起forks进程数的进程去执行_executor_hook函数,函数的参数是任务队列,结果队列,以及new_stdin''' for i in range(self.forks): new_stdin = None if fileno is not None: try: new_stdin = os.fdopen(os.dup(fileno)) except OSError, e: # couldn't dupe stdin, most likely because it's # not a valid file descriptor, so we just rely on # using the one that was passed in pass prc = multiprocessing.Process(target=_executor_hook, args=(job_queue, result_queue, new_stdin)) prc.start() workers.append(prc)
#把每个进程放到workers列表里 try:
#遍历workers列表中的每个多进程实例,join方法呢是等待每个进程执行结束。保证多进程每个进程都执行结束,如果出现异常,进程中断再结束 for worker in workers: worker.join() except KeyboardInterrupt: for worker in workers: worker.terminate() worker.join() #结果集列表 results = [] try:
#结果队列不为空的话,不断从中取数据追加到结果result中 while not result_queue.empty(): results.append(result_queue.get(block=False)) except socket.error: raise errors.AnsibleError("<interrupted>") return results
(6)_executor_hook函数
def _executor_hook(job_queue, result_queue, new_stdin): # attempt workaround of https://github.com/newsapps/beeswithmachineguns/issues/17 # this function also not present in CentOS 6 if HAS_ATFORK: atfork() signal.signal(signal.SIGINT, signal.SIG_IGN) while not job_queue.empty(): try: host = job_queue.get(block=False) return_data = multiprocessing_runner._executor(host, new_stdin) result_queue.put(return_data) except Queue.Empty: pass except: traceback.print_exc()
原创:做时间的朋友