之于

Flink源码阅读 - K8s上部署session模式的集群(入口配置)

启动主类 KubernetesSessionCli

KubernetesSessionCli#main 主类入口

// org.apache.flink.kubernetes.cli.KubernetesSessionCli#main
/**
 * Command-line entry point of the Kubernetes session CLI.
 *
 * <p>Loads the global Flink configuration, resolves the configuration directory, executes
 * {@code run} inside the installed security context and finally terminates the JVM with the
 * resulting exit code.
 *
 * @param args raw command-line arguments, forwarded unchanged to {@code run}
 */
public static void main(String[] args) {
    // Per the original note: read from ${FLINK_CONF_DIR}/flink-conf.yaml.
    final Configuration flinkConf = GlobalConfiguration.loadConfiguration();
    // Per the original note: the first existing directory among FLINK_CONF_DIR, ../conf, conf.
    final String confDir = CliFrontend.getConfigurationDirectoryFromEnv();

    int exitCode;
    try {
        final KubernetesSessionCli sessionCli = new KubernetesSessionCli(flinkConf, confDir);
        exitCode = SecurityUtils.getInstalledContext().runSecured(() -> sessionCli.run(args));
    } catch (CliArgsException argsFailure) {
        // Argument problems get their own handler and exit code.
        exitCode = AbstractCustomCommandLine.handleCliArgsException(argsFailure, LOG);
    } catch (Exception failure) {
        exitCode = AbstractCustomCommandLine.handleError(failure, LOG);
    }
    System.exit(exitCode);
}

KubernetesSessionCli#run 核心流程

获取有效配置 -> 加载和匹配ClusterClientFactory -> 构造Descriptor -> 查询或创建服务 -> Descriptor配置集群内部细节 -> 附属管道检测stop,quit命令

//org.apache.flink.kubernetes.cli.KubernetesSessionCli#run
/**
 * Starts a new Kubernetes session cluster or attaches to an existing one and, in attached mode,
 * serves the interactive prompt until the user enters {@code quit} or {@code stop}.
 *
 * <p>Flow: build the effective configuration, pick the matching {@code ClusterClientFactory},
 * create the cluster descriptor, retrieve the cluster if its REST service already exists
 * (otherwise deploy a new session cluster), then optionally read commands from stdin.
 *
 * <p>The cluster client and the additional Kubernetes client are closed on every exit path,
 * including when retrieval/deployment fails or shutting the cluster down throws; a failure to
 * close one of them does not prevent closing the other.
 *
 * @param args raw command-line arguments
 * @return {@code 0} whenever the method completes without propagating an exception
 * @throws FlinkException propagated from the factory / cluster descriptor calls
 * @throws CliArgsException if the effective configuration cannot be derived from {@code args}
 */
private int run(String[] args) throws FlinkException, CliArgsException {
    // Per the original notes the effective configuration consists of:
    //   ${FLINK_CONF_DIR}/flink-conf.yaml,
    //   the -Dk=v dynamic properties (a missing value is treated as "true"),
    //   execution.target = kubernetes-session.
    final Configuration configuration = getEffectiveConfiguration(args);
    // Load all ClusterClientFactory services and select the one matching execution.target.
    final ClusterClientFactory<String> kubernetesClusterClientFactory =
            clusterClientServiceLoader.getClusterClientFactory(configuration);
    // Creates the descriptor (which builds its own Kubernetes client from the configuration).
    final ClusterDescriptor<String> kubernetesClusterDescriptor =
            kubernetesClusterClientFactory.createClusterDescriptor(configuration);
    try {
        // kubernetes.cluster-id
        String clusterId = kubernetesClusterClientFactory.getClusterId(configuration);
        // execution.attached
        final boolean detached = !configuration.get(DeploymentOptions.ATTACHED);
        // The descriptor already owns a client; this second one is only used to look up the
        // REST service and must therefore be closed by this method.
        final FlinkKubeClient kubeClient =
                FlinkKubeClientFactory.getInstance().fromConfiguration(configuration, "client");
        // Stays null if retrieval/deployment fails, so the cleanup below can skip it.
        ClusterClient<String> clusterClient = null;
        try {
            // Retrieve or create a session cluster.
            if (clusterId != null && kubeClient.getRestService(clusterId).isPresent()) {
                clusterClient = kubernetesClusterDescriptor.retrieve(clusterId).getClusterClient();
            } else {
                clusterClient =
                        kubernetesClusterDescriptor
                                .deploySessionCluster(
                                        kubernetesClusterClientFactory.getClusterSpecification(
                                                configuration))
                                .getClusterClient();
                clusterId = clusterClient.getClusterId();
            }

            // In attached mode, react to the interactive "stop" / "quit" commands.
            try {
                if (!detached) {
                    Tuple2<Boolean, Boolean> continueRepl = new Tuple2<>(true, false);
                    try (BufferedReader in =
                            new BufferedReader(new InputStreamReader(System.in))) {
                        // f0 == true: keep reading and interpreting user input.
                        while (continueRepl.f0) {
                            continueRepl = repStep(in);
                        }
                    } catch (Exception e) {
                        LOG.warn(
                                "Exception while running the interactive command line interface.",
                                e);
                    }
                    // f1 == true: the user asked for "stop", so shut the cluster down.
                    if (continueRepl.f1) {
                        kubernetesClusterDescriptor.killCluster(clusterId);
                    }
                }
            } catch (Exception e) {
                // Best effort, as before: a failed shutdown is logged, not propagated.
                LOG.info("Could not properly shutdown cluster client.", e);
            }
        } finally {
            // Close both clients independently so that one failure cannot leak the other.
            if (clusterClient != null) {
                try {
                    clusterClient.close();
                } catch (Exception e) {
                    LOG.info("Could not properly shutdown cluster client.", e);
                }
            }
            try {
                kubeClient.close();
            } catch (Exception e) {
                LOG.info("Could not properly shutdown cluster client.", e);
            }
        }
    } finally {
        try {
            kubernetesClusterDescriptor.close();
        } catch (Exception e) {
            LOG.info("Could not properly close the kubernetes cluster descriptor.", e);
        }
    }
    return 0;
}

getEffectiveConfiguration 获取有效配置

// org.apache.flink.kubernetes.cli.KubernetesSessionCli#getEffectiveConfiguration
/**
 * Builds the configuration the session CLI actually runs with.
 *
 * <p>Starts from a copy of {@code baseConfiguration}, overlays everything derived from the
 * command line and finally forces the deployment target to the Kubernetes session executor.
 *
 * @param args raw command-line arguments
 * @return a new configuration; {@code baseConfiguration} itself is not modified here
 * @throws CliArgsException if the arguments cannot be parsed
 */
Configuration getEffectiveConfiguration(String[] args) throws CliArgsException {
    // Parse the arguments and translate them into Flink configuration entries.
    final Configuration fromCommandLine =
            cli.toConfiguration(cli.parseCommandLineOptions(args, true));

    // Per the original note: baseConfiguration holds ${FLINK_CONF_DIR}/flink-conf.yaml.
    final Configuration merged = new Configuration(baseConfiguration);
    merged.addAll(fromCommandLine);
    // Always wins over any -e / -t given on the command line.
    merged.set(DeploymentOptions.TARGET, KubernetesSessionClusterExecutor.NAME);
    return merged;
}

parseCommandLineOptions 解析为命令行

// org.apache.flink.client.cli.CustomCommandLine#parseCommandLineOptions
/**
 * Parses {@code args} against the general and run options contributed by this command line.
 *
 * @param args raw command-line arguments
 * @param stopAtNonOptions forwarded unchanged to {@code CliFrontendParser.parse}
 * @return the parsed command line
 * @throws CliArgsException if parsing fails
 */
default CommandLine parseCommandLineOptions(String[] args, boolean stopAtNonOptions)
        throws CliArgsException {
    final Options supported = new Options();
    // General options first, then run options (a no-op for GenericCLI per the original note).
    addGeneralOptions(supported);
    addRunOptions(supported);

    final CommandLine parsed = CliFrontendParser.parse(supported, args, stopAtNonOptions);
    return parsed;
}

// org.apache.flink.client.cli.GenericCLI#addGeneralOptions
/**
 * Registers the options understood by the generic CLI on {@code baseOptions}: the executor
 * option, the target option and the dynamic-properties option.
 *
 * @param baseOptions option set to extend in place
 */
@Override
public void addGeneralOptions(Options baseOptions) {
    // Per the original notes: "-e <executor>", "-t <target>" and "-Dk=v" respectively.
    baseOptions
            .addOption(executorOption)
            .addOption(targetOption)
            .addOption(DynamicPropertiesUtil.DYNAMIC_PROPERTIES);
}

// org.apache.flink.client.cli.CliFrontendParser#parse
/**
 * Parses {@code args} with a commons-cli {@code DefaultParser}.
 *
 * @param options the options the parser should recognise
 * @param args raw command-line arguments
 * @param stopAtNonOptions forwarded unchanged to {@code DefaultParser.parse}
 * @return the parsed command line
 * @throws CliArgsException if commons-cli rejects the arguments; the original
 *     {@code ParseException} is attached as the cause
 */
public static CommandLine parse(Options options, String[] args, boolean stopAtNonOptions)
        throws CliArgsException {
    final DefaultParser parser = new DefaultParser();
    try {
        return parser.parse(options, args, stopAtNonOptions);
    } catch (ParseException e) {
        // Keep the ParseException as the cause so its type and stack trace are not lost.
        throw new CliArgsException(e.getMessage(), e);
    }
}

toConfiguration 解析为Flink配置

// org.apache.flink.client.cli.GenericCLI#toConfiguration
/**
 * Translates a parsed command line into Flink configuration entries.
 *
 * <p>The deployment target is taken from the executor option and then from the target option,
 * so the target option wins when both are present. Dynamic properties are copied afterwards
 * and the configuration directory is recorded last.
 *
 * @param commandLine the parsed command line
 * @return a fresh configuration containing only the values derived here
 */
@Override
public Configuration toConfiguration(final CommandLine commandLine) {
    final Configuration fromCli = new Configuration();

    // Order matters: the later option overrides the earlier one.
    // (Per the original note, KubernetesSessionCli overwrites execution.target afterwards.)
    for (final String optName : new String[] {executorOption.getOpt(), targetOption.getOpt()}) {
        final String requestedTarget = commandLine.getOptionValue(optName);
        if (requestedTarget != null) {
            fromCli.setString(DeploymentOptions.TARGET, requestedTarget);
        }
    }

    // CommandLine -> Properties -> Configuration
    DynamicPropertiesUtil.encodeDynamicProperties(commandLine, fromCli);

    // Remember the configuration directory this CLI was created with.
    fromCli.set(DeploymentOptionsInternal.CONF_DIR, configurationDir);

    return fromCli;
}

// org.apache.flink.client.cli.DynamicPropertiesUtil#encodeDynamicProperties
/**
 * Copies the dynamic properties found on the command line into {@code effectiveConfiguration}.
 *
 * <p>A property without a value is stored as the string {@code "true"}.
 *
 * @param commandLine parsed command line to read the dynamic properties from
 * @param effectiveConfiguration configuration that receives the entries (modified in place)
 */
static void encodeDynamicProperties(
        final CommandLine commandLine, final Configuration effectiveConfiguration) {
    final Properties dynamicProps =
            commandLine.getOptionProperties(DYNAMIC_PROPERTIES.getOpt());
    for (final String propName : dynamicProps.stringPropertyNames()) {
        final String rawValue = dynamicProps.getProperty(propName);
        // "-Dk=v" keeps v; a bare "-Dk" is interpreted as a boolean flag.
        effectiveConfiguration.setString(propName, rawValue == null ? "true" : rawValue);
    }
}

getClusterClientFactory 获取配置工厂

// org.apache.flink.client.deployment.DefaultClusterClientServiceLoader#getClusterClientFactory
/**
 * Discovers the {@code ClusterClientFactory} implementations via {@code ServiceLoader} and
 * returns the single one that declares itself compatible with {@code configuration}.
 *
 * <p>Per the original note, the service file lists the standalone, Kubernetes and YARN
 * factories.
 *
 * @param configuration configuration used to test each factory for compatibility
 * @return the only compatible factory
 * @throws IllegalStateException if no factory, or more than one factory, is compatible
 */
@Override
public <ClusterID> ClusterClientFactory<ClusterID> getClusterClientFactory(
        final Configuration configuration) {
    checkNotNull(configuration);

    final List<ClusterClientFactory> matches = new ArrayList<>();
    final Iterator<ClusterClientFactory> candidates =
            ServiceLoader.load(ClusterClientFactory.class).iterator();
    while (candidates.hasNext()) {
        try {
            // Providers are instantiated by next(), so loading failures surface here.
            final ClusterClientFactory candidate = candidates.next();
            if (candidate == null || !candidate.isCompatibleWith(configuration)) {
                continue;
            }
            matches.add(candidate);
        } catch (Throwable t) {
            // Only a missing-class cause is tolerated; anything else is rethrown.
            if (!(t.getCause() instanceof NoClassDefFoundError)) {
                throw t;
            }
            LOG.info("Could not load factory due to missing dependencies.");
        }
    }

    if (matches.isEmpty()) {
        throw new IllegalStateException(
                "No ClusterClientFactory found. If you were targeting a Yarn cluster, "
                        + "please make sure to export the HADOOP_CLASSPATH environment variable or have hadoop in your "
                        + "classpath. For more information refer to the \"Deployment\" section of the official "
                        + "Apache Flink documentation.");
    }
    if (matches.size() > 1) {
        // Render the configuration as one "key=value" line per entry for the error message.
        final String renderedConfig =
                configuration.toMap().entrySet().stream()
                        .map(entry -> entry.getKey() + "=" + entry.getValue())
                        .collect(Collectors.joining("\n"));
        throw new IllegalStateException(
                "Multiple compatible client factories found for:\n" + renderedConfig + ".");
    }

    // Exactly one compatible factory remains.
    return (ClusterClientFactory<ClusterID>) matches.get(0);
}

// org.apache.flink.kubernetes.KubernetesClusterClientFactory#isCompatibleWith
/**
 * Reports whether this factory can serve the deployment target set in {@code configuration}.
 *
 * @param configuration configuration whose deployment target is inspected; must not be null
 * @return {@code true} if the configured target is a valid Kubernetes deployment target
 */
@Override
public boolean isCompatibleWith(Configuration configuration) {
    checkNotNull(configuration);
    // Per the original note, the target is "kubernetes-session" at this point of the flow.
    return KubernetesDeploymentTarget.isValidKubernetesTarget(
            configuration.getString(DeploymentOptions.TARGET));
}

// org.apache.flink.kubernetes.configuration.KubernetesDeploymentTarget#isValidKubernetesTarget
/**
 * Checks whether {@code configValue} names one of the Kubernetes deployment targets,
 * ignoring case.
 *
 * @param configValue the configured target name, may be {@code null}
 * @return {@code true} if a target with that name exists; {@code false} for {@code null}
 */
public static boolean isValidKubernetesTarget(final String configValue) {
    if (configValue == null) {
        return false;
    }
    // Per the original note, the values are kubernetes-session and kubernetes-application.
    for (final KubernetesDeploymentTarget candidate : KubernetesDeploymentTarget.values()) {
        if (candidate.name.equalsIgnoreCase(configValue)) {
            return true;
        }
    }
    return false;
}

createClusterDescriptor 创建集群Descriptor

// org.apache.flink.kubernetes.KubernetesClusterClientFactory#createClusterDescriptor
/**
 * Creates the Kubernetes cluster descriptor for {@code configuration}.
 *
 * <p>If no cluster id is configured, a generated one is written into {@code configuration}
 * before the descriptor is built, so the passed-in configuration may be modified.
 *
 * @param configuration Flink configuration; must not be null
 * @return a descriptor backed by a Kubernetes client created from the same configuration
 */
@Override
public KubernetesClusterDescriptor createClusterDescriptor(Configuration configuration) {
    checkNotNull(configuration);

    final boolean clusterIdMissing = !configuration.contains(KubernetesConfigOptions.CLUSTER_ID);
    if (clusterIdMissing) {
        configuration.setString(KubernetesConfigOptions.CLUSTER_ID, generateClusterId());
    }

    // The second argument is the FlinkKubeClient built from the same configuration.
    return new KubernetesClusterDescriptor(
            configuration,
            FlinkKubeClientFactory.getInstance().fromConfiguration(configuration, "client"));
}

// org.apache.flink.kubernetes.KubernetesClusterClientFactory#generateClusterId
// 生成 flink-cluster-xxx 的共45位的clusterId
/**
 * Generates a cluster id by prefixing a fresh {@code AbstractID} and cutting the result to
 * {@code Constants.MAXIMUM_CHARACTERS_OF_CLUSTER_ID} characters.
 *
 * <p>Per the original note the result looks like {@code flink-cluster-xxx} and is 45
 * characters long.
 *
 * @return the generated cluster id
 */
private String generateClusterId() {
    final String unbounded = CLUSTER_ID_PREFIX + new AbstractID().toString();
    return unbounded.substring(0, Constants.MAXIMUM_CHARACTERS_OF_CLUSTER_ID);
}

fromConfiguration 创建FlinkKubeClient

Flink配置 -> K8s配置, namespace -> NamespacedKubernetesClient -> Fabric8FlinkKubeClient(Flink配置, K8sCli, IO线程池)

// org.apache.flink.kubernetes.kubeclient.FlinkKubeClientFactory#fromConfiguration
/**
 * Creates a {@code FlinkKubeClient} from the Flink configuration.
 *
 * <p>Flow: Flink configuration -> Kubernetes {@code Config} (from the configured kubeconfig
 * file, or auto-configured when none is set) -> namespaced Kubernetes client ->
 * {@code Fabric8FlinkKubeClient} with its own async I/O thread pool.
 *
 * @param flinkConfig Flink configuration to read the Kubernetes settings from
 * @param useCase forwarded to {@code createThreadPoolForAsyncIO}
 * @return the new client
 * @throws KubernetesClientException if the configured kubeconfig file cannot be read
 */
public FlinkKubeClient fromConfiguration(Configuration flinkConfig, String useCase) {
    // kubernetes.context
    final String kubeContext = flinkConfig.getString(KubernetesConfigOptions.CONTEXT);
    if (kubeContext != null) {
        LOG.info("Configuring kubernetes client to use context {}.", kubeContext);
    }

    // kubernetes.config.file
    final String kubeConfigFile =
            flinkConfig.getString(KubernetesConfigOptions.KUBE_CONFIG_FILE);
    final Config config;
    if (kubeConfigFile == null) {
        LOG.debug("Trying to load default kubernetes config.");
        config = Config.autoConfigure(kubeContext);
    } else {
        LOG.debug("Trying to load kubernetes config from file: {}.", kubeConfigFile);
        try {
            final String kubeConfigContents = FileUtils.readFileUtf8(new File(kubeConfigFile));
            // Per the original note: with a null context the kubeconfig's default context is
            // used; the third argument (kubeconfig path) is optional and is only needed to
            // resolve relative TLS asset paths referenced inside the kubeconfig.
            config = Config.fromKubeconfig(kubeContext, kubeConfigContents, null);
        } catch (IOException e) {
            throw new KubernetesClientException("Load kubernetes config failed.", e);
        }
    }

    // kubernetes.namespace
    final String namespace = flinkConfig.getString(KubernetesConfigOptions.NAMESPACE);
    LOG.debug("Setting namespace of Kubernetes client to {}", namespace);
    config.setNamespace(namespace);

    // This could be removed after we bump the fabric8 Kubernetes client version to 4.13.0+ or
    // use the a shared connection for all ConfigMap watches. See FLINK-22006 for more
    // information.
    trySetMaxConcurrentRequest(config);

    // Low-level client that actually talks to Kubernetes.
    final NamespacedKubernetesClient client = new DefaultKubernetesClient(config);

    // Pool size comes from kubernetes.client.io-pool.size (4 per the original note).
    return new Fabric8FlinkKubeClient(
            flinkConfig,
            client,
            createThreadPoolForAsyncIO(
                    flinkConfig.get(
                            KubernetesConfigOptions.KUBERNETES_CLIENT_IO_EXECUTOR_POOL_SIZE),
                    useCase));
}

/**
 * Applies the max-concurrent-requests setting, resolved from a system property or environment
 * variable with the client's default as fallback, to {@code config}.
 *
 * @param config Kubernetes client configuration to update in place
 */
@VisibleForTesting
static void trySetMaxConcurrentRequest(Config config) {
    // kubernetes.max.concurrent.requests (64 per the original note)
    final String maxRequests =
            Utils.getSystemPropertyOrEnvVar(
                    Config.KUBERNETES_MAX_CONCURRENT_REQUESTS,
                    String.valueOf(Config.DEFAULT_MAX_CONCURRENT_REQUESTS));
    if (maxRequests == null) {
        return;
    }
    LOG.debug("Setting max concurrent requests of Kubernetes client to {}", maxRequests);
    config.setMaxConcurrentRequests(Integer.parseInt(maxRequests));
}

repStep 附属(attached)模式下循环读取并处理用户命令

// org.apache.flink.kubernetes.cli.KubernetesSessionCli#repStep

// 检查是否继续读取输入和关闭集群
// f0=true 继续读取用户交互输入
// f1=true 关闭集群
/**
 * Performs one step of the interactive loop: waits for input for at most
 * {@code CLIENT_POLLING_INTERVAL_MS} and interprets a single command if one is available.
 *
 * @param in reader over the interactive input
 * @return a pair where {@code f0} tells the caller whether to keep reading input and
 *     {@code f1} whether the cluster should be shut down
 * @throws IOException if checking or reading the input fails
 * @throws InterruptedException if the thread is interrupted while waiting for input
 */
private Tuple2<Boolean, Boolean> repStep(BufferedReader in)
        throws IOException, InterruptedException {
    final long pollStart = System.currentTimeMillis();
    // Poll every 200 ms until input arrives or the polling interval (3 s per the original
    // note) has elapsed; the reader is not queried once the interval is over.
    while (true) {
        final boolean intervalElapsed =
                (System.currentTimeMillis() - pollStart) >= CLIENT_POLLING_INTERVAL_MS;
        if (intervalElapsed || in.ready()) {
            break;
        }
        Thread.sleep(200L);
    }

    // Nothing to read: keep looping, do not stop the cluster.
    if (!in.ready()) {
        return new Tuple2<>(true, false);
    }

    // ------------- handle interactive command by user. ----------------------
    final String userCommand = in.readLine();
    switch (userCommand) {
        case "quit":
            // Leave the prompt, keep the cluster running.
            return new Tuple2<>(false, false);
        case "stop":
            // Leave the prompt and request cluster shutdown.
            return new Tuple2<>(false, true);
        case "help":
            System.err.println(KUBERNETES_CLUSTER_HELP);
            break;
        default:
            System.err.println("Unknown command '" + userCommand + "'. Showing help:");
            System.err.println(KUBERNETES_CLUSTER_HELP);
            break;
    }
    return new Tuple2<>(true, false);
}

后续源码研读转移到 Gitee

posted on 2022-01-05 10:15  之于  阅读(981)  评论(0编辑  收藏  举报

导航