Hive QL Module: An Analysis of SemanticAnalyzer
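
The analyzeInternal() method of org.apache.hadoop.hive.ql.parse.SemanticAnalyzer (which CalcitePlanner extends) is the heart of Hive's query compilation: it takes the ASTNode produced by the parser and drives everything from semantic resolution to the final task plan. The annotated source below walks through its eleven numbered steps.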

  void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
    // 1. Generate resolved parse tree from the syntax tree
    LOG.info("Starting Semantic Analysis");
    //change the location of position alias process here
    processPositionAlias(ast);
    if (!genResolvedParseTree(ast, plannerCtx)) {
      return;
    }
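    // Note: genResolvedParseTree() walks the AST (doPhase1) to fill in the QB
    // (query block) structure and then resolves table/view metadata via
    // getMetaData(). It returns false when there is nothing further to plan,
    // e.g. a plain CREATE TABLE without an AS SELECT clause.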

    // 2. Generate operator tree from the resolved parse tree
    Operator sinkOp = genOPTree(ast, plannerCtx);

    if (!unparseTranslator.isEnabled() && tableMask.isEnabled()) {
      // Here we rewrite the * and also the masking table
      ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(),
              ctx, db, tabNameToTabObject, ignoredTokens);
      if (tree != ast) {
        ctx.setSkipTableMasking(true);
        init(true);
        //change the location of position alias process here
        processPositionAlias(tree);
        genResolvedParseTree(tree, plannerCtx);
        if (this instanceof CalcitePlanner) {
          ((CalcitePlanner) this).resetCalciteConfiguration();
        }
        sinkOp = genOPTree(tree, plannerCtx);
      }
    }
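    // The rewrite above applies the column-masking / row-filter policies
    // supplied by the configured HiveAuthorizer (tableMask). When the AST
    // actually changes, resolution and operator-tree generation are redone
    // on the rewritten tree.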

    // 3. Deduce result set schema
    if (createVwDesc != null && !this.ctx.isCboSucceeded()) {
      resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
    } else {
      // resultSchema will be null if
      // (1) cbo is disabled, or
      // (2) cbo is enabled with the AST return path (whether it succeeded or
      // not, resultSchema will be re-initialized).
      // It will be non-null only if cbo is enabled with the new return path
      // and succeeds.
      if (resultSchema == null) {
        resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(),
            HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
      }
    }
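    // (resultSchema is what the Driver later exposes to clients through
    // getSchema().)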

    // 4. Generate parse context for the optimizer & physical compiler
    copyInfoToQueryProperties(queryProperties);
    ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps,
        new HashSet<JoinOperator>(joinContext.keySet()),
        new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()),
        loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx,
        listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner,
        globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner,
        viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting,
        analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);

    // 5. Take care of view creation
    
    if (createVwDesc != null) {
      if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
        return;
      }
      
      if (!ctx.isCboSucceeded()) {
        saveViewDefinition();
      }

      // Validate the CREATE VIEW statement; at this point createVwDesc has
      // all the information needed for the semantic check
      validateCreateView();

      if (!createVwDesc.isMaterialized()) {
        // Since we're only creating a view (not executing it), we don't need to
        // optimize or translate the plan (and in fact, those procedures can
        // interfere with the view creation). So skip the rest of this method.
        ctx.setResDir(null);
        ctx.setResFile(null);

        try {
          PlanUtils.addInputsForView(pCtx);
        } catch (HiveException e) {
          throw new SemanticException(e);
        }

        // Generate lineage info for create view statements
        // if LineageLogger hook is configured.
        // Add the transformation that computes the lineage information.
        Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults()
            .omitEmptyStrings()
            .split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
        if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter")
            || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger")
            || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
          ArrayList<Transform> transformations = new ArrayList<Transform>();
          transformations.add(new HiveOpConverterPostProc());
          transformations.add(new Generator());
          for (Transform t : transformations) {
            pCtx = t.transform(pCtx);
          }
          // we just use view name as location.
          SessionState.get().getLineageState()
              .mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
        }
        return;
      }
    }

    // 6. Generate table access stats if required
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
      TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
      setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
    }

    // 7. Perform logical optimization
    if (LOG.isDebugEnabled()) {
      LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
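    // Optimizer.initialize() registers a chain of Transform rules based on
    // configuration (e.g. ColumnPruner, PredicatePushDown, ConstantPropagate,
    // ReduceSinkDeDuplication), and optimize() applies them in order to the
    // ParseContext.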
    Optimizer optm = new Optimizer();
    optm.setPctx(pCtx);
    optm.initialize(conf);
    pCtx = optm.optimize();
    if (pCtx.getColumnAccessInfo() != null) {
      // set ColumnAccessInfo for view column authorization
      setColumnAccessInfo(pCtx.getColumnAccessInfo());
    }
    FetchTask origFetchTask = pCtx.getFetchTask();
    if (LOG.isDebugEnabled()) {
      LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }

    // 8. Generate column access stats if required - wait until column pruning
    // takes place during optimization
    boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2()
        && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
    if (isColumnInfoNeedForAuth
        || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
      ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
      // view column access info is carried by this.getColumnAccessInfo().
      setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
    }

    // 9. Optimize the physical operator tree & translate to the target
    // execution engine (MR, Tez, ...)
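    // TaskCompilerFactory.getCompiler() returns a TezCompiler or SparkCompiler
    // when hive.execution.engine is set to tez or spark, and a
    // MapReduceCompiler otherwise; compile() breaks the operator tree into a
    // DAG of Tasks, filling rootTasks.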
    if (!ctx.getExplainLogical()) {
      TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
      compiler.init(queryState, console, db);
      compiler.compile(pCtx, rootTasks, inputs, outputs);
      fetchTask = pCtx.getFetchTask();
    }
    LOG.info("Completed plan generation");

    // 10. Put accessed columns into the ReadEntity set
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
      putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
    }

    // 11. If desired, check that we're not going over partition scan limits
    if (!ctx.isExplainSkipExecution()) {
      enforceScanLimits(pCtx, origFetchTask);
    }
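    // enforceScanLimits() fails the query when a table scan would read more
    // partitions than hive.limit.query.max.table.partition allows (if set).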

    return;
  }
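
For context, this is roughly how analyzeInternal() ends up being called. The sketch below is condensed from org.apache.hadoop.hive.ql.Driver#compile (assuming Hive 2.x APIs); error handling, hooks, and transaction/lock management are omitted, and conf (HiveConf), queryState (QueryState), and command (the SQL text) are assumed to be set up by the session, as they are in the real Driver.

  Context ctx = new Context(conf);
  ctx.setCmd(command);

  // Parse: SQL text -> ASTNode
  ASTNode tree = ParseUtils.parse(command, ctx);

  // Pick the analyzer for this statement type: SemanticAnalyzer /
  // CalcitePlanner for queries, DDLSemanticAnalyzer for most DDL, etc.
  BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);

  // analyze() calls initCtx(ctx) and then analyzeInternal(tree), the method
  // walked through above; validate() is a separate post-compilation check
  sem.analyze(tree, ctx);
  sem.validate();

  // The compiled plan is now available via sem.getRootTasks(),
  // sem.getFetchTask(), and sem.getResultSchema()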
