An issue encountered when running Spark SQL
When I run the following code, it fails with an org.apache.spark.sql.AnalysisException (full console output below):
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}

/**
  * IP location implemented with Spark SQL
  * Created by lq on 2018/9/29 17:04.
  */
object SQLIPLocation {

  val rulesFilePath = "f:\\data\\ip.txt"
  val accessFilePath = "f:\\data\\access.log"

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)

    val spark = SparkSession.builder().appName("SQLIPLocation").master("local[*]").getOrCreate()

    // Read the IP rules data
    val ipRulesLine: Dataset[String] = spark.read.textFile(rulesFilePath)

    // Parse the IP rules data
    import spark.implicits._
    val tpRDDs: Dataset[(Long, Long, String)] = ipRulesLine.map(line => {
      val fields = line.split("[|]")
      val startNum = fields(2).toLong
      val endNum = fields(3).toLong
      val province = fields(6)
      (startNum, endNum, province)
    })

    val ipRulesDF: DataFrame = tpRDDs.toDF("start_num", "end_num", "province")

    // Register the IP rules as a view
    ipRulesDF.createTempView("v_ip_rules")

    // Read the access log data
    val accessLogLine: Dataset[String] = spark.read.textFile(accessFilePath)

    // Parse the access log data
    import cn.edu360.spark.day06.MyUtils
    val ips: DataFrame = accessLogLine.map(line => {
      val fields = line.split("[|]")
      val ip = fields(1)
      MyUtils.ip2Long(ip)
    }).toDF("ip")

    // Register the access log data as a view
    ips.createTempView("v_access_ip")

    // Join the two tables with SQL
    val result = spark.sql("SELECT province, COUNT(*) counts FROM v_ip_rules JOIN v_access_ip ON ip>=start_num AND ip<=end_num GROUP BY province ORDER BY counts DESC")

    // Trigger the action
    result.show()

    // Release resources
    spark.stop()
  }
}
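The helper MyUtils.ip2Long imported above is not shown in the post; it converts a dotted-decimal IP string into the same numeric form as start_num/end_num so the range join can work. Below is a minimal sketch of what such a helper usually looks like; the object name simply mirrors the import, and the actual implementation in cn.edu360.spark.day06 may differ.

object MyUtils {
  // Convert a dotted-decimal IPv4 string (e.g. "1.2.3.4") into a Long,
  // so it can be range-compared against start_num / end_num.
  def ip2Long(ip: String): Long = {
    val fragments = ip.split("[.]")
    var ipNum = 0L
    for (i <- fragments.indices) {
      // Shift the accumulated value left by one byte and add the next octet
      ipNum = fragments(i).toLong | ipNum << 8L
    }
    ipNum
  }
}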
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot resolve '`word`' given input columns: [value]; line 1 pos 56;
'Sort ['counts DESC NULLS LAST], true
+- 'Aggregate ['word], [value#10 AS word#13, count(1) AS counts#14L]
   +- SubqueryAlias w_words, `w_words`
      +- SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true) AS value#10]
         +- MapPartitions <function1>, obj#9: java.lang.String
            +- DeserializeToObject cast(value#0 as string).toString, obj#8: java.lang.String
               +- Project [value#0]
                  +- Relation[value#0] text

    at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:86)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:83)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:290)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:290)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:289)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:255)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:255)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:266)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:276)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$1.apply(QueryPlan.scala:280)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:280)
    at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$6.apply(QueryPlan.scala:285)
    at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:285)
    at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:255)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:83)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:76)
    at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:128)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:127)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:127)
    at scala.collection.immutable.List.foreach(List.scala:381)
    at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
    at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:76)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:57)
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:52)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:63)
    at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
    at cn.edu360.spark.day08.SQLWorkCount$.main(SQLWorkCount.scala:28)
    at cn.edu360.spark.day08.SQLWorkCount.main(SQLWorkCount.scala)
I tried every fix I could find online, without success.
In the end, it turned out that simply changing the Spark version in pom.xml from

<spark.version>2.1.1</spark.version>

to

<spark.version>2.2.1</spark.version>

was enough to make the error go away.
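For reference, this property normally lives in the <properties> section of pom.xml and is picked up by the Spark dependencies via ${spark.version}. A sketch of the relevant part of a typical Spark + Scala 2.11 Maven project follows; the scala.version value and the exact dependency list are assumptions, not copied from the original project.

<properties>
    <scala.version>2.11.8</scala.version>
    <spark.version>2.2.1</spark.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <!-- other Spark/Scala dependencies would resolve ${spark.version} / ${scala.version} the same way -->
</dependencies>

After editing the property, re-import the Maven project (or run mvn clean compile) so the new spark-sql jar is actually used.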