运行 Spark SQL 时出现的一个问题

 

运行如下代码时,出现了 org.apache.spark.sql.AnalysisException 错误:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession}
 
/**
  * Implements IP-location lookup with Spark SQL: counts access-log hits per province.
  *
  * The IP rules file and the access log are both read as '|'-separated text,
  * registered as temporary views, range-joined on the numeric IP value, and the
  * per-province counts are printed in descending order.
  *
  * Created by lq on 2018/9/29 17:04.
  */
object SQLIPLocation {
  val rulesFilePath = "f:\\data\\ip.txt"
  val accessFilePath = "f:\\data\\access.log"

  /**
    * Entry point: runs the whole job on a local Spark session and prints the result.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
    val spark = SparkSession.builder().appName("SQLIPLocation").master("local[*]").getOrCreate()

    // try/finally guarantees the session is stopped even when reading, parsing
    // or query analysis fails.
    try {
      import spark.implicits._

      // Read the IP rules and keep (start_num, end_num, province) from
      // fields 2, 3 and 6 of every '|'-separated line.
      val ipRulesLine: Dataset[String] = spark.read.textFile(rulesFilePath)
      val ruleTuples: Dataset[(Long, Long, String)] = ipRulesLine.map { line =>
        val fields = line.split("[|]")
        (fields(2).toLong, fields(3).toLong, fields(6))
      }
      val ipRulesDF: DataFrame = ruleTuples.toDF("start_num", "end_num", "province")

      // createOrReplaceTempView instead of createTempView: getOrCreate() may hand
      // back an existing session in which the view name is already registered,
      // and createTempView throws in that case.
      ipRulesDF.createOrReplaceTempView("v_ip_rules")

      // Read the access log; field 1 of every '|'-separated line is the client IP,
      // converted by the project helper MyUtils.ip2Long
      // (presumably dotted-quad string -> Long; verify against MyUtils).
      val accessLogLine: Dataset[String] = spark.read.textFile(accessFilePath)
      import cn.edu360.spark.day06.MyUtils
      val ips: DataFrame = accessLogLine.map { line =>
        val fields = line.split("[|]")
        MyUtils.ip2Long(fields(1))
      }.toDF("ip")
      ips.createOrReplaceTempView("v_access_ip")

      // Range join: an access IP matches the rule whose [start_num, end_num] contains it.
      val result = spark.sql("SELECT province, COUNT(*) counts FROM v_ip_rules JOIN v_access_ip ON ip>=start_num AND ip<=end_num GROUP BY province ORDER BY counts DESC")

      // show() is the action that triggers execution.
      result.show()
    } finally {
      // Release resources.
      spark.stop()
    }
  }
}

  

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot resolve '`word`' given input columns: [value]; line 1 pos 56;
'Sort ['counts DESC NULLS LAST], true
+- 'Aggregate ['word], [value#10 AS word#13, count(1) AS counts#14L]
+- SubqueryAlias w_words, `w_words`
+- SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true) AS value#10]
+- MapPartitions <function1>, obj#9: java.lang.String
+- DeserializeToObject cast(value#0 as string).toString, obj#8: java.lang.String
+- Project [value#0]
+- Relation[value#0] text
 
at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:86)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:83)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:290)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:290)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:289)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:255)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:255)
at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:266)
at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:276)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$1.apply(QueryPlan.scala:280)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.AbstractTraversable.map(Traversable.scala:104)
at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:280)
at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$6.apply(QueryPlan.scala:285)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:188)
at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:285)
at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:255)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:83)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:76)
at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:128)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:127)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:127)
at scala.collection.immutable.List.foreach(List.scala:381)
at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:76)
at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:57)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:52)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:63)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:592)
at cn.edu360.spark.day08.SQLWorkCount$.main(SQLWorkCount.scala:28)
at cn.edu360.spark.day08.SQLWorkCount.main(SQLWorkCount.scala)

  

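找遍了网上所有解决方法,无果。

从上面的执行计划(`'Aggregate ['word], [value#10 AS word#13, count(1) AS counts#14L]`)可以看出,报错的 SQL 在 GROUP BY 中引用了 SELECT 列表里定义的别名 word(注意:该堆栈来自 SQLWorkCount 中的查询,而不是上面贴出的 SQLIPLocation)。Spark 2.1.x 不支持在 GROUP BY 中使用 SELECT 别名,所以会报 cannot resolve '`word`';从 Spark 2.2.0 开始才支持这种写法。

因此最后发现,把 pom.xml 中的

<spark.version>2.1.1</spark.version>

改为

<spark.version>2.2.1</spark.version>

即可。(如果无法升级 Spark,也可以把 GROUP BY word 改写为 GROUP BY value。)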


posted @   夕阳如火  阅读(20746)  评论(0编辑  收藏  举报
编辑推荐:
· .NET开发智能桌面机器人:用.NET IoT库编写驱动控制两个屏幕
· 用纯.NET开发并制作一个智能桌面机器人:从.NET IoT入门开始
· 一个超经典 WinForm,WPF 卡死问题的终极反思
· ASP.NET Core - 日志记录系统(二)
· .NET 依赖注入中的 Captive Dependency
阅读排行:
· 几个自学项目的通病,别因为它们浪费了时间!
· 在外漂泊的这几年总结和感悟,展望未来
· 如何在 ASP.NET Core 中实现速率限制?
· Kubernetes 知识梳理及集群搭建
· 一文搞懂SaaS架构建设流程:业务战略设计、架构蓝图设计、领域系统架构设计、架构治理与实施
点击右上角即可分享
微信分享提示