parse_url函数

参数

  • PATH:路径,例如 /login
  • HOST:主机名,例如 www.baidu.com
  • QUERY:查询字符串,例如 username=zs
  • PROTOCOL:协议,例如 http
  (注意:parse_url 的第二个参数区分大小写,必须使用大写,如 'PATH'、'HOST'。)
package SparkSQL.fun

import org.apache.spark.SparkConf
import org.apache.spark.sql.{Dataset, SparkSession}

/**
 * Small demo of Spark SQL's built-in `parse_url` function.
 *
 * Builds a two-row in-memory table of `(id, url)`, registers it as the
 * temporary view `temp`, and prints the result of extracting several URL
 * parts with `parse_url(url, '<PART>')`.
 *
 * For the first sample row (`http://www.baidu.com/login?username=zs`) the
 * extracted parts are:
 *   - QUERY:    username=zs
 *   - HOST:     www.baidu.com
 *   - PATH:     /login
 *   - PROTOCOL: http
 */
object ParseUrlFun {

  /**
   * Entry point: runs the demo on a local Spark session and prints each
   * result table to stdout.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("func").setMaster("local[*]")
    val session = SparkSession.builder().config(conf).getOrCreate()

    // Always release the session, even when one of the queries fails.
    try {
      import session.implicits._

      val urls: Dataset[(Int, String)] = session.createDataset(Seq(
        (1, "http://www.baidu.com/login?username=zs"),
        (2, "http://www.baidu.com/index?search=zs")
      ))

      // Name the tuple columns so SQL can refer to them as `id` and `url`.
      val named = urls.toDF("id", "url")
      // Print the schema of the frame that is actually queried below
      // (id/url), not the raw tuple dataset (_1/_2).
      named.printSchema()

      // getOrCreate() may hand back an existing session; replacing the view
      // avoids failing if a view called `temp` is already registered there.
      named.createOrReplaceTempView("temp")
      session.sql("select * from temp").show()

      // The part name is passed in upper case, exactly as in the SQL literals.
      val parts = Seq("QUERY", "HOST", "PATH", "PROTOCOL")
      parts.foreach { part =>
        session.sql(s"select parse_url(url, '$part') from temp").show()
      }
    } finally {
      session.stop()
    }
  }
}
posted @ 2022-09-05 20:51  jsqup  阅读(84)  评论(0编辑  收藏  举报