parse_url函数

参数

  • PATH:URL 的路径部分,例如 /login
  • HOST:主机名,例如 www.baidu.com
  • QUERY:查询字符串,例如 username=zs
  • PROTOCOL:协议,例如 http
package SparkSQL.fun
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Dataset, SparkSession}
/**
 * Demonstrates the Spark SQL built-in `parse_url(url, part)` function.
 *
 * A two-row dataset of `(id, url)` pairs is registered as the temporary view
 * `temp`, and `parse_url` is then applied to the `url` column once per URL
 * part. For the first sample row the expected results are:
 *
 *   - QUERY:    username=zs
 *   - HOST:     www.baidu.com
 *   - PATH:     /login
 *   - PROTOCOL: http
 */
object ParseUrlFun {

  /** URL parts passed to `parse_url`, in the order their results are shown. */
  private val UrlParts = Seq("QUERY", "HOST", "PATH", "PROTOCOL")

  /**
   * Runs the demo on a local Spark session and prints every result to stdout.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("func").setMaster("local[*]")
    val session = SparkSession.builder().config(conf).getOrCreate()

    // Stop the session even when one of the queries below fails.
    try {
      import session.implicits._

      val dataset: Dataset[(Int, String)] = session.createDataset(Seq(
        (1, "http://www.baidu.com/login?username=zs"),
        (2, "http://www.baidu.com/index?search=zs")
      ))
      val urls = dataset.toDF("id", "url")

      // NOTE(review): this prints the schema of the tuple Dataset, not of the
      // renamed DataFrame; kept as in the original -- confirm this is intended.
      dataset.printSchema()

      // getOrCreate() may hand back an existing session, so replace any view
      // named "temp" instead of failing when it is already registered.
      urls.createOrReplaceTempView("temp")

      session.sql("select * from temp").show()

      // One query per URL part; each shows a single-column result.
      UrlParts.foreach { part =>
        session.sql(s"select parse_url(url, '$part') from temp").show()
      }
    } finally {
      session.stop()
    }
  }
}
posted @   jsqup  阅读(95)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?
点击右上角即可分享
微信分享提示