Scala 文件和正则表达式

1.读取行

/** Demonstrates reading a text file line by line and as a single string.
  *
  * A `Source` is a one-shot iterator: once `getLines()` has been consumed,
  * calling `mkString` on the same source yields an empty string. The lines are
  * therefore read exactly once into a list, and both the per-line output and
  * the joined string are derived from that list.
  */
object Test extends App {
  val source = scala.io.Source.fromFile("E:\\sparkhive\\src\\test\\scala\\test.txt", "UTF-8")

  // Read everything up front and always release the file handle, even on failure.
  val lines: List[String] =
    try source.getLines().toList
    finally source.close()

  val lineIterator = lines.iterator
  for (i <- lineIterator) {
    println(i)
  }

  // Convert to a single string (lines re-joined with '\n'; getLines strips the terminators).
  val line: String = lines.mkString("\n")
  println(line)
}

 

2.读取字符

 

/** Demonstrates reading a text file one character at a time.
  *
  * The source is closed in a `finally` block so the file handle is released
  * even if iteration fails part-way through.
  */
object Test extends App {
  val source = scala.io.Source.fromFile("E:\\sparkhive\\src\\test\\scala\\test.txt", "UTF-8")
  try {
    // Source extends Iterator[Char], so it can be traversed directly.
    for (i <- source) {
      println(s"$i--")
    }
  } finally {
    source.close()
  }
}

 

3.读取词法单元和数字

 

/** Demonstrates splitting a file into whitespace-separated tokens and
  * converting them to numbers.
  *
  * `toDouble` throws `NumberFormatException` for any token that is not a valid
  * number, so the input file is expected to contain only numeric tokens.
  */
object Test extends App {
  val source = scala.io.Source.fromFile("E:\\sparkhive\\src\\test\\scala\\test.txt", "UTF-8")

  // Whitespace-separated tokens. The delimiter is "\\s+" (runs of whitespace);
  // "\\S+" would split on the tokens themselves and return only the gaps.
  // Leading whitespace (or an empty file) produces an empty first element,
  // which is dropped so that toDouble below does not fail on "".
  val tokens =
    try source.mkString.split("\\s+").filter(_.nonEmpty)
    finally source.close()

  val numbers = for (w <- tokens) yield w.toDouble // convert strings to numbers
  val numbers2 = tokens.map(_.toDouble)            // equivalent, using map
}

 

4.从URL或其他源读取

 

/** Demonstrates the other `Source` factories: URL, in-memory string and
  * standard input.
  */
object Test extends App {
  // fromURL opens the network stream immediately (host name typo "horstamnn" fixed).
  val source1 = scala.io.Source.fromURL("http://horstmann.com", "UTF-8")
  val source2 = scala.io.Source.fromString("hello world!!")
  val source3 = scala.io.Source.stdin // reads from standard input

  // Release the sources this object opened. stdin is deliberately left open:
  // it is owned by the process, not by this object.
  source1.close()
  source2.close()
}

 

5.读取二进制文件

 

import java.io.{File, FileInputStream}

/** Demonstrates reading a whole binary file into a byte array.
  *
  * A single `InputStream.read(bytes)` call may return before the array is
  * full, and the original stream was never closed. `Files.readAllBytes` reads
  * the complete file and closes the underlying handle itself.
  */
object Test extends App {
  val filename = "test.txt"
  val file = new File(filename)

  // Read the entire file into a byte array.
  val bytes: Array[Byte] = java.nio.file.Files.readAllBytes(file.toPath)

  for (i <- bytes) {
    println(i)
  }

}

 

6.写入文本文件

 

import java.io.{File, FileInputStream, PrintWriter}

/** Demonstrates writing a text file: the integers 1 through 10 are written
  * to `result.txt`, one per line.
  */
object Test extends App {
  val out = new PrintWriter("result.txt")
  // Emit each number on its own line.
  (1 to 10).foreach { n =>
    out.println(n)
  }
  // Closing flushes buffered output to disk.
  out.close()
}

 

7.正则表达式

 

import scala.util.matching.Regex

/** Demonstrates the basic `scala.util.matching.Regex` operations: finding all
  * matches, the first match, a prefix match, and replacing matches.
  */
object Test extends App {

  val str = "ds1dsf2fa3"
  val pattern: Regex = "[0-9]+".r // a Regex object

  // When the expression contains backslashes or quotes, use the raw
  // triple-quoted string syntax """...""" to avoid double escaping.
  val pattern2 =
    """\s+[0-9]+\s+""".r

  // findAllIn returns an iterator over every match.
  for (i <- pattern.findAllIn(str)) {
    print(s"$i--")
  }

  // Convert the match iterator to an array.
  val matches = pattern.findAllIn(str).toArray
  println(matches.mkString)

  // findFirstIn returns the first match wrapped in an Option.
  println(pattern.findFirstIn(str)) // Some(1)

  // findPrefixOf only matches at the very start of the input.
  println(pattern.findPrefixOf(str)) // None

  // Replacement. Strings are immutable, so the replace methods return a new
  // string; the results are bound and printed rather than discarded.
  val replacedAll: String = pattern.replaceAllIn(str, "adsf")
  println(replacedAll) // dsadsfdsfadsffaadsf
  val replacedFirst: String = pattern.replaceFirstIn(str, "adsf")
  println(replacedFirst) // dsadsfdsf2fa3

}

 

8.正则表达式组

 

/** Demonstrates regex capture groups: a `Regex` can be used as an extractor
  * that binds each parenthesised group to a variable.
  */
object Test extends App {

  // Two groups: a run of digits, a single space, then a run of lowercase letters.
  val numitemPattern = "([0-9]+) ([a-z]+)".r

  // Extractor in a val definition: binds num = "99" and item = "abcd".
  val numitemPattern(num, item) = "99 abcd"

  // Apply the same extractor to every match found in the input.
  numitemPattern.findAllIn("99 abcd").foreach {
    case numitemPattern(count, name) =>
      println(count)
      println(name)
    case _ => // a match that does not fit the extractor is skipped
  }
}

 

posted @ 2019-05-08 15:48  问题不大1  阅读(216)  评论(0编辑  收藏  举报