// Spark GeoIP

import java.io.File
import scala.io.Source
import com.sanoma.cda.geoip.MaxMindIpGeo
import com.sanoma.cda.geo.Point
import java.io.PrintWriter

// Shared GeoIP lookup handle backed by the GeoIP2 City database file.
// NOTE(review): the second argument (1000) is presumably a lookup-cache size — confirm
// against the MaxMindIpGeo signature.
val geoIp = MaxMindIpGeo(
  "/data/elas-input/GeoIP2-City.mmdb",
  1000,
  synchronized = true
)

/**
 * Enriches every regular file directly under `srcDir` with GeoIP data and writes
 * the result to a same-named file under `dstDir`.
 *
 * Each input line is split on tabs and field 0 is passed to `geoIp.getLocation`.
 *  - Location found: the row is fields 0-4, country code, country name, region,
 *    city, latitude, longitude, postal code, continent, then field 5.
 *  - Location not found: the row is fields 0-4 followed by one empty column.
 * Columns are joined with commas; values are not escaped.
 *
 * @param srcDir directory whose regular files are processed (sub-directories are skipped)
 * @param dstDir directory that receives one output file per input file
 * @throws ArrayIndexOutOfBoundsException if a line has fewer tab-separated fields
 *                                        than the row layout reads
 */
def iter_dir(srcDir:String,dstDir:String): Unit ={
  // File.listFiles() returns null (not an empty array) when srcDir is missing or
  // is not a directory; report it instead of failing with a NullPointerException.
  val listing = Option(new File(srcDir).listFiles())
  if (listing.isEmpty) {
    System.err.println(s"iter_dir: cannot list directory ${srcDir}")
  }
  val files = listing.getOrElse(Array.empty[File]).filter(_.isFile)

  for (item <- files) {
    println(item.getName)
    val out = new PrintWriter(s"""${dstDir}/${item.getName}""")
    try {
      val src = Source.fromFile(item)
      try {
        for (line <- src.getLines()) {
          val it = line.split("\t")
          geoIp.getLocation(it(0)) match {
            case None =>
              // Six arguments need six placeholders; the previous four-placeholder
              // format silently dropped it(4) and the trailing empty column.
              out.printf("%s,%s,%s,%s,%s,%s\n", it(0), it(1), it(2), it(3), it(4), "")
            case Some(loc) =>
              val countryCode = loc.countryCode.getOrElse("")
              val countryName = loc.countryName.getOrElse("")
              val region = loc.region.getOrElse("")
              val city = loc.city.getOrElse("")
              val latitude = loc.geoPoint.map(_.latitude.toString).getOrElse("")
              val longitude = loc.geoPoint.map(_.longitude.toString).getOrElse("")
              val postalCode = loc.postalCode.getOrElse("")
              val continent = loc.continent.getOrElse("")
              out.printf("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n",it(0),it(1),it(2),it(3),it(4),countryCode,countryName,region,city,latitude,longitude,postalCode,continent,it(5))
          }
        }
      } finally {
        // Release the input file handle even if a line fails to process.
        src.close()
      }
    } finally {
      // Flush and close the output file even on failure so no handle leaks.
      out.close()
    }
  }
}
// Run the enrichment over the input directory, writing results to the output directory.
iter_dir(
  srcDir = "/data/elas-input/uniqServiceDir",
  dstDir = "/data/elas-input/tsoutput"
)

// Extract the text that follows the continent marker inside `str`.
// NOTE(review): `str` is not defined in the visible part of the script — presumably
// one output row produced by iter_dir; confirm.
val str2 = "North America"
val index = str.indexOf(str2)
// Skip the marker itself plus the single separator character after it.
// (The offset must use the marker's length, not the length of the whole string.)
val index2 = str2.length + index + 1
// Empty when the marker is absent or nothing follows it.
val content = if (index >= 0 && index2 <= str.length) str.substring(index2) else ""
posted @ 2016-09-26 14:56  蚂蚁都督  阅读(407)  评论(0编辑  收藏  举报