《理财市场情绪监测系统》代码实现【1】之行业词库
系统的目标有两个:
一是行业分类;
二是每个行业的情感值计算;
①因为是基金推荐项目,分析了国民经济分类和同花顺,最终采用同花顺的66个行业分类为标准。
②行业分类依据词库:词库从百度词库、搜狗细胞词库中手工分析筛选后下载而来,下载得到的是未解析的格式,
我这里以‘深蓝词库转换’这个工具进行转换成对应的66个行业的66个txt,然后再用scala代码解析到mysql中,在MySQL中以words为索引建好表。
建表语句如下:
-- ============================================================================
-- Industry-lexicon schema and analysis queries for the market-sentiment
-- monitoring system.  Engine: MySQL, charset utf8.  Every analysis query
-- joins on `words`, so each word column carries a secondary index.
--
-- NOTE(review): several tables spell the session column `innserSessionid`
-- (sic, for "innerSessionId").  The typo is preserved here because existing
-- data and downstream queries use it -- confirm before renaming.
-- ============================================================================

-- Inspect noun words joined to their category.
-- NOTE(review): this query references `category`.`username` and `words2`.`sensi`,
-- neither of which appears in the `category` DDL below -- presumably an older
-- schema revision; verify against the live database before running.
SELECT w.`innersessionId`, w.`words`, c.`username`, c.`category`
FROM `words2` w
JOIN `category` c ON w.`words` = c.`username`
WHERE w.`sensi` = 'n';

-- category: first-pass classification result store.
CREATE TABLE `category` (
    `id`       INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `editdate` CHAR(20)  NOT NULL DEFAULT '20170622',  -- import date, yyyyMMdd
    `number`   CHAR(100) NOT NULL DEFAULT 'spell',     -- industry code, e.g. 881101
    `category` CHAR(100) NOT NULL DEFAULT '综合',      -- industry name
    `words`    CHAR(100) NOT NULL DEFAULT 'name',      -- lexicon word
    PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;
ALTER TABLE `category` ADD INDEX `category_words_idx` (`words`);

-- category2: deduplicated classification result store (same shape as category).
CREATE TABLE `category2` (
    `id`       INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `editdate` CHAR(20)  NOT NULL DEFAULT '20170622',
    `number`   CHAR(100) NOT NULL DEFAULT 'spell',
    `category` CHAR(100) NOT NULL DEFAULT '综合',
    `words`    CHAR(100) NOT NULL DEFAULT 'name',
    PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;
-- Fixed: the original script re-indexed `category` here instead of `category2`.
ALTER TABLE `category2` ADD INDEX `category2_words_idx` (`words`);

-- Fill category2 with the distinct rows of category.
-- Fixed: the original selected DATE/username, which do not exist in the
-- `category` DDL above; the matching columns are editdate/words.
INSERT INTO category2 (editdate, number, category, words)
SELECT DISTINCT editdate, number, category, words
FROM category;

-- title: raw source-log storage (one row per collected log record).
CREATE TABLE `title` (
    `id`              INT(12) UNSIGNED NOT NULL AUTO_INCREMENT,
    `bizNo`           VARCHAR(100),
    `bizType`         VARCHAR(100),
    `bizTypeEnum`     VARCHAR(100),
    `channelEnum`     VARCHAR(100),
    `channelId`       VARCHAR(100),
    `channelType`     VARCHAR(100),
    `collectTime`     VARCHAR(100),
    `hostIp`          VARCHAR(100),
    `hostName`        VARCHAR(100),
    `innerSessionId`  VARCHAR(100),
    `logNo`           VARCHAR(100),
    `merchantNo`      VARCHAR(100),
    `modelNo`         VARCHAR(100),
    `operateType`     VARCHAR(100),
    `operateTypeEnum` VARCHAR(100),
    `PROCESS`         VARCHAR(100),
    `productNo`       VARCHAR(100),
    `protocolVersion` VARCHAR(100),
    `receiveTime`     VARCHAR(100),
    `sessionId`       VARCHAR(100),
    `sourcetitle`     VARCHAR(100),
    `systemCode`      VARCHAR(100),
    `taskType`        VARCHAR(100),
    `TIME`            VARCHAR(100),
    `title`           VARCHAR(200),
    PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;

-- Fixed: a stray Scala fragment (`val pp = value.map(...)`) was pasted into
-- the middle of this script in the original; it belongs to the Scala importer
-- and was removed so the script runs end to end.

-- titlesplit: parsed/segmented titles (one row per token with POS + role).
CREATE TABLE `titlesplit` (
    `id`              INT(12) UNSIGNED NOT NULL AUTO_INCREMENT,
    `innserSessionid` VARCHAR(50),   -- sic, see header note
    `times`           VARCHAR(50),
    `channelType`     VARCHAR(50),
    `sourcetitle`     VARCHAR(500),
    `title`           VARCHAR(500),
    `words`           VARCHAR(500),  -- segmented word
    `characters`      VARCHAR(150),  -- POS tag (n, ns, b, j, ...)
    `refer`           VARCHAR(150),  -- dependency head
    `role`            VARCHAR(150),  -- dependency role (e.g. SBV)
    PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;
ALTER TABLE `titlesplit` ADD INDEX `titlesplit_words_idx` (`words`);

-- layer: sentiment-word extreme-value lexicon.
CREATE TABLE `layer` (
    `id`    INT(12) UNSIGNED NOT NULL AUTO_INCREMENT,
    `words` VARCHAR(500),
    `value` DECIMAL(10, 7),  -- sentiment polarity value
    PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;
ALTER TABLE `layer` ADD INDEX `layer_words_idx` (`words`);

-- Look up the sentiment value of each keyword occurrence.
SELECT l.id, l.`words`, t.`innserSessionid`, t.`times`, t.`id`,
       t.`words`, t.`refer`, t.`role`, l.`VALUE`
FROM layer l
JOIN titlesplit t ON l.`words` = t.`words`;

-- connect: joined result table (token + category + sentiment value).
CREATE TABLE `connect` (
    `id`              INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `innserSessionid` VARCHAR(50),
    `sourcetitle`     VARCHAR(500),
    `times`           VARCHAR(50),
    `words`           VARCHAR(500),
    `characters`      VARCHAR(150),
    `refer`           VARCHAR(150),
    `role`            VARCHAR(150),
    `category`        CHAR(100),
    `number`          CHAR(100),
    `VALUE`           DECIMAL(10, 7),
    PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;
ALTER TABLE `connect` ADD INDEX `connect_words_idx` (`words`);

-- Source query for filling `connect`: each token with its category and value.
SELECT titlesplit.`innserSessionid`, titlesplit.`sourcetitle`, titlesplit.`times`,
       titlesplit.`words`, titlesplit.`refer`, titlesplit.`role`,
       category.`category`, category.`number`, layer.`VALUE`
FROM titlesplit
LEFT JOIN category ON titlesplit.`words` = category.`words`
LEFT JOIN layer    ON titlesplit.`words` = layer.`words`;

-- CsenticNetlayer2: second sentiment lexicon (CSenticNet-derived).
CREATE TABLE `CsenticNetlayer2` (
    `id`         INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `words`      VARCHAR(250),
    `attitude`   VARCHAR(150),
    `expression` VARCHAR(150),
    `extreme`    VARCHAR(500),
    `VALUE`      DECIMAL(10, 7),
    PRIMARY KEY (`id`)
) DEFAULT CHARSET = utf8;
-- Fixed: the original script re-indexed `connect` here instead of the
-- newly created `CsenticNetlayer2`.
ALTER TABLE `CsenticNetlayer2` ADD INDEX `csenticnetlayer2_words_idx` (`words`);

-- ----------------------------------------------------------------------------
-- Topic-extraction logic
-- ----------------------------------------------------------------------------

-- 1. Extreme sentiment words must not become topic words: keep nouns that
--    appear in neither sentiment lexicon.
SELECT t.`innserSessionid`, t.`words`, l.`words`, l2.`words`, c.`category`, t.`characters`
FROM titlesplit t
JOIN category c               ON t.`words` = c.`words`
LEFT JOIN layer l             ON t.`words` = l.`words`
LEFT JOIN CsenticNetlayer2 l2 ON t.`words` = l2.`words`
WHERE l.`words` IS NULL
  AND l2.`words` IS NULL
  AND t.`characters` = 'n';

-- 2. Drop place names (POS tag 'ns').
SELECT t.`words`, t.`characters`, t.`refer`, t.`role`
FROM `titlesplit` t
WHERE t.`characters` = 'ns';

-- 3. POS-class filter (b j n nh ni nz ws; counts 25325 / 387).
SELECT DISTINCT t.`sourcetitle`, t.`characters`, t.`words`
FROM titlesplit t
WHERE t.`characters` IN ('b', 'j', 'nh', 'ni', 'nz', 'ws');

-- Same filter restricted to long titles.
-- Fixed: the original OR-chain let the length predicate bind only to the
-- last branch (AND binds tighter than OR); IN + AND applies it to all.
SELECT MIN(c.`id`), t.`sourcetitle`, t.`characters`, t.`words`, c.`category`
FROM titlesplit t
JOIN category c ON t.`words` = c.`words`
WHERE t.`characters` IN ('b', 'j', 'nh', 'ni', 'nz', 'ws')
  AND LENGTH(t.`sourcetitle`) > 16
GROUP BY t.`sourcetitle`, t.`characters`, t.`words`;

-- 4. Keep subjects: nouns/organizations in the SBV (subject-verb) role.
SELECT t.`innserSessionid`, t.`times`, t.`sourcetitle`, t.words,
       t.`characters`, t.`refer`, t.`role`
FROM `titlesplit` t
WHERE t.`characters` IN ('n', 'ni')
  AND role = 'SBV';

-- 5. Same subjects joined to their industry category.
SELECT t.`innserSessionid`, t.`times`, t.`sourcetitle`, t.words,
       t.`characters`, t.`refer`, t.`role`, c.`id`, c.`category`
FROM `titlesplit` t
JOIN category c ON t.`words` = c.`words`
WHERE t.`characters` IN ('n', 'ni')
  AND role = 'SBV';

-- Industry split (kept from original, disabled there too; `categorys`.`pos`
-- does not exist in the DDL above):
-- SELECT t.`innserSessionid`, t.`id`, c.`category`
-- FROM categorys c JOIN titlesplit t ON c.`words` = t.`words`
-- WHERE c.`pos` LIKE 'n%';

-- ----------------------------------------------------------------------------
-- Per-title sentiment aggregation
-- ----------------------------------------------------------------------------

-- layer lexicon: titles with a non-null sentiment sum (1613 rows observed).
SELECT t.`innserSessionid`, t.`sourcetitle`, t.sm
FROM (
    SELECT t.`innserSessionid`, t.`sourcetitle`, SUM(l.`VALUE`) AS sm
    FROM `titlesplit` t
    LEFT JOIN layer l ON t.`words` = l.words
    GROUP BY t.`innserSessionid`, t.`sourcetitle`
) t
WHERE t.sm IS NOT NULL;

-- layer lexicon: all titles, null sums included (1820 rows observed).
SELECT t.`innserSessionid`, t.`sourcetitle`, t.sm
FROM (
    SELECT t.`innserSessionid`, t.`sourcetitle`, SUM(l.`VALUE`) AS sm
    FROM `titlesplit` t
    LEFT JOIN layer l ON t.`words` = l.words
    GROUP BY t.`innserSessionid`, t.`sourcetitle`
) t;

-- CsenticNetlayer2 lexicon: non-null sums (1170 rows observed).
SELECT t.`innserSessionid`, t.`sourcetitle`, t.sm
FROM (
    SELECT t.`innserSessionid`, t.`sourcetitle`, SUM(l.`VALUE`) AS sm
    FROM `titlesplit` t
    LEFT JOIN CsenticNetlayer2 l ON t.`words` = l.words
    GROUP BY t.`innserSessionid`, t.`sourcetitle`
) t
WHERE t.sm IS NOT NULL;

-- CsenticNetlayer2 lexicon: all titles (1820 rows observed).
SELECT t.`innserSessionid`, t.`sourcetitle`, t.sm
FROM (
    SELECT t.`innserSessionid`, t.`sourcetitle`, SUM(l.`VALUE`) AS sm
    FROM `titlesplit` t
    LEFT JOIN CsenticNetlayer2 l ON t.`words` = l.words
    GROUP BY t.`innserSessionid`, t.`sourcetitle`
) t;

-- Both lexicons side by side per title.
SELECT t.`innserSessionid`, t.`sourcetitle`, t.sm1, t.sm2
FROM (
    SELECT t.`innserSessionid`, t.`sourcetitle`,
           SUM(l.`VALUE`)  AS sm1,
           SUM(l2.`VALUE`) AS sm2
    FROM `titlesplit` t
    LEFT JOIN CsenticNetlayer2 l ON t.`words` = l.words
    LEFT JOIN layer l2           ON t.`words` = l2.`words`
    GROUP BY t.`innserSessionid`, t.`sourcetitle`
) t;

-- Ad-hoc checks against the `sentiment` lexicon table.
-- NOTE(review): `sentiment` has no DDL in this script -- confirm its schema
-- (`words`, `classification`, `value`) against the live database.
SELECT DISTINCT t.`innserSessionid`, t.`words`, s.`classification`, s.`value`
FROM `titlesplit` t
JOIN `sentiment` s ON s.`words` = t.`words`;

SELECT DISTINCT t.`innserSessionid` FROM `titlesplit` t;

SELECT * FROM `sentiment` s WHERE s.`words` = '坦诚';

SELECT * FROM `titlesplit` t
WHERE t.`innserSessionid` = '6D238824ECD84D1C9BF42B2920E3FE9C';
将66个行业词库导入到MySQL中的scala代码如下
import java.sql.{DriverManager, ResultSet} import org.apache.spark.SparkContext import org.apache.spark.SparkConf object category { val rl= "jdbc:mysql://192.168.0.37:3306/emotional?user=root&password=123456&useUnicode=true&characterEncoding=utf8&autoReconnect=true&failOverReadOnly=false" classOf[com.mysql.jdbc.Driver] val conn = DriverManager.getConnection(rl) def main(args: Array[String]) { val conf = new SparkConf().setMaster("local").setAppName("test") val sc = new SparkContext(conf) val sqlContext = new org.apache.spark.sql.SQLContext(sc) // val role = "jdbc:mysql://192.168.0.37:3306/emotional?user=root&password=123456&useUnicode=true&characterEncoding=utf8&autoReconnect=true&failOverReadOnly=false" val log881101 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881101.种植业与林业.txt") val format = new java.text.SimpleDateFormat("yyyyMMdd") val date = format.format(new java.util.Date().getTime()).toString val rowRDD881101= log881101.map(line => (line.split(" ").head.trim(), line.split(" ").head.trim(), line.split(" ").head.trim(), line.split(" ").last.trim)) val s881101 = rowRDD881101.map(line => (date, "881101", "种植业与林业", line._4)) import sqlContext.implicits._ val s2881101= s881101.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881101= sqlContext.sql("select distinct editdate,number,category,words from category") val p881101= value881101.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881101.foreach(p => { val v0=p._1 val v1=p._2 val v2=p._3 val v3=p._4 insert(v0,v1,v2,v3) }) val log881102 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881102.养殖业.txt") val rowRDDlog881102=log881102.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881102=rowRDDlog881102.map(line=>(date,"881102","养殖业",line._4)) import 
sqlContext.implicits._ val s2881102= slog881102.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881102= sqlContext.sql("select distinct editdate,number,category,words from category") val p881102= value881102.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881102.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881103 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881103.农产品加工.txt") val rowRDDlog881103=log881103.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881103=rowRDDlog881103.map(line=>(date,"881103","农产品加工",line._4)) import sqlContext.implicits._ val s2881103= slog881103.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881103= sqlContext.sql("select distinct editdate,number,category,words from category") val p881103= value881103.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881103.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881104 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881104.农业服务.txt") val rowRDDlog881104=log881104.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881104=rowRDDlog881104.map(line=>(date,"881104","农业服务",line._4)) import sqlContext.implicits._ val s2881104= slog881104.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881104= sqlContext.sql("select distinct editdate,number,category,words from category") val p881104= value881104.map(p => { val v0 = p.getString(0) val v1 = 
p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881104.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881105 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881105.煤炭开采.txt") val rowRDDlog881105=log881105.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881105=rowRDDlog881105.map(line=>(date,"881105","煤炭开采",line._4)) import sqlContext.implicits._ val s2881105= slog881105.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881105= sqlContext.sql("select distinct editdate,number,category,words from category") val p881105= value881105.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881105.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881106 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881106.石油矿业开采.txt") val rowRDDlog881106=log881106.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881106=rowRDDlog881106.map(line=>(date,"881106","石油矿业开采",line._4)) import sqlContext.implicits._ val s2881106= slog881106.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881106= sqlContext.sql("select distinct editdate,number,category,words from category") val p881106= value881106.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881106.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881107 = 
sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881107.采掘服务.txt") val rowRDDlog881107=log881107.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881107=rowRDDlog881107.map(line=>(date,"881107","采掘服务",line._4)) import sqlContext.implicits._ val s2881107= slog881107.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881107= sqlContext.sql("select distinct editdate,number,category,words from category") val p881107= value881107.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881107.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881108 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881108.基础化学.txt") val rowRDDlog881108=log881108.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881108=rowRDDlog881108.map(line=>(date,"881108","基础化学",line._4)) import sqlContext.implicits._ val s2881108= slog881108.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881108= sqlContext.sql("select distinct editdate,number,category,words from category") val p881108= value881108.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881108.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881109 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881109.化学制品.txt") val rowRDDlog881109=log881109.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881109=rowRDDlog881109.map(line=>(date,"881109","化学制品",line._4)) import 
sqlContext.implicits._ val s2881109= slog881109.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881109= sqlContext.sql("select distinct editdate,number,category,words from category") val p881109= value881109.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881109.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881110 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881110.化工合成材料.txt") val rowRDDlog881110=log881110.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881110=rowRDDlog881110.map(line=>(date,"881110","化工合成材料",line._4)) import sqlContext.implicits._ val s2881110= slog881110.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881110= sqlContext.sql("select distinct editdate,number,category,words from category") val p881110= value881110.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881110.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881111 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881111.化工新材料.txt") val rowRDDlog881111=log881111.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881111=rowRDDlog881111.map(line=>(date,"881111","化工新材料",line._4)) import sqlContext.implicits._ val s2881111= slog881111.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881111= sqlContext.sql("select distinct editdate,number,category,words from category") val p881111= value881111.map(p => { val v0 = p.getString(0) val v1 = 
p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881111.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881112 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881112.钢铁.txt") val rowRDDlog881112=log881112.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881112=rowRDDlog881112.map(line=>(date,"881112","钢铁",line._4)) import sqlContext.implicits._ val s2881112= slog881112.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881112= sqlContext.sql("select distinct editdate,number,category,words from category") val p881112= value881112.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881112.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881113 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881113.有色冶炼加工.txt") val rowRDDlog881113=log881113.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881113=rowRDDlog881113.map(line=>(date,"881113","有色冶炼加工",line._4)) import sqlContext.implicits._ val s2881113= slog881113.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881113= sqlContext.sql("select distinct editdate,number,category,words from category") val p881113= value881113.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881113.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881114 = 
sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881114.新材料.txt") val rowRDDlog881114=log881114.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881114=rowRDDlog881114.map(line=>(date,"881114","新材料",line._4)) import sqlContext.implicits._ val s2881114= slog881114.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881114= sqlContext.sql("select distinct editdate,number,category,words from category") val p881114= value881114.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881114.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881115 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881115.建筑材料.txt") val rowRDDlog881115=log881115.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881115=rowRDDlog881115.map(line=>(date,"881115","建筑材料",line._4)) import sqlContext.implicits._ val s2881115= slog881115.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881115= sqlContext.sql("select distinct editdate,number,category,words from category") val p881115= value881115.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881115.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881116 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881116.建筑装饰.txt") val rowRDDlog881116=log881116.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881116=rowRDDlog881116.map(line=>(date,"881116","建筑材料",line._4)) import 
sqlContext.implicits._ val s2881116= slog881116.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881116= sqlContext.sql("select distinct editdate,number,category,words from category") val p881116= value881116.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881116.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881117 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881117.通用设备.txt") val rowRDDlog881117=log881117.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881117=rowRDDlog881117.map(line=>(date,"881117","通用设备",line._4)) import sqlContext.implicits._ val s2881117= slog881117.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881117= sqlContext.sql("select distinct editdate,number,category,words from category") val p881117= value881117.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881117.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881118 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881118.专用设备.txt") val rowRDDlog881118=log881118.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881118=rowRDDlog881118.map(line=>(date,"881118","专用设备",line._4)) import sqlContext.implicits._ val s2881118= slog881118.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881118= sqlContext.sql("select distinct editdate,number,category,words from category") val p881118= value881118.map(p => { val v0 = p.getString(0) val v1 = 
p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881118.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881119 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881119.仪器仪表.txt") val rowRDDlog881119=log881119.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881119=rowRDDlog881119.map(line=>(date,"881119","仪器仪表",line._4)) import sqlContext.implicits._ val s2881119= slog881119.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881119= sqlContext.sql("select distinct editdate,number,category,words from category") val p881119= value881119.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881119.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881120 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881120.电气设备.txt") val rowRDDlog881120=log881120.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881120=rowRDDlog881120.map(line=>(date,"881120","电气设备",line._4)) import sqlContext.implicits._ val s2881120= slog881120.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881120= sqlContext.sql("select distinct editdate,number,category,words from category") val p881120= value881120.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881120.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881121 = 
sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881121.半导体及元件.txt") val rowRDDlog881121=log881121.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881121=rowRDDlog881121.map(line=>(date,"881121","半导体及元件",line._4)) import sqlContext.implicits._ val s2881121= slog881121.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881121= sqlContext.sql("select distinct editdate,number,category,words from category") val p881121= value881121.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881121.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881122 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881122.光学光电子.txt") val rowRDDlog881122=log881122.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881122=rowRDDlog881122.map(line=>(date,"881122","光学光电子",line._4)) import sqlContext.implicits._ val s2881122= slog881122.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881122= sqlContext.sql("select distinct editdate,number,category,words from category") val p881122= value881122.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881122.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881123 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881123.其他电子.txt") val rowRDDlog881123=log881123.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881123=rowRDDlog881123.map(line=>(date,"881123","其他电子",line._4)) 
import sqlContext.implicits._ val s2881123= slog881123.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881123= sqlContext.sql("select distinct editdate,number,category,words from category") val p881123= value881123.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881123.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881124 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881124.电子制造.txt") val rowRDDlog881124=log881124.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881124=rowRDDlog881124.map(line=>(date,"881124","电子制造",line._4)) import sqlContext.implicits._ val s2881124= slog881124.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881124= sqlContext.sql("select distinct editdate,number,category,words from category") val p881124= value881124.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881124.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881125 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881125.汽车整车.txt") val rowRDDlog881125=log881125.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881125=rowRDDlog881125.map(line=>(date,"881125","汽车整车",line._4)) import sqlContext.implicits._ val s2881125= slog881125.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881125= sqlContext.sql("select distinct editdate,number,category,words from category") val p881125= value881125.map(p => { val v0 = p.getString(0) val v1 = 
p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881125.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881126 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881126.汽车零部件.txt") val rowRDDlog881126=log881126.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881126=rowRDDlog881126.map(line=>(date,"881126","汽车零部件",line._4)) import sqlContext.implicits._ val s2881126= slog881126.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881126= sqlContext.sql("select distinct editdate,number,category,words from category") val p881126= value881126.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881126.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881127 = sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881127.非汽车交运.txt") val rowRDDlog881127=log881127.map(line=>(line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").head.trim(),line.split(" ").last.trim)) val slog881127=rowRDDlog881127.map(line=>(date,"881127","非汽车交运",line._4)) import sqlContext.implicits._ val s2881127= slog881127.toDF("editdate", "number", "category", "words").registerTempTable("category") val value881127= sqlContext.sql("select distinct editdate,number,category,words from category") val p881127= value881127.map(p => { val v0 = p.getString(0) val v1 = p.getString(1).toString val v2 = p.getString(2).toString val v3 = p.getString(3).toString (v0,v1,v2,v3) }) p881127.foreach(p => { val v0=p._1 val v1=p._2.toString val v2=p._3.toString val v3=p._4.toString insert(v0,v1,v2,v3) }) val log881128 = 
sc.textFile("hdfs://192.168.0.211:9000/user/hadoop/emotion/881128.交运设备服务.txt")
    // NOTE(review): the dangling `val log881128 =` on the previous line is completed by
    // the textFile call above but is otherwise unused — every category, 881128 included,
    // is handled by the generic loop below. The original repeated the same 8-statement
    // pipeline 39 times with only the code / category name / file path changed.

    // (industry code, industry name) for all 39 word lists. The HDFS file for each
    // category is named "<code>.<name>.txt" under the emotion/ directory.
    val industries: Seq[(String, String)] = Seq(
      ("881128", "交运设备服务"), ("881129", "通信设备"),   ("881130", "计算机设备"),
      ("881131", "白色家电"),     ("881132", "视听器材"),   ("881133", "饮料制造"),
      ("881134", "食品加工制造"), ("881135", "纺织制造"),   ("881136", "服装家纺"),
      ("881137", "造纸"),         ("881138", "包装印刷"),   ("881139", "家用轻工"),
      ("881140", "化学制药"),     ("881141", "中药"),       ("881142", "生物制品"),
      ("881143", "医药商业"),     ("881144", "医疗器械服务"), ("881145", "电力"),
      ("881146", "燃气水务"),     ("881147", "环保工程"),   ("881148", "港口航运"),
      ("881149", "公路铁路运输"), ("881150", "公交"),       ("881151", "机场航运"),
      ("881152", "物流"),         ("881153", "房地产开发"), ("881154", "园区开发"),
      ("881155", "银行"),         ("881156", "保险及其他"), ("881157", "证券"),
      ("881158", "零售"),         ("881159", "贸易"),       ("881160", "景点及旅游"),
      ("881161", "酒店及餐饮"),   ("881162", "通信服务"),   ("881163", "计算机应用"),
      ("881164", "传媒"),         ("881165", "综合"),       ("881166", "国防军工")
    )

    // Load one category's dictionary file and insert its distinct words into the
    // MySQL `category` table as (editdate, number, category, words) rows.
    // Each input line is "<spell> <word>"; only the last space-separated token (the
    // word itself) is kept — the original built a 4-tuple whose first three slots
    // were all `split(" ").head` and were discarded anyway.
    def loadIndustry(number: String, name: String): Unit = {
      val path = "hdfs://192.168.0.211:9000/user/hadoop/emotion/" + number + "." + name + ".txt"
      val words = sc.textFile(path).map(line => line.split(" ").last.trim).distinct()
      // collect() brings the (small) dictionary to the driver so the shared JDBC
      // `conn` can be used safely. The original called `insert` inside RDD.foreach,
      // which would attempt to serialize the driver-side connection to executors.
      // `date` is assumed to be the editdate value defined earlier in this method
      // — TODO confirm against the lines above this chunk.
      words.collect().foreach(word => insert(date, number, name, word))
    }

    industries.foreach { case (number, name) => loadIndustry(number, name) }

    // 解析结果去重后插入 category2: read the accumulated `category` table back from
    // MySQL, de-duplicate, and store the result in `category2`.
    val job = sqlContext.jdbc("jdbc:mysql://192.168.0.37:3306/emotional?user=root&password=123456", "category")
    job.registerTempTable("job")
    sqlContext.sql("select distinct editdate,number,category,words from job")
      .collect()
      .foreach(r => insert2(r.getString(0), r.getString(1), r.getString(2), r.getString(3)))
    conn.close()
  }

  /** Insert one (editdate, number, category, words) row into `category`.
    * Values are bound through a prepared statement; any exception is logged and
    * swallowed so a single bad row does not abort the whole load (same error
    * policy as the original code). */
  def insert(value0: String, value1: String, value2: String, value3: String): Unit = {
    try {
      val prep = conn.prepareStatement(
        "INSERT INTO category(editdate,number,category,words) VALUES (?,?,?,?)")
      try {
        prep.setString(1, value0)
        prep.setString(2, value1)
        prep.setString(3, value2)
        prep.setString(4, value3)
        prep.executeUpdate()
      } finally {
        prep.close() // fix: the statement was never closed, leaking a resource per row
      }
    } catch {
      case e: Exception => e.printStackTrace()
    }
  }

  /** Insert one de-duplicated (editdate, number, category, words) row into
    * `category2`; same binding and error policy as `insert`. */
  def insert2(value0: String, value1: String, value2: String, value3: String): Unit = {
    try {
      val prep = conn.prepareStatement(
        "INSERT INTO category2(editdate,number,category,words) VALUES (?,?,?,?)")
      try {
        prep.setString(1, value0)
        prep.setString(2, value1)
        prep.setString(3, value2)
        prep.setString(4, value3)
        prep.executeUpdate()
      } finally {
        prep.close() // fix: the statement was never closed, leaking a resource per row
      }
    } catch {
      case e: Exception => e.printStackTrace()
    }
  }
}