[root@hadoop1 conf]# cd /home/tuzq/software/hive/apache-hive-1.2.1-bin/conf
[root@hadoop1 conf]# cp hive-site.xml $SPARK_HOME/conf
[root@hadoop1 spark-1.6.2-bin-hadoop2.6]# cd $HADOOP_HOME/etc/hadoop
[root@hadoop1 hadoop]# cp core-site.xml $SPARK_HOME/conf
[root@hadoop1 hadoop]# cp hdfs-site.xml $SPARK_HOME/conf
[root@hadoop1 hadoop]# cd $SPARK_HOME/conf
[root@hadoop1 conf]# scp -r * root@hadoop2:$PWD
[root@hadoop1 conf]# scp -r * root@hadoop3:$PWD
[root@hadoop1 conf]# scp -r * root@hadoop4:$PWD
[root@hadoop1 conf]# scp -r * root@hadoop5:$PWD
bin/spark-shell --master spark://hadoop1:7077,hadoop2:7077 --executor-memory 1g --total-executor-cores 2 --driver-class-path /home/tuzq/software/spark-1.6.2-bin-hadoop2.6/lib/mysql-connector-java-5.1.38.jar
hive> create table person(id bigint,name string,age int) row format delimited fields terminated by " " ;
Time taken: 2.152 seconds
hive> show tables;
Time taken: 0.269 seconds, Fetched: 3 row(s)
[root@hadoop3 ~]# hdfs dfs -cat /person.txt
1 zhangsan 19
2 lisi 20
3 wangwu 28
4 zhaoliu 26
5 tianqi 24
6 chengnong 55
7 zhouxingchi 58
8 mayun 50
9 yangliying 30
10 lilianjie 51
11 zhanghuimei 35
12 lian 53
13 zhangyimou 54
[root@hadoop3 ~]# hdfs dfs -cat hdfs://mycluster/person.txt
1 zhangsan 19
2 lisi 20
3 wangwu 28
4 zhaoliu 26
5 tianqi 24
6 chengnong 55
7 zhouxingchi 58
8 mayun 50
9 yangliying 30
10 lilianjie 51
11 zhanghuimei 35
12 lian 53
13 zhangyimou 54
hive> load data inpath '/person.txt' into table person;
Loading data to table default.person
Table default.person stats: [numFiles=1, totalSize=193]
Time taken: 1.634 seconds
hive> select * from person;
1 zhangsan 19
2 lisi 20
3 wangwu 28
4 zhaoliu 26
5 tianqi 24
6 chengnong 55
7 zhouxingchi 58
8 mayun 50
9 yangliying 30
10 lilianjie 51
11 zhanghuimei 35
12 lian 53
13 zhangyimou 54
Time taken: 0.164 seconds, Fetched: 13 row(s)
如果是spark-2.1.1-bin-hadoop2.7,它没有sqlContext,所以要先执行:val sqlContext = new org.apache.spark.sql.SQLContext(sc)
如果是spark-1.6.2-bin-hadoop2.6,不用执行:val sqlContext = new org.apache.spark.sql.SQLContext(sc)
scala> sqlContext.sql("select * from person limit 2").show
| id| name|age|
| 1|zhangsan| 19|
| 2| lisi| 20|
或使用org.apache.spark.sql.hive.HiveContext (同样是在spark-shell这个shell命令下)
scala> import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.HiveContext
scala> val hiveContext = new HiveContext(sc)
Wed Jul 12 12:43:36 CST 2017 WARN: Establishing SSL connection without server's identity verification is not recommended. According to MySQL 5.5.45+, 5.6.26+ and 5.7.6+ requirements SSL connection must be established by default if explicit option isn't set. For compliance with existing applications not using SSL the verifyServerCertificate property is set to 'false'. You need either to explicitly disable SSL by setting useSSL=false, or set useSSL=true and provide truststore for server certificate verification.
Wed Jul 12 12:43:36 CST 2017 WARN: Establishing SSL connection without server's identity verification is not recommended. According to MySQL 5.5.45+, 5.6.26+ and 5.7.6+ requirements SSL connection must be established by default if explicit option isn't set. For compliance with existing applications not using SSL the verifyServerCertificate property is set to 'false'. You need either to explicitly disable SSL by setting useSSL=false, or set useSSL=true and provide truststore for server certificate verification.
hiveContext: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@6d9a46d7
scala> hiveContext.sql("select * from person")
res2: org.apache.spark.sql.DataFrame = [id: bigint, name: string, age: int]
scala> hiveContext.sql("select * from person").show
| id| name|age|
| 1| zhangsan| 19|
| 2| lisi| 20|
| 3| wangwu| 28|
| 4| zhaoliu| 26|
| 5| tianqi| 24|
| 6| chengnong| 55|
| 7|zhouxingchi| 58|
| 8| mayun| 50|
| 9| yangliying| 30|
| 10| lilianjie| 51|
| 11|zhanghuimei| 35|
| 12| lian| 53|
| 13| zhangyimou| 54|
bin/spark-sql \
--master spark://hadoop1:7077,hadoop2:7077 \
--executor-memory 1g \
--total-executor-cores 2 \
--driver-class-path /home/tuzq/software/spark-1.6.2-bin-hadoop2.6/lib/mysql-connector-java-5.1.38.jar
bin/spark-shell --master spark://hadoop1:7077,hadoop2:7077 --executor-memory 1g --total-executor-cores 2 --driver-class-path /home/tuzq/software/spark-1.6.2-bin-hadoop2.6/lib/mysql-connector-java-5.1.38.jar
scala> sqlContext.sql("select * from person limit 2")
res0: org.apache.spark.sql.DataFrame = [id: bigint, name: string, age: int]
scala> sqlContext.sql("select * from person limit 2").show
| id| name|age|
| 1|zhangsan| 19|
| 2| lisi| 20|
scala> import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.HiveContext
scala> val hiveContext = new HiveContext(sc)
scala> hiveContext.sql("select * from person")
res2: org.apache.spark.sql.DataFrame = [id: bigint, name: string, age: int]
scala> hiveContext.sql("select * from person").show
| id| name|age|
| 1| zhangsan| 19|
| 2| lisi| 20|
| 3| wangwu| 28|
| 4| zhaoliu| 26|
| 5| tianqi| 24|
| 6| chengnong| 55|
| 7|zhouxingchi| 58|
| 8| mayun| 50|
| 9| yangliying| 30|
| 10| lilianjie| 51|
| 11|zhanghuimei| 35|
| 12| lian| 53|
| 13| zhangyimou| 54|
