Quiz - Big Data Sample Exam
1. MapReduce code

package web;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WebSubmitter {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Delete the output path if it already exists so the job can be rerun.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(new Path(args[1]))) {
            fs.delete(new Path(args[1]), true);
        }
        // Pass conf to the job so it shares the same configuration as the FileSystem.
        Job job = Job.getInstance(conf);
        job.setJarByClass(WebSubmitter.class);
        job.setMapperClass(WebMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setReducerClass(WebReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
package web;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Input values are NullWritable to match the mapper's output value type.
public class WebReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    protected void reduce(Text k, Iterable<NullWritable> v, Context context)
            throws IOException, InterruptedException {
        // Each distinct key is written once, so identical records are deduplicated.
        context.write(k, NullWritable.get());
    }
}
package web;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WebMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Input record: phone,sex,mac,ip,url,datetime,age
        String[] line = value.toString().split(",");
        String phone = line[0];
        String sex = line[1];
        String wang = line[4];               // visited URL
        String time = line[line.length - 2]; // access datetime
        String age = line[line.length - 1];
        // Emit the selected fields as one tab-separated key.
        String we = phone + "\t" + sex + "\t" + wang + "\t" + time + "\t" + age;
        context.write(new Text(we), NullWritable.get());
    }
}
customer.txt (input data):
13726230503,0,00-FD-07-A4-72-B8:CMCC,192.168.33.3,http://www.imooc.cn/stu,2017-08-04 15:30:20,20
13826544101,1,5C-0E-8B-C7-F1-E0:CMCC,192.168.33.3,http://www.edu360.cn/teach,2017-08-04 15:35:20,19
13926435656,1,20-10-7A-28-CC-0A:CMCC,192.168.33.4,http://www.csdn.cn/stu,2017-08-04 15:30:20,28
13926251106,0,5C-0E-8B-8B-B1-50:CMCC,192.168.33.4,http://www.edu360.cn/job,2017-08-04 16:30:20,34
18211575961,1,94-71-AC-CD-E6-18:CMCC-EASY,192.168.33.5,http://www.imooc.cn/job,2017-08-04 15:40:20,12
13560439658,1,C4-17-FE-BA-DE-D9:CMCC,192.168.33.3,http://www.edu360.cn/stu,2017-08-05 15:30:20,34
15920133257,0,5C-0E-8B-C7-BA-20:CMCC,192.168.44.3,http://www.edu360.cn/teach,2017-08-05 15:35:20,26
13719199419,1,68-A1-B7-03-07-B1:CMCC-EASY,192.168.33.44,http://www.edu360.cn/stu,2017-08-05 15:30:20,40
15013685858,1,5C-0E-8B-C7-F7-90:CMCC,192.168.33.46,http://www.edu360.cn/job,2017-08-05 16:30:20,45
15989002119,0,E8-99-C4-4E-93-E0:CMCC-EASY,192.168.33.55,http://www.edu360.cn/job,2017-08-05 15:40:20,56
13560439658,0,C4-17-FE-BA-DE-D9:CMCC,192.168.133.3,http://www.csdn.cn/register,2017-08-06 15:30:20,27
13480253104,0,5C-0E-8B-C7-FC-80:CMCC-EASY,192.168.111.3,http://www.edu360.cn/register,2017-08-06 15:35:20,19
13602846565,1,5C-0E-8B-8B-B6-00:CMCC,192.168.34.44,http://www.imooc.cn/pay,2017-08-06 15:30:20,21
13922314466,0,00-FD-07-A2-EC-BA:CMCC,192.168.33.46,http://www.imooc0.cn/excersize,2017-08-06 16:30:20,28
13502468823,1,5C-0A-5B-6A-0B-D4:CMCC-EASY,192.168.33.55,http://www.csdn.cn/job,2017-08-06 15:40:20,35
13726230503,1,84-25-DB-4F-10-1A:CMCC-EASY,192.168.33.46,http://www.edu360.cn/excersize,2017-08-06 16:30:20,38
13925057413,0,00-1F-64-E1-E6-9A:CMCC,192.168.33.25,http://www.edu360.cn/job,2017-08-06 15:40:20,24
13760778710,0,00-FD-07-A4-7B-08:CMCC,192.168.33.36,http://www.csdn.cn/excersize,2017-08-06 16:30:20,42
13726230503,0,C4-17-FE-BA-DE-D9:CMCC,192.168.33.55,http://www.imooc.cn/job,2017-08-06 15:40:20,61
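The mapper keeps only the phone, sex, url, datetime, and age fields of each record. For example, the first record above becomes this tab-separated output line:

13726230503	0	http://www.imooc.cn/stu	2017-08-04 15:30:20	20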
2. Run on the virtual machine
- hadoop jar /opt/web.jar web.WebSubmitter /text/customer.txt /jie
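To sanity-check the job output before loading it into Hive, you can print the first few result lines (an optional check, assuming the /jie output path used above):

- hdfs dfs -cat /jie/part-r-00000 | head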
3. Create the database and table in Hive
- create database ren;
- use ren;
- create table cum_backup(phone bigint,sex string,wang string,time string,age int) row format delimited fields terminated by '\t';
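A quick optional check that the table was created with the expected schema:

- describe cum_backup;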
4. Load the data
- load data inpath '/jie/part-r-00000' into table cum_backup;
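Note that load data inpath (without LOCAL) moves the file out of /jie and into the table's warehouse directory, by default /user/hive/warehouse/ren.db/cum_backup/, which is the path the Sqoop export in step 7 reads from. An optional check that the rows arrived:

- select * from cum_backup limit 3;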
5. Create the database and table in MySQL
- create database ren;
- use ren;
- create table cum_backup(phone bigint,sex varchar(47),wang varchar(47),time varchar(47),age int);
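Here time stays a varchar because the source field is a plain 'yyyy-MM-dd HH:mm:ss' string; keeping the MySQL column types this loose avoids type-conversion failures during the Sqoop export. An optional check:

- desc cum_backup;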
6. Hive SQL queries
(1) Use Hive SQL to query the users aged 20 to 40 (I changed the names a bit; the table name follows the code above)
select * from cum_backup where age>=20 and age<=40;
(2) Use Hive SQL to count the numbers of "male" and "female" users by sex
select count(*),sex from cum_backup group by sex;
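For the sample data above this should return 10 for sex = 0 and 9 for sex = 1: all 19 composite keys are distinct, so the reducer's deduplication drops nothing.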
(3) Use Hive SQL to query the female users who visited http://www.edu360.cn/stu after 2017-08-04
select * from cum_backup where time>'2017-08-04' and wang ='http://www.edu360.cn/stu' and sex='1';
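This treats sex = '1' as female, following the wording of question (3). The time comparison works lexically because the column stores 'yyyy-MM-dd HH:mm:ss' strings; note that it also matches timestamps on 2017-08-04 itself, so compare against '2017-08-05' if "after" should exclude that day. On the sample data the query returns the 13560439658 and 13719199419 rows from 2017-08-05.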
7. Use Sqoop to export the Hive data into MySQL
sqoop export --connect jdbc:mysql://master:3306/ren --table cum_backup --username root --password 123456 --export-dir '/user/hive/warehouse/ren.db/cum_backup/part-r-00000' --columns phone,sex,wang,time,age --fields-terminated-by '\t' -m 1
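If the export succeeds, the rows are queryable from MySQL (an optional check; 19 rows are expected for the sample data):

- select count(*) from ren.cum_backup;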
(That's it for now. The exam questions are in the file named "大数据样卷".)