大数据课堂实验
今天我先搭建了环境
自己修改了网上的代码,分为两个类:LogBean 负责对数据进行封装打包
package org.apache.hadoop.examples;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Hadoop-serializable bean for one parsed log record: user id, timestamp,
 * traffic, book column and "voide" column.
 *
 * <p>NOTE(review): "voide" looks like a typo of "video", but the field and
 * accessor names are preserved because external code may already depend on
 * them. Likewise the legacy setters {@link #setIp(String)} and
 * {@link #setUrl(String)} are kept (delegating to the correctly named ones)
 * for backward compatibility.
 *
 * <p>Beware the constructor parameter order: it is
 * {@code (id, traffic, time, book, voide)} — traffic comes BEFORE time.
 */
public class LogBean implements WritableComparable<LogBean> {

    private String id;
    private String time;
    private String traffic;
    private String book;
    private String voide;

    /** No-arg constructor required by Hadoop's Writable deserialization. */
    public LogBean() {
        super();
    }

    /**
     * @param id      user/record id
     * @param traffic traffic value (second parameter — see class note)
     * @param time    timestamp string (third parameter — see class note)
     * @param book    book column
     * @param voide   video column (name kept as-is, see class note)
     */
    public LogBean(String id, String traffic, String time, String book, String voide) {
        this.time = time;
        this.id = id;
        this.traffic = traffic;
        this.book = book;
        this.voide = voide;
    }

    @Override
    public String toString() {
        // Fixed: the original used ' ' (a space char literal) where the
        // closing quote '\'' was clearly intended, e.g. "id='x " vs "id='x'".
        return "LogBean{" +
                "id='" + id + '\'' +
                ", time='" + time + '\'' +
                ", traffic='" + traffic + '\'' +
                ", book='" + book + '\'' +
                ", voide='" + voide + '\'' +
                '}';
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public String getId() {
        return id;
    }

    /** Correctly named setter for {@link #id}. */
    public void setId(String id) {
        this.id = id;
    }

    /** @deprecated misnamed legacy setter; use {@link #setId(String)}. */
    @Deprecated
    public void setIp(String id) {
        setId(id);
    }

    public String getBook() {
        return book;
    }

    public void setBook(String book) {
        this.book = book;
    }

    public String getVoide() {
        return voide;
    }

    /** Correctly named setter for {@link #voide}. */
    public void setVoide(String voide) {
        this.voide = voide;
    }

    /** @deprecated misnamed legacy setter; use {@link #setVoide(String)}. */
    @Deprecated
    public void setUrl(String voide) {
        setVoide(voide);
    }

    /**
     * Orders records by id. The original returned a constant 0, which makes
     * every instance "equal" and breaks shuffle sorting if LogBean is ever
     * used as a key; null ids sort first.
     */
    @Override
    public int compareTo(LogBean o) {
        if (this.id == null && o.id == null) {
            return 0;
        }
        if (this.id == null) {
            return -1;
        }
        if (o.id == null) {
            return 1;
        }
        return this.id.compareTo(o.id);
    }

    /** Serializes all five fields; order must match {@link #readFields}. */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(id);
        out.writeUTF(time);
        out.writeUTF(traffic);
        out.writeUTF(book);
        out.writeUTF(voide);
    }

    /** Deserializes the five fields in the exact order written by {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readUTF();
        time = in.readUTF();
        traffic = in.readUTF();
        book = in.readUTF();
        voide = in.readUTF();
    }
}
BaiDuLog.java
负责对数据进行处理然后筛选
package org.apache.hadoop.examples;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class BaiduLog {
public static class BaiduLogMapper extends Mapper<LongWritable,Text, Text, LogBean> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// super.map(key, value, context);
String log = value.toString();
String str = "(cn.baidu.core.inteceptor.LogInteceptor:55)";
if (log.indexOf(str)!=-1){
String[] log_arr = log.split(str);
String time = log_arr[0].substring(1, 10);
String[] log_arr2 = log_arr[1].split("\t");
String id = log_arr2[1];
String traffic=log_arr2[2];
String book = log_arr2[3];
String voide =log_arr2[4];
if (id.equals("null")){
id = log_arr2[1];
}
LogBean logbean = new LogBean(id,time,traffic,book,voide);
context.write(new Text(ip),logbean);
}
}
}
public static class BaiduLogReducer extends Reducer<Text,LogBean,IntWritable,Text>{
public static class BaiduLogMapper extends Mapper<LongWritable,Text, Text, LogBean> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// super.map(key, value, context);
String log = value.toString();
String str = "(cn.baidu.core.inteceptor.LogInteceptor:55)";
if (log.indexOf(str)!=-1){
String[] log_arr = log.split(str);
String time = log_arr[0].substring(1, 10);
String[] log_arr2 = log_arr[1].split("\t");
String id = log_arr2[1];
String traffic=log_arr2[2];
String book = log_arr2[3];
String voide =log_arr2[4];
if (id.equals("null")){
id = log_arr2[1];
}
LogBean logbean = new LogBean(id,time,traffic,book,voide);
context.write(new Text(ip),logbean);
}
}
}
public static class BaiduLogReducer extends Reducer<Text,LogBean,IntWritable,Text>{
@Override
protected void reduce(Text key, Iterable<LogBean> values, Context context) throws IOException, InterruptedException {
// super.reduce(key, values, context);
int sum = 0;
protected void reduce(Text key, Iterable<LogBean> values, Context context) throws IOException, InterruptedException {
// super.reduce(key, values, context);
int sum = 0;
StringBuffer str = new StringBuffer();
int flag = 0;
for (LogBean logbean:values){
sum++;
if (flag==0){
str.append(logbean.toString());
flag = 1;
}
}
context.write(new IntWritable(sum),new Text(str.toString()));
int flag = 0;
for (LogBean logbean:values){
sum++;
if (flag==0){
str.append(logbean.toString());
flag = 1;
}
}
context.write(new IntWritable(sum),new Text(str.toString()));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "avg");
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "avg");
job.setJarByClass(BaiduLog.class);
job.setMapperClass(BaiduLog.BaiduLogMapper.class);
job.setReducerClass(BaiduLog.BaiduLogReducer.class);
job.setMapperClass(BaiduLog.BaiduLogMapper.class);
job.setReducerClass(BaiduLog.BaiduLogReducer.class);
// job.setCombinerClass(BaiduLog.BaiduLogReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LogBean.class);
job.setOutputValueClass(LogBean.class);
FileInputFormat.addInputPath(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,new Path(args[1]));
System.exit(job.waitForCompletion(true)?0:1);
}
FileOutputFormat.setOutputPath(job,new Path(args[1]));
System.exit(job.waitForCompletion(true)?0:1);
}
}
但是最后 Hive 数据库没有安装好,导致后续的实验受到了限制。