集技术与颜值于一身

日就月将,学有缉熙于光明

导航

【慕课网实战】Spark Streaming实时流处理项目实战笔记十七之铭文升级版

铭文一级:

功能1:今天到现在为止 实战课程 的访问量

yyyyMMdd courseid

使用数据库来进行存储我们的统计结果
Spark Streaming把统计结果写入到数据库里面
可视化前端根据:yyyyMMdd courseid 把数据库里面的统计结果展示出来


选择什么数据库作为统计结果的存储呢?
RDBMS: MySQL、Oracle...
day course_id click_count
20171111 1 10
20171111 2 10

下一个批次数据进来以后:
20171111 + 1 ==> click_count + 下一个批次的统计结果 ==> 写入到数据库中

NoSQL: HBase、Redis....
HBase: 一个API就能搞定,非常方便
20171111 + 1 ==> click_count + 下一个批次的统计结果
本次课程为什么要选择HBase的一个原因所在

前提:
HDFS
Zookeeper
HBase

HBase表设计
创建表
create 'imooc_course_clickcount', 'info'
Rowkey设计
day_courseid

如何使用Scala来操作HBase

 

铭文二级:

启动HBase之前要先启动HDFS、ZooKeeper

Hadoop的启动,sbin文件夹:

./start-dfs.sh

HBase的启动,bin文件夹:

./start-hbase.sh

1、建表:create 'imooc_course_clickcount','info'

查看表:list

查看表详情:desc 'imooc_course_clickcount'  //注意:HBase shell中表名必须加引号,不加引号的 desc imooc_course_clickcount 会报错

2、Rowkey的设计:day_courseid

3、建CourseClickCount类(day_course,click_count)

4、HBaseUtils工具类的实现

package com.imooc.spark.project.utils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * HBase操作工具类:Java工具类建议采用单例模式封装
 */
public class HBaseUtils {
    HBaseAdmin admin = null;
    Configuration configuration = null;
    /**
     * Private constructor: instances are only obtainable through {@link #getInstance()}.
     * Sets the ZooKeeper quorum and HBase root directory, then creates the HBaseAdmin.
     * A failure to create the admin is printed and leaves {@code admin} as null.
     */
    private HBaseUtils(){
        configuration = new Configuration();
        configuration.set("hbase.zookeeper.quorum", "hadoop000:2181");
        configuration.set("hbase.rootdir", "hdfs://hadoop000:8020/hbase");
        try {
            admin = new HBaseAdmin(configuration);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    private static HBaseUtils instance = null;
    /**
     * Returns the shared instance, creating it on first use.
     * The method is synchronized so concurrent first calls create only one instance.
     */
    public  static synchronized HBaseUtils getInstance() {
        if(null == instance) {
            instance = new HBaseUtils();
        }
        return instance;
    }
    /**
     * Creates a new HTable handle for the given table name.
     * Every call creates a new handle, so the caller is responsible for closing it.
     *
     * @param tableName HBase table name
     * @return the table handle, or null if it could not be created (the error is printed)
     */
    public HTable getTable(String tableName) {
        HTable table = null;
        try {
            table = new HTable(configuration, tableName);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return table;
    }
    /**
     * Writes a single cell to an HBase table.
     * The table handle opened for the write is always closed before returning.
     *
     * @param tableName HBase table name
     * @param rowkey  row key of the record
     * @param cf column family
     * @param column column qualifier
     * @param value  value to store (written as the bytes of the string)
     */
    public void put(String tableName, String rowkey, String cf, String column, String value) {
        HTable table = getTable(tableName);
        if (table == null) {
            // getTable has already printed the cause; avoid a NullPointerException here.
            System.err.println("HBaseUtils.put skipped: cannot open table " + tableName);
            return;
        }
        Put put = new Put(Bytes.toBytes(rowkey));
        put.add(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
        try {
            table.put(put);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the handle created by getTable; previously it was leaked on every call.
            try {
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    /** Manual smoke test: writes one click_count cell into imooc_course_clickcount. */
    public static void main(String[] args) {
        //HTable table = HBaseUtils.getInstance().getTable("imooc_course_clickcount");
        //System.out.println(table.getName().getNameAsString());
        String tableName = "imooc_course_clickcount" ;
        String rowkey = "20171111_88";
        String cf = "info" ;
        String column = "click_count";
        String value = "2";
        HBaseUtils.getInstance().put(tableName, rowkey, cf, column, value);
    }
}

 

 关键:

HBaseAdmin、Configuration
configuration.set("hbase.zookeeper.quorum", "hadoop000:2181");
configuration.set("hbase.rootdir", "hdfs://hadoop000:8020/hbase");
// Holder for the single shared instance; stays null until getInstance() is first called.
private static HBaseUtils instance = null;
    // Returns the shared instance, creating it on the first call.
    // The method is synchronized, so concurrent first calls create only one instance.
    public  static synchronized HBaseUtils getInstance() {
        if(null == instance) {
            instance = new HBaseUtils();
        }
        return instance;
}
HBaseUtils.getInstance().put(tableName, rowkey, cf, column, value);

 

5、CourseClickCountDAO类数据访问层的实现

package com.imooc.spark.project.dao
import com.imooc.spark.project.domain.CourseClickCount
import com.imooc.spark.project.utils.HBaseUtils
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.util.Bytes
import scala.collection.mutable.ListBuffer
/**
  * 实战课程点击数-数据访问层
  */
object CourseClickCountDAO {
  val tableName = "imooc_course_clickcount"
  val cf = "info"
  val qualifer = "click_count"

  /**
    * Adds each element's click count to the counter stored in HBase.
    *
    * Each element is applied with `incrementColumnValue`, so the value is added
    * to whatever is already stored under the element's row key.
    *
    * @param list CourseClickCount elements to accumulate
    */
  def save(list: ListBuffer[CourseClickCount]): Unit = {
    // Nothing to write: do not open a table handle at all.
    if (list.nonEmpty) {
      val table = HBaseUtils.getInstance().getTable(tableName)
      try {
        list.foreach { ele =>
          table.incrementColumnValue(Bytes.toBytes(ele.day_course),
            Bytes.toBytes(cf),
            Bytes.toBytes(qualifer),
            ele.click_count)
        }
      } finally {
        // getTable creates a new handle per call; close it so handles are not leaked.
        if (table != null) table.close()
      }
    }
  }

  /**
    * Reads the click count stored under a row key.
    *
    * @param day_course row key to look up
    * @return the stored counter, or 0 when the cell does not exist
    */
  def count(day_course: String): Long = {
    val table = HBaseUtils.getInstance().getTable(tableName)
    try {
      val get = new Get(Bytes.toBytes(day_course))
      // Encode family/qualifier with Bytes.toBytes, the same way save() does,
      // instead of String.getBytes which depends on the platform default charset.
      val value = table.get(get).getValue(Bytes.toBytes(cf), Bytes.toBytes(qualifer))
      Option(value).map(bytes => Bytes.toLong(bytes)).getOrElse(0L)
    } finally {
      if (table != null) table.close()
    }
  }

  /** Manual smoke test: increments three counters and prints their current values. */
  def main(args: Array[String]): Unit = {
    val list = new ListBuffer[CourseClickCount]
    list.append(CourseClickCount("20171111_8",8))
    list.append(CourseClickCount("20171111_9",9))
    list.append(CourseClickCount("20171111_1",100))
    save(list)
    println(s"${count("20171111_8")} : ${count("20171111_9")} : ${count("20171111_1")}")
  }
}

 

 关键:

val tableName = "imooc_course_clickcount"
val cf = "info"
val qualifer = "click_count"
def save(list: ListBuffer[CourseClickCount]): Unit = {
for(ele <- list) {
      table.incrementColumnValue(Bytes.toBytes(ele.day_course),
        Bytes.toBytes(cf),
        Bytes.toBytes(qualifer),
        ele.click_count)
    }
def count(day_course: String):Long = {
val value = table.get(get).getValue(cf.getBytes, qualifer.getBytes)

HBaseUtils.getInstance().put(tableName, rowkey, cf, column, value);
Bytes.toBytes(day_course) 与 day_course.getBytes 作用类似,但并不完全等价:前者固定使用UTF-8编码,后者使用平台默认编码,建议统一使用 Bytes.toBytes
if(value == null) {}  //scala里面的==相当于java里空安全的equals;与null比较时等价于java的引用判空
list.append(CourseClickCount("20171111_8",8))
list.append(CourseClickCount("20171111_9",9))


6、在ImoocStatStreamingApp里原先代码参考:

// 测试步骤一:测试数据接收
    //messages.map(_._2).count().print
    // 测试步骤二:数据清洗
    val logs = messages.map(_._2)
    val cleanData = logs.map(line => {
      val infos = line.split("\t")
      // infos(2) looks like "GET /class/130.html HTTP/1.1";
      // the URL is its second space-separated token, e.g. /class/130.html
      val url = infos(2).split(" ")(1)
      // Extract the course id of a hands-on course page.
      // A URL that starts with "/class" but is not of the form /class/<number>.html
      // (e.g. "/classify/x.html" or "/class/") used to throw here and fail the batch;
      // it now gets id 0 and is dropped by the filter below.
      val courseId =
        if (url.startsWith("/class")) {
          scala.util.Try {
            val courseIdHTML = url.split("/")(2)
            courseIdHTML.substring(0, courseIdHTML.lastIndexOf(".")).toInt
          }.getOrElse(0)
        } else {
          0
        }
      ClickLog(infos(0), DateUtils.parseToMinute(infos(1)), courseId, infos(3).toInt, infos(4))
    }).filter(clicklog => clicklog.courseId != 0)

  添加代码:

    // 测试步骤三:统计今天到现在为止实战课程的访问量
    cleanData
      // HBase row key layout: <yyyyMMdd>_<courseId>, e.g. 20171111_88
      .map(x => (x.time.substring(0, 8) + "_" + x.courseId, 1))
      .reduceByKey(_ + _)
      .foreachRDD { rdd =>
        rdd.foreachPartition { partitionRecords =>
          // Collect the partition's (rowkey, count) pairs and hand them to the DAO in one call.
          val list = new ListBuffer[CourseClickCount]
          partitionRecords.foreach { case (dayCourse, clicks) =>
            list.append(CourseClickCount(dayCourse, clicks))
          }
          CourseClickCountDAO.save(list)
        }
      }

  

 

 

 

 

 

 

 

  

 

posted on 2018-02-02 18:46  旷课小王子  阅读(323)  评论(0)  编辑  收藏  举报