熟悉HBase基本操作

1.

# Open an SSH session to the local machine; the following commands run in it.
ssh localhost
# Start the HDFS daemons first, then the HBase daemons.
start-dfs.sh
start-hbase.sh
# Enter the interactive HBase shell. The create/put commands that follow are
# HBase shell commands, not bash.
hbase shell
# Create the Student table with the column families S_No, S_Name, S_Sex and
# S_Age, then insert three student rows.
create 'Student', 'S_No', 'S_Name', 'S_Sex', 'S_Age'

# One entry per row: row key, S_No, S_Name, S_Sex, S_Age.
# The HBase shell is a Ruby interpreter, so the rows can be written in a loop;
# the puts are issued in the same order as if typed one by one.
[
  ['001', '2015001', 'Zhangsan', 'male',   '23'],
  ['002', '2015002', 'Marry',    'female', '22'],
  ['003', '2015003', 'Lisi',     'male',   '24']
].each do |row_key, number, name, sex, age|
  put 'Student', row_key, 'S_No', number
  put 'Student', row_key, 'S_Name', name
  put 'Student', row_key, 'S_Sex', sex
  put 'Student', row_key, 'S_Age', age
end

2.

# List the tables that exist in HBase.
list
# Print every row and cell stored in the Student table.
scan 'Student'
# Add a new column family, S_Course, to the Student table.
alter 'Student', NAME=>'S_Course'
# Store the value 85 in column S_Course:math of row 001.
put 'Student', '001', 'S_Course:math', '85'
# Remove the S_Course column family again, including the cell written above.
alter 'Student', {NAME=>'S_Course', METHOD=>'delete'}
# Count the rows in the Student table.
count 'Student'
# Remove all rows from Student while keeping the table itself.
truncate 'Student'

3.

# Refresh the job input in HDFS: remove old .txt files from input/ (a path
# relative to the user's HDFS home directory) and upload the local lyric.txt.

# Make sure the target directory exists so that -put has somewhere to write.
hdfs dfs -mkdir -p input
# The glob is quoted so the local shell cannot expand it against local files;
# HDFS expands it itself. -f avoids an error when no .txt file matches.
hdfs dfs -rm -f 'input/*.txt'
hdfs dfs -put ~/lyric.txt input/

4.

import sys

# Hadoop Streaming mapper: for every whitespace-separated word read from
# standard input, emit one "word<TAB>1" record on standard output.
for raw_line in sys.stdin:
    for token in raw_line.strip().split():
        print('%s\t%s' % (token, 1))

5.

from operator import itemgetter
import sys


def reduce_counts(lines):
    """Sum the counts of consecutive records that share the same word.

    Each input line is expected to look like ``word<TAB>count``. Lines are
    assumed to arrive grouped by word (Hadoop Streaming sorts the mapper
    output by key before it reaches the reducer), so a change of word means
    the previous word is complete.

    Args:
        lines: iterable of text lines, e.g. ``sys.stdin``.

    Yields:
        One ``word<TAB>total`` string per run of identical words, in input
        order. Lines without a tab or with a non-integer count are skipped.
    """
    current_word = None
    current_count = 0

    for line in lines:
        word, sep, raw_count = line.strip().partition('\t')
        if not sep:
            # Not a "word<TAB>count" record (for example a blank line).
            continue
        try:
            count = int(raw_count)
        except ValueError:
            # Malformed count: ignore the record without touching the
            # running total of the current word.
            continue

        if word == current_word:
            current_count += count
        else:
            if current_word is not None:
                yield '%s\t%s' % (current_word, current_count)
            current_word = word
            current_count = count

    # Flush the last group. Testing against None (instead of comparing with
    # the last word read) emits nothing for empty input and still emits the
    # final group when the last input line was skipped as malformed.
    if current_word is not None:
        yield '%s\t%s' % (current_word, current_count)


if __name__ == '__main__':
    # print() with a single argument behaves the same on Python 2 and 3.
    for record in reduce_counts(sys.stdin):
        print(record)

6.

# Root directory of the local Hadoop installation.
export HADOOP_HOME=/usr/local/hadoop

# Path of the Hadoop Streaming jar. The file name contains the Hadoop version,
# so it is located with a glob. A glob on the right-hand side of an assignment
# is not expanded, which would leave STREAM holding the literal pattern (usable
# only when later expanded unquoted). Resolve it here to the first jar that
# exists; if none is found, keep the pattern as a fallback.
STREAM="$HADOOP_HOME/share/hadoop/tools/lib/hadoop-streaming-*.jar"
for _stream_jar in "$HADOOP_HOME"/share/hadoop/tools/lib/hadoop-streaming-*.jar; do
  if [ -e "$_stream_jar" ]; then
    STREAM=$_stream_jar
    break
  fi
done
unset _stream_jar
export STREAM

7.

# Run the word-count job with Hadoop Streaming.
#
# - $STREAM is left unquoted on purpose: it may hold a glob pattern for the
#   streaming jar that has to be expanded here.
# - -files (generic option, replaces the deprecated -file) ships both scripts
#   to the task working directory, so the mapper and reducer commands refer to
#   the shipped copies by bare file name.
# - The -input glob is quoted so that HDFS, not the local shell, expands it.
# - NOTE(review): stream.non.zero.exit.is.failure=false makes the job ignore a
#   non-zero exit status of the mapper/reducer, which can hide a crashing
#   script; kept as in the original, consider removing it.
# - The job fails if /user/hadoop/wcoutput already exists.
# shellcheck disable=SC2086
hadoop jar $STREAM \
  -D stream.non.zero.exit.is.failure=false \
  -files /home/hadoop/mapper.py,/home/hadoop/reducer.py \
  -mapper 'python mapper.py' \
  -reducer 'python reducer.py' \
  -input '/user/hadoop/input/*.txt' \
  -output /user/hadoop/wcoutput

 

posted @ 2018-05-10 21:53  博威袁他就是袁威博  阅读(169)  评论(0)  编辑  收藏  举报