Install Pig (simple walkthrough)
1. 下载 pig-0.12.0.tar.gz 并解压
2. 设置环境变量
vi .bash_profile
# Environment for running Pig from the login shell.
# Root of the Pig installation.
PIG_HOME=/home/hduser/pig
# Hadoop configuration directory handed to Pig through its classpath.
# NOTE(review): HADOOP_HOME is expected to be set before this point - confirm.
PIG_CLASSPATH=$HADOOP_HOME/conf
# Pig's and Hadoop's bin directories go ahead of the existing search path
# (this supersedes the earlier `PATH=$PATH:$HOME/bin` setting).
PATH=$PIG_HOME/bin:$HADOOP_HOME/bin:$HOME/bin:$PATH
export PIG_HOME PIG_CLASSPATH PATH
注意：$HADOOP_HOME/bin 一定要在 PATH 中。
3. 以本地模式启动 Pig，统计每个 IP 访问 html/php 页面的次数
pig -x local
-- Count, per client IP, the requests for pages ending in "html" or "php".

-- Read the access log, splitting each line into fields on single spaces.
raw_log = LOAD '/home/hduser/hadoop/access_log.txt' USING PigStorage(' ');

-- Keep the first field as the IP and the seventh field (as text) as the page.
requests = FOREACH raw_log GENERATE $0 AS IP, (CHARARRAY)$6 AS page;

-- `matches` tests the whole string, so this keeps pages ending in html or php.
pages = FILTER requests BY page matches '.*html|.*php';

-- Gather the surviving requests by IP and count each group.
by_ip = GROUP pages BY IP;
hits_per_ip = FOREACH by_ip GENERATE group AS IP, COUNT(pages) AS HITS;

-- Write the (IP, HITS) pairs to the 'res' output directory.
STORE hits_per_ip INTO 'res';