Hadoop本身并不支持lzo压缩,其配置步骤如下
1 maven安装
#下载
https://maven.apache.org/download.cgi #最新版本为apache-maven-3.8.5-bin.tar.gz
#解压
tar -zxvf /opt/software/apache-maven-3.8.5-bin.tar.gz -C /opt/module
#添加环境
vi /etc/profile
#MAVEN_HOME
MAVEN_HOME=/opt/module/apache-maven-3.8.5
export PATH=$PATH:$MAVEN_HOME/bin
source /etc/profile
#查看
mvn -v
2 其他安装
yum -y install gcc-c++ lzo-devel zlib-devel autoconf automake libtool
3 下载 安装 编译 lzo
wget http://www.oberhumer.com/opensource/lzo/download/lzo-2.10.tar.gz
tar -zxvf lzo-2.10.tar.gz
cd lzo-2.10
./configure --prefix=/usr/local/hadoop/lzo/
make
make install
4 编译hadoop-lzo源码
#下载hadoop-lzo的源码
https://github.com/twitter/hadoop-lzo/archive/master.zip
#解压之后,上传文件夹hadoop-lzo-master至/opt/module,修改其中pom.xml
<hadoop.current.version>2.7.7</hadoop.current.version>
#声明两个环境变量,供编译hadoop-lzo时查找lzo的头文件和库(仅编译时需要,可在当前shell中export,也可写入/etc/profile后执行source /etc/profile)
export C_INCLUDE_PATH=/usr/local/hadoop/lzo/include
export LIBRARY_PATH=/usr/local/hadoop/lzo/lib
#进入/opt/module/hadoop-lzo-master目录(pom.xml所在目录),执行maven编译命令
mvn package -Dmaven.test.skip=true
#target目录下,hadoop-lzo-0.4.21-SNAPSHOT.jar 即编译成功的hadoop-lzo组件
5 配置hadoop-lzo
cp /opt/module/hadoop-lzo-master/target/hadoop-lzo-0.4.21-SNAPSHOT.jar /opt/module/hadoop-2.7.7/share/hadoop/common
#分发
xsync /opt/module/hadoop-2.7.7/share/hadoop/common/hadoop-lzo-0.4.21-SNAPSHOT.jar
#core-site.xml增加配置信息
<property>
<name>io.compression.codecs</name>
<value>
org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
org.apache.hadoop.io.compress.BZip2Codec,
org.apache.hadoop.io.compress.SnappyCodec,
com.hadoop.compression.lzo.LzoCodec,
com.hadoop.compression.lzo.LzopCodec
</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
#分发
xsync core-site.xml
#启动集群