数仓数据
修改两个文件:hdfs-site.xml 和 core-site.xml(先将原文件备份为 *.xmlbak,再用 rz 上传新文件)
安装目录:
/mnt/home/work/hadoop-3.3.1/bin
[root@test11.bj.sm etc]# cd hadoop/
[root@test11.bj.sm hadoop]# ls
capacity-scheduler.xml hadoop-metrics2.properties httpfs-log4j.properties log4j.properties ssl-client.xml.example yarnservice-log4j.properties
configuration.xsl hadoop-policy.xml httpfs-site.xml mapred-env.cmd ssl-server.xml.example yarn-site.xml
container-executor.cfg hadoop-user-functions.sh.example kms-acls.xml mapred-env.sh user_ec_policies.xml.template
core-site.xml hdfs-rbf-site.xml kms-env.sh mapred-queues.xml.template workers
hadoop-env.cmd hdfs-site.xml kms-log4j.properties mapred-site.xml yarn-env.cmd
hadoop-env.sh httpfs-env.sh kms-site.xml shellprofile.d yarn-env.sh
[root@test11.bj.sm hadoop]# mv hdfs-site.xml hdfs-site.xmlbak
[root@test11.bj.sm hadoop]# mv core-site.xml core-site.xmlbak
[root@test11.bj.sm hadoop]# rz -E
rz waiting to receive.
[root@test11.bj.sm hadoop]# rz -E
修改 hosts(编辑 /etc/hosts,解决主机名无法解析、连接 NameNode 超时的问题)
[root@test11.bj.sm hadoop]# hadoop fs -ls /
^C[root@test11.bj.sm hadoop]# hadoop fs -ls /user/data
2021-08-17 21:03:54,780 INFO retry.RetryInvocationHandler: org.apache.hadoop.net.ConnectTimeoutException: Call From java.net.UnknownHostException: test11.bj.sm: test11.bj.sm: 未知的名称或服务 to master2.bjtx.ishumei.com:8020 failed on socket timeout exception: org.apache.hadoop.net.ConnectTimeoutException: 20000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=master2.bjtx.ishumei.com/140.143.158.40:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout, while invoking ClientNamenodeProtocolTranslatorPB.getFileInfo over master2.bjtx.ishumei.com/140.143.158.40:8020 after 1 failover attempts. Trying to failover after sleeping for 1462ms.
^Cls: Interrupted
[root@test11.bj.sm hadoop]# vim /etc/hosts
[root@test11.bj.sm hadoop]# ping 10.163.45.80
PING 10.163.45.80 (10.163.45.80) 56(84) bytes of data.
64 bytes from 10.163.45.80: icmp_seq=1 ttl=64 time=3.93 ms
64 bytes from 10.163.45.80: icmp_seq=2 ttl=64 time=3.88 ms
64 bytes from 10.163.45.80: icmp_seq=3 ttl=64 time=3.88 ms
^C
--- 10.163.45.80 ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 2023ms
rtt min/avg/max/mdev = 3.881/3.900/3.939/0.077 ms
[root@test11.bj.sm hadoop]# vim /etc/hosts
[root@test11.bj.sm hadoop]# hadoop fs -ls /user/data
Found 157 items
drwxr-xr-x - xudeqing supergroup 0 2021-06-11 13:03 /user/data/ExtractToken
drwxrwxrwx - azkaban supergroup 0 2021-08-17 12:41 /user/data/OfflineClipsMonitor
drwxrwxrwx - duanjiaojiao supergroup 0 2021-07-24 18:01 /user/data/StrategyInspectionTool
drwxrwxrwx - yangguang supergroup 0 2021-07-25 10:21 /user/data/advanced_features
drwxrwxrwx - liangkun supergroup 0 2020-03-16 17:57 /user/data/archives
drwxrwxr-x - shaoli asrtext 0 2021-05-09 22:09 /user/data/asr_text
drwxrwxrwx - hdfs supergroup 0 2021-04-29 14:25 /user/data/asr_train_text
文件操作
[root@test11.bj.sm hadoop]# hadoop fs -ls /user/tmp/hjc/dt=20210817/bj_text
Found 2 items
-rw-r--r-- 2 huangjincheng supergroup 0 2021-08-17 21:03 /user/tmp/hjc/dt=20210817/bj_text/_SUCCESS
-rw-r--r-- 2 huangjincheng supergroup 878018961 2021-08-17 21:03 /user/tmp/hjc/dt=20210817/bj_text/part-00000-540292af-76df-4bc0-92e7-038465869363.txt
[root@test11.bj.sm hadoop]# hadoop fs -du -h /user/tmp/hjc/dt=20210817/bj_text
0 0 /user/tmp/hjc/dt=20210817/bj_text/_SUCCESS
837.3 M 1.6 G /user/tmp/hjc/dt=20210817/bj_text/part-00000-540292af-76df-4bc0-92e7-038465869363.txt
[root@test11.bj.sm hadoop]# cd
[root@test11.bj.sm ~]# ls
1 bash_new Conf download financeTask.log packages pkg_zabbix tmp zookeeper-3.4.9 zookeeper-3.4.9.tar.gz zook.sh zook.txt
[root@test11.bj.sm ~]# cd /home/
[root@test11.bj.sm hmee]# ls
centos compile dongming huangyunpeng log nginx putpkg weipingshun work yangxueyi zhangjuntao zhangyaping zuohaitao
[root@test11.bj.sm hmee]# cd huangyunpeng/
[root@test11.bj.sm huangyunpeng]# hadoop fs -get /user/tmp/hjc/dt=20210817/bj_text
[root@test11.bj.sm huangyunpeng]# cd bj_text/
[root@test11.bj.sm bj_text]# ls
part-00000-540292af-76df-4bc0-92e7-038465869363.txt _SUCCESS
[root@test11.bj.sm bj_text]# du -h -s *
839M part-00000-540292af-76df-4bc0-92e7-038465869363.txt
0 _SUCCESS
[root@test11.bj.sm bj_text]# cat part-00000-540292af-76df-4bc0-92e7-038465869363.txt | head -n 2
/v2/saas/anti_fraud/text {"data":{"role":"USER","tokenId":"51C3245477349583010038124B","nickname":"hm心心","channel":"FX_SEARCH","text":"","room":"0"},"accessKey":"C5hq0GNPeaa2fBDoBY04","type":"ZHIBO"}
/v2/saas/anti_fraud/text {"accessKey":"binqUxtuzA8MZXwBcPpH","appId":"default","data":{"channel":"SIXINSHENHE","ip":"182.88.141.8","nickname":"","receiveTokenId":"747984577","text":"睡觉啦","tokenId":"744103320"},"type":"ZHIBO"}