Connecting Kettle to Hadoop
http://wiki.pentaho.com/display/BAD/Additional+Configuration+for+YARN+Shims
Copy *-site.xml Cluster Files To PDI Directories
I am running Kettle on Windows, so I used FileZilla to download core-site.xml and hdfs-site.xml from /usr/crh/current/hadoop-client/conf/ on the cluster to my local E: drive, then copied them into E:\pdi-ce-6.1.0.1-196\data-integration\plugins\pentaho-big-data-plugin\hadoop-configurations\hdp23.
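One thing worth checking at this point (not covered by the wiki page above): PDI only loads the shim named by the active.hadoop.configuration entry in the big-data plugin's plugin.properties. For the hdp23 folder used here, that entry should read:

  # in E:\pdi-ce-6.1.0.1-196\data-integration\plugins\pentaho-big-data-plugin\plugin.properties
  active.hadoop.configuration=hdp23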
Replace the hostnames in hdfs-site.xml and core-site.xml with the corresponding IP addresses (the full files, as downloaded and before this replacement, are shown below).
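For example, the fs.defaultFS property in core-site.xml changes from the hostname to the NameNode's IP. 192.168.1.101 below is a made-up placeholder; substitute your cluster's actual address:

  <!-- before -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://bigdata-server-1:8020</value>
  </property>

  <!-- after (192.168.1.101 is a placeholder IP) -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://192.168.1.101:8020</value>
  </property>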
core-site.xml:

<!--Tue Jul 19 02:33:02 2016-->
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://bigdata-server-1:8020</value>
    <final>true</final>
  </property>
  <property>
    <name>fs.trash.interval</name>
    <value>360</value>
  </property>
  <property>
    <name>ha.failover-controller.active-standby-elector.zk.op.retries</name>
    <value>120</value>
  </property>
  <property>
    <name>hadoop.http.authentication.simple.anonymous.allowed</name>
    <value>true</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hcat.groups</name>
    <value>users</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hcat.hosts</name>
    <value>bigdata-server-2</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hdfs.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hdfs.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hive.groups</name>
    <value>users</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hive.hosts</name>
    <value>bigdata-server-2</value>
  </property>
  <property>
    <name>hadoop.security.auth_to_local</name>
    <value>DEFAULT</value>
  </property>
  <property>
    <name>hadoop.security.authentication</name>
    <value>simple</value>
  </property>
  <property>
    <name>hadoop.security.authorization</name>
    <value>false</value>
  </property>
  <property>
    <name>hadoop.security.key.provider.path</name>
    <value></value>
  </property>
  <property>
    <name>io.compression.codecs</name>
    <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <property>
    <name>io.serializations</name>
    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
  </property>
  <property>
    <name>ipc.client.connect.max.retries</name>
    <value>50</value>
  </property>
  <property>
    <name>ipc.client.connection.maxidletime</name>
    <value>30000</value>
  </property>
  <property>
    <name>ipc.client.idlethreshold</name>
    <value>8000</value>
  </property>
  <property>
    <name>ipc.server.tcpnodelay</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.jobtracker.webinterface.trusted</name>
    <value>false</value>
  </property>
  <property>
    <name>net.topology.script.file.name</name>
    <value>/etc/hadoop/conf/topology_script.py</value>
  </property>
  <property>
    <name>proxyuser_group</name>
    <value>users</value>
  </property>
</configuration>
hdfs-site.xml:

<!--Tue Jul 19 02:33:02 2016-->
<configuration>
  <property>
    <name>dfs.block.access.token.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.blockreport.initialDelay</name>
    <value>120</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit.streams.cache.size</name>
    <value>4096</value>
  </property>
  <property>
    <name>dfs.client.retry.policy.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.cluster.administrators</name>
    <value> hdfs</value>
  </property>
  <property>
    <name>dfs.datanode.address</name>
    <value>0.0.0.0:50010</value>
  </property>
  <property>
    <name>dfs.datanode.balance.bandwidthPerSec</name>
    <value>6250000</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/hadoop/hdfs/data</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir.perm</name>
    <value>750</value>
  </property>
  <property>
    <name>dfs.datanode.du.reserved</name>
    <value>1073741824</value>
  </property>
  <property>
    <name>dfs.datanode.failed.volumes.tolerated</name>
    <value>0</value>
  </property>
  <property>
    <name>dfs.datanode.http.address</name>
    <value>0.0.0.0:50075</value>
  </property>
  <property>
    <name>dfs.datanode.https.address</name>
    <value>0.0.0.0:50475</value>
  </property>
  <property>
    <name>dfs.datanode.ipc.address</name>
    <value>0.0.0.0:8010</value>
  </property>
  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>4096</value>
  </property>
  <property>
    <name>dfs.domain.socket.path</name>
    <value>/var/lib/hadoop-hdfs/dn_socket</value>
  </property>
  <property>
    <name>dfs.encrypt.data.transfer.cipher.suites</name>
    <value>AES/CTR/NoPadding</value>
  </property>
  <property>
    <name>dfs.encryption.key.provider.uri</name>
    <value></value>
  </property>
  <property>
    <name>dfs.heartbeat.interval</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.hosts.exclude</name>
    <value>/etc/hadoop/conf/dfs.exclude</value>
  </property>
  <property>
    <name>dfs.http.policy</name>
    <value>HTTP_ONLY</value>
  </property>
  <property>
    <name>dfs.https.port</name>
    <value>50470</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/hadoop/hdfs/journalnode</value>
  </property>
  <property>
    <name>dfs.journalnode.http-address</name>
    <value>0.0.0.0:8480</value>
  </property>
  <property>
    <name>dfs.journalnode.https-address</name>
    <value>0.0.0.0:8481</value>
  </property>
  <property>
    <name>dfs.namenode.accesstime.precision</name>
    <value>0</value>
  </property>
  <property>
    <name>dfs.namenode.audit.log.async</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.avoid.read.stale.datanode</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.avoid.write.stale.datanode</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.dir</name>
    <value>/hadoop/hdfs/namesecondary</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.edits.dir</name>
    <value>${dfs.namenode.checkpoint.dir}</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.period</name>
    <value>21600</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.txns</name>
    <value>1000000</value>
  </property>
  <property>
    <name>dfs.namenode.fslock.fair</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>200</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>bigdata-server-1:50070</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.namenode.https-address</name>
    <value>bigdata-server-1:50470</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/hadoop/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir.restore</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address</name>
    <value>bigdata-server-1:8020</value>
  </property>
  <property>
    <name>dfs.namenode.safemode.threshold-pct</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>bigdata-server-2:50090</value>
  </property>
  <property>
    <name>dfs.namenode.stale.datanode.interval</name>
    <value>30000</value>
  </property>
  <property>
    <name>dfs.namenode.startup.delay.block.deletion.sec</name>
    <value>3600</value>
  </property>
  <property>
    <name>dfs.namenode.write.stale.datanode.ratio</name>
    <value>1.0f</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.permissions.superusergroup</name>
    <value>hdfs</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.replication.max</name>
    <value>50</value>
  </property>
  <property>
    <name>dfs.support.append</name>
    <value>true</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>fs.permissions.umask-mode</name>
    <value>022</value>
  </property>
  <property>
    <name>nfs.exports.allowed.hosts</name>
    <value>* rw</value>
  </property>
  <property>
    <name>nfs.file.dump.dir</name>
    <value>/tmp/.hdfs-nfs</value>
  </property>
</configuration>
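As an alternative to editing IPs into the *-site.xml files, the hostnames can be left unchanged and instead mapped on the Windows machine running Kettle, in the hosts file. The IPs below are made-up placeholders; use your cluster's actual addresses:

  # C:\Windows\System32\drivers\etc\hosts  (placeholder IPs)
  192.168.1.101  bigdata-server-1
  192.168.1.102  bigdata-server-2

This keeps the local copies identical to the cluster's files, which avoids re-editing them after every configuration change.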