Kettle 连接 Hadoop

参考文档：http://wiki.pentaho.com/display/BAD/Additional+Configuration+for+YARN+Shims

Copy *-site.xml Cluster Files To PDI Directories

我的 Kettle 是在 Windows 上运行的

所以用 FileZilla 将服务器上 /usr/crh/current/hadoop-client/conf/ 目录下的 core-site.xml 和 hdfs-site.xml 下载到本地 E 盘，然后复制到 E:\pdi-ce-6.1.0.1-196\data-integration\plugins\pentaho-big-data-plugin\hadoop-configurations\hdp23 目录下。

把 hdfs-site.xml、core-site.xml 文件中的主机名（如 bigdata-server-1、bigdata-server-2）换成相应的 IP。注意：下面贴出的两个文件内容中显示的仍是主机名。

<!--Tue Jul 19 02:33:02 2016-->
    <configuration>
    
    <property>
      <name>fs.defaultFS</name>
      <value>hdfs://bigdata-server-1:8020</value>
      <final>true</final>
    </property>
    
    <property>
      <name>fs.trash.interval</name>
      <value>360</value>
    </property>
    
    <property>
      <name>ha.failover-controller.active-standby-elector.zk.op.retries</name>
      <value>120</value>
    </property>
    
    <property>
      <name>hadoop.http.authentication.simple.anonymous.allowed</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hcat.groups</name>
      <value>users</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hcat.hosts</name>
      <value>bigdata-server-2</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hdfs.groups</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hdfs.hosts</name>
      <value>*</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hive.groups</name>
      <value>users</value>
    </property>
    
    <property>
      <name>hadoop.proxyuser.hive.hosts</name>
      <value>bigdata-server-2</value>
    </property>
    
    <property>
      <name>hadoop.security.auth_to_local</name>
      <value>DEFAULT</value>
    </property>
    
    <property>
      <name>hadoop.security.authentication</name>
      <value>simple</value>
    </property>
    
    <property>
      <name>hadoop.security.authorization</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hadoop.security.key.provider.path</name>
      <value></value>
    </property>
    
    <property>
      <name>io.compression.codecs</name>
      <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
    </property>
    
    <property>
      <name>io.file.buffer.size</name>
      <value>131072</value>
    </property>
    
    <property>
      <name>io.serializations</name>
      <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
    </property>
    
    <property>
      <name>ipc.client.connect.max.retries</name>
      <value>50</value>
    </property>
    
    <property>
      <name>ipc.client.connection.maxidletime</name>
      <value>30000</value>
    </property>
    
    <property>
      <name>ipc.client.idlethreshold</name>
      <value>8000</value>
    </property>
    
    <property>
      <name>ipc.server.tcpnodelay</name>
      <value>true</value>
    </property>
    
    <property>
      <name>mapreduce.jobtracker.webinterface.trusted</name>
      <value>false</value>
    </property>
    
    <property>
      <name>net.topology.script.file.name</name>
      <value>/etc/hadoop/conf/topology_script.py</value>
    </property>
    
    <property>
      <name>proxyuser_group</name>
      <value>users</value>
    </property>
    </configuration>
（以上为 core-site.xml 的内容）
  1 <!--Tue Jul 19 02:33:02 2016-->
  2     <configuration>
  3     
  4     <property>
  5       <name>dfs.block.access.token.enable</name>
  6       <value>true</value>
  7     </property>
  8     
  9     <property>
 10       <name>dfs.blockreport.initialDelay</name>
 11       <value>120</value>
 12     </property>
 13     
 14     <property>
 15       <name>dfs.blocksize</name>
 16       <value>134217728</value>
 17     </property>
 18     
 19     <property>
 20       <name>dfs.client.read.shortcircuit</name>
 21       <value>true</value>
 22     </property>
 23     
 24     <property>
 25       <name>dfs.client.read.shortcircuit.streams.cache.size</name>
 26       <value>4096</value>
 27     </property>
 28     
 29     <property>
 30       <name>dfs.client.retry.policy.enabled</name>
 31       <value>false</value>
 32     </property>
 33     
 34     <property>
 35       <name>dfs.cluster.administrators</name>
 36       <value> hdfs</value>
 37     </property>
 38     
 39     <property>
 40       <name>dfs.datanode.address</name>
 41       <value>0.0.0.0:50010</value>
 42     </property>
 43     
 44     <property>
 45       <name>dfs.datanode.balance.bandwidthPerSec</name>
 46       <value>6250000</value>
 47     </property>
 48     
 49     <property>
 50       <name>dfs.datanode.data.dir</name>
 51       <value>/hadoop/hdfs/data</value>
 52     </property>
 53     
 54     <property>
 55       <name>dfs.datanode.data.dir.perm</name>
 56       <value>750</value>
 57     </property>
 58     
 59     <property>
 60       <name>dfs.datanode.du.reserved</name>
 61       <value>1073741824</value>
 62     </property>
 63     
 64     <property>
 65       <name>dfs.datanode.failed.volumes.tolerated</name>
 66       <value>0</value>
 67     </property>
 68     
 69     <property>
 70       <name>dfs.datanode.http.address</name>
 71       <value>0.0.0.0:50075</value>
 72     </property>
 73     
 74     <property>
 75       <name>dfs.datanode.https.address</name>
 76       <value>0.0.0.0:50475</value>
 77     </property>
 78     
 79     <property>
 80       <name>dfs.datanode.ipc.address</name>
 81       <value>0.0.0.0:8010</value>
 82     </property>
 83     
 84     <property>
 85       <name>dfs.datanode.max.transfer.threads</name>
 86       <value>4096</value>
 87     </property>
 88     
 89     <property>
 90       <name>dfs.domain.socket.path</name>
 91       <value>/var/lib/hadoop-hdfs/dn_socket</value>
 92     </property>
 93     
 94     <property>
 95       <name>dfs.encrypt.data.transfer.cipher.suites</name>
 96       <value>AES/CTR/NoPadding</value>
 97     </property>
 98     
 99     <property>
100       <name>dfs.encryption.key.provider.uri</name>
101       <value></value>
102     </property>
103     
104     <property>
105       <name>dfs.heartbeat.interval</name>
106       <value>3</value>
107     </property>
108     
109     <property>
110       <name>dfs.hosts.exclude</name>
111       <value>/etc/hadoop/conf/dfs.exclude</value>
112     </property>
113     
114     <property>
115       <name>dfs.http.policy</name>
116       <value>HTTP_ONLY</value>
117     </property>
118     
119     <property>
120       <name>dfs.https.port</name>
121       <value>50470</value>
122     </property>
123     
124     <property>
125       <name>dfs.journalnode.edits.dir</name>
126       <value>/hadoop/hdfs/journalnode</value>
127     </property>
128     
129     <property>
130       <name>dfs.journalnode.http-address</name>
131       <value>0.0.0.0:8480</value>
132     </property>
133     
134     <property>
135       <name>dfs.journalnode.https-address</name>
136       <value>0.0.0.0:8481</value>
137     </property>
138     
139     <property>
140       <name>dfs.namenode.accesstime.precision</name>
141       <value>0</value>
142     </property>
143     
144     <property>
145       <name>dfs.namenode.audit.log.async</name>
146       <value>true</value>
147     </property>
148     
149     <property>
150       <name>dfs.namenode.avoid.read.stale.datanode</name>
151       <value>true</value>
152     </property>
153     
154     <property>
155       <name>dfs.namenode.avoid.write.stale.datanode</name>
156       <value>true</value>
157     </property>
158     
159     <property>
160       <name>dfs.namenode.checkpoint.dir</name>
161       <value>/hadoop/hdfs/namesecondary</value>
162     </property>
163     
164     <property>
165       <name>dfs.namenode.checkpoint.edits.dir</name>
166       <value>${dfs.namenode.checkpoint.dir}</value>
167     </property>
168     
169     <property>
170       <name>dfs.namenode.checkpoint.period</name>
171       <value>21600</value>
172     </property>
173     
174     <property>
175       <name>dfs.namenode.checkpoint.txns</name>
176       <value>1000000</value>
177     </property>
178     
179     <property>
180       <name>dfs.namenode.fslock.fair</name>
181       <value>false</value>
182     </property>
183     
184     <property>
185       <name>dfs.namenode.handler.count</name>
186       <value>200</value>
187     </property>
188     
189     <property>
190       <name>dfs.namenode.http-address</name>
191       <value>bigdata-server-1:50070</value>
192       <final>true</final>
193     </property>
194     
195     <property>
196       <name>dfs.namenode.https-address</name>
197       <value>bigdata-server-1:50470</value>
198     </property>
199     
200     <property>
201       <name>dfs.namenode.name.dir</name>
202       <value>/hadoop/hdfs/namenode</value>
203     </property>
204     
205     <property>
206       <name>dfs.namenode.name.dir.restore</name>
207       <value>true</value>
208     </property>
209     
210     <property>
211       <name>dfs.namenode.rpc-address</name>
212       <value>bigdata-server-1:8020</value>
213     </property>
214     
215     <property>
216       <name>dfs.namenode.safemode.threshold-pct</name>
217       <value>1</value>
218     </property>
219     
220     <property>
221       <name>dfs.namenode.secondary.http-address</name>
222       <value>bigdata-server-2:50090</value>
223     </property>
224     
225     <property>
226       <name>dfs.namenode.stale.datanode.interval</name>
227       <value>30000</value>
228     </property>
229     
230     <property>
231       <name>dfs.namenode.startup.delay.block.deletion.sec</name>
232       <value>3600</value>
233     </property>
234     
235     <property>
236       <name>dfs.namenode.write.stale.datanode.ratio</name>
237       <value>1.0f</value>
238     </property>
239     
240     <property>
241       <name>dfs.permissions.enabled</name>
242       <value>true</value>
243     </property>
244     
245     <property>
246       <name>dfs.permissions.superusergroup</name>
247       <value>hdfs</value>
248     </property>
249     
250     <property>
251       <name>dfs.replication</name>
252       <value>3</value>
253     </property>
254     
255     <property>
256       <name>dfs.replication.max</name>
257       <value>50</value>
258     </property>
259     
260     <property>
261       <name>dfs.support.append</name>
262       <value>true</value>
263       <final>true</final>
264     </property>
265     
266     <property>
267       <name>dfs.webhdfs.enabled</name>
268       <value>true</value>
269     </property>
270     
271     <property>
272       <name>fs.permissions.umask-mode</name>
273       <value>022</value>
274     </property>
275     
276     <property>
277       <name>nfs.exports.allowed.hosts</name>
278       <value>* rw</value>
279     </property>
280     
281     <property>
282       <name>nfs.file.dump.dir</name>
283       <value>/tmp/.hdfs-nfs</value>
284     </property>
（以上为 hdfs-site.xml 的内容）

 

posted @ 2016-07-29 16:10  派。  阅读(2888)  评论(0)  编辑  收藏  举报