在hdfs上存取xml文件的实现代码

要读取的文件为:/user/hdfs/stdin.xml

<?xml version="1.0" encoding="UTF-8"?>
<request>
	<jobinstanceid>SK9cohJD4yklcD8dJuZXDA</jobinstanceid>
	<context>
		<property name="userName" value="xdf"/>
		<property name="queueName" value="queue1"/>
		<property name="processId" value="dns"/>
		<property name="jobId" value="jobID"/>
		<property name="hiveServerAddress" value="IP:port "/>
		<property name="databaseName" value="wx"/>
		<property name="basePath" value="HDFS_BasePath1/20141216/jobinstanceid/${operator.name}"/>
	</context>

	<operator name="convert" alias="lowerUpperCaseConvert" class="lowerUpperCaseConvert">
		<parameterlist name="fields">
			<parametermap fieldname="name" fieldvalue="m_uuid()" fieldtype="String"/>
		</parameterlist>
	</operator>
	<datasets>
		<dataset name="inport1">
			<row>default.test1</row>
		</dataset>
	</datasets>
</request>

要存的文件为:/user/hdfs/stdout.xml

<?xml version="1.0" encoding="UTF-8"?>

<response>
  <jobinstanceid>SK9cohJD4yklcD8dJuZXDA</jobinstanceid>
  <datasets>
    <dataset name="outport1">
      <row>default.tmp_e93eba2c_f22d_4dc1_9e86_a342a0ea0625</row>
    </dataset>
  </datasets>
  <operatortracker>
    <portcounter name="inport1" dataCount="4"/>
    <portcounter name="outport1" dataCount="4"/>
  </operatortracker>
</response>

读stdin.xml文件的实现如下:

/**
 * Parses the request XML (the content of stdin.xml) and flattens the values
 * this operator needs into one string map.
 *
 * <p>Keys written to the map, depending on which elements are present:
 * <ul>
 *   <li>{@code jobinstanceid} - text of {@code <jobinstanceid>}</li>
 *   <li>{@code userName} - {@code value} of the {@code <property name="userName">}
 *       under {@code <context>} (null when no such property exists)</li>
 *   <li>{@code operatorName} - {@code name} attribute of {@code <operator>}</li>
 *   <li>{@code fieldName1..n} / {@code fieldType1..n} - one pair per
 *       {@code <parametermap>} inside {@code <parameterlist name="fields">}</li>
 *   <li>{@code fieldCount} - number of pairs, as a decimal string</li>
 *   <li>{@code dbName} / {@code inputTabName} - the two parts of the
 *       {@code db.table} reference in the last {@code <row>} of
 *       {@code <dataset name="inport1">} (null when there is no such row)</li>
 * </ul>
 *
 * @param xmlParams the request document as a string
 * @return a list holding exactly one map with the keys described above
 * @throws IllegalArgumentException if the inport1 row is not blank but is not
 *         of the form {@code db.table}
 * @throws Exception if the text cannot be parsed as XML
 */
public List<Map> parseStdinXml(String xmlParams) throws Exception {

		// These are declared outside the loop on purpose: a value found in an
		// earlier element is kept if a later sibling element does not supply one.
		String userName = null;
		String dbName = null;
		String inputTabName = null;
		String inputTableRef = null;
		int fieldCount = 0;

		List<Map> list = new ArrayList<Map>();
		Map<String, String> map = new HashMap<String, String>();

		// NOTE(review): if xmlParams can come from an untrusted source, confirm that
		// the dom4j version in use disables external entities (XXE) in parseText.
		Document document = DocumentHelper.parseText(xmlParams);
		Element root = document.getRootElement();

		Iterator<?> sections = root.elementIterator();
		while (sections.hasNext()) {
			Element section = (Element) sections.next();
			String sectionName = section.getName();

			if ("jobinstanceid".equals(sectionName)) {
				// Job instance id
				map.put("jobinstanceid", section.getText());

			} else if ("context".equals(sectionName)) {
				// Common parameters: only userName is needed here
				Iterator<?> properties = section.elementIterator();
				while (properties.hasNext()) {
					Element property = (Element) properties.next();
					if ("property".equals(property.getName())
							&& "userName".equals(property.attributeValue("name"))) {
						userName = property.attributeValue("value");
					}
				}
				map.put("userName", userName);

			} else if ("operator".equals(sectionName)) {
				// Operator parameters: name plus the declared field list
				map.put("operatorName", section.attributeValue("name"));
				Iterator<?> parameterLists = section.elementIterator();
				while (parameterLists.hasNext()) {
					Element parameterList = (Element) parameterLists.next();
					if (!"parameterlist".equals(parameterList.getName())
							|| !"fields".equals(parameterList.attributeValue("name"))) {
						continue;
					}
					Iterator<?> parameterMaps = parameterList.elementIterator();
					while (parameterMaps.hasNext()) {
						Element parameterMap = (Element) parameterMaps.next();
						if ("parametermap".equals(parameterMap.getName())) {
							// Keys are 1-based: fieldName1, fieldType1, ...
							fieldCount++;
							map.put("fieldName" + fieldCount,
									parameterMap.attributeValue("fieldname"));
							map.put("fieldType" + fieldCount,
									parameterMap.attributeValue("fieldtype"));
						}
					}
				}
				map.put("fieldCount", Integer.toString(fieldCount));

			} else if ("datasets".equals(sectionName)) {
				// Input table: last <row> of the dataset named "inport1"
				Iterator<?> datasets = section.elementIterator();
				while (datasets.hasNext()) {
					Element dataset = (Element) datasets.next();
					if (!"inport1".equals(dataset.attributeValue("name"))) {
						continue;
					}
					Iterator<?> rows = dataset.elementIterator();
					while (rows.hasNext()) {
						inputTableRef = ((Element) rows.next()).getText();
					}
				}

				// The reference may legitimately be absent (no inport1 dataset or
				// no row); guard against null instead of failing with an NPE.
				if (inputTableRef != null && !"".equals(inputTableRef.trim())) {
					// Trim so whitespace around the element text does not leak
					// into the database or table name.
					String[] parts = inputTableRef.trim().split("\\.");
					if (parts.length < 2) {
						throw new IllegalArgumentException(
								"inport1 row must be of the form db.table but was: "
										+ inputTableRef);
					}
					dbName = parts[0];
					inputTabName = parts[1];
				}
				map.put("dbName", dbName);
				map.put("inputTabName", inputTabName);
			}
		}
		list.add(map);
		return list;
	}

存stdout.xml文件的实现如下:

/**
 * Builds the response document (stdout.xml) from the first map in
 * {@code listOut} and writes it, pretty-printed as UTF-8, to {@code fileName}
 * through the Hadoop {@code FileSystem} resolved from that URI.
 *
 * <p>The first map must contain the keys {@code dbName}, {@code jobinstanceid},
 * {@code outputTable}, {@code inputDataCount} and {@code outputDataCount}.
 *
 * <p>I/O failures are not propagated: the exception message is printed to
 * standard output and the method returns normally.
 *
 * @param fileName URI/path of the file to create
 * @param listOut  list whose first element holds the values described above
 * @throws NullPointerException if one of the required keys is missing
 * @throws IndexOutOfBoundsException if {@code listOut} is empty
 */
public void genStdoutXml(String fileName, List<Map> listOut) {

		Map result = listOut.get(0);
		String dbName = result.get("dbName").toString();
		String jobinstance = result.get("jobinstanceid").toString();
		String outputTable = result.get("outputTable").toString();
		String inputDataCount = result.get("inputDataCount").toString();
		String outputDataCount = result.get("outputDataCount").toString();

		// <response>
		Document document = DocumentHelper.createDocument();
		Element response = document.addElement("response");

		// <jobinstanceid>
		Element jobinstanceid = response.addElement("jobinstanceid");
		jobinstanceid.setText(jobinstance);

		// <datasets><dataset name="outport1"><row>db.table</row></dataset></datasets>
		Element datasets = response.addElement("datasets");
		Element dataset = datasets.addElement("dataset");
		dataset.addAttribute("name", "outport1");
		Element row = dataset.addElement("row");
		row.setText(dbName + "." + outputTable);

		// <operatortracker> with one <portcounter> per port
		Element operatortracker = response.addElement("operatortracker");
		Element portcounter1 = operatortracker.addElement("portcounter");
		portcounter1.addAttribute("name", "inport1");
		portcounter1.addAttribute("dataCount", inputDataCount);
		Element portcounter2 = operatortracker.addElement("portcounter");
		portcounter2.addAttribute("name", "outport1");
		portcounter2.addAttribute("dataCount", outputDataCount);

		OutputStream out = null;
		try {
			Configuration conf = new Configuration();
			FileSystem fs = FileSystem.get(URI.create(fileName), conf);
			out = fs.create(new Path(fileName),
					new Progressable() {
						public void progress() {
							// no progress reporting needed
						}
					});
			OutputFormat format = OutputFormat.createPrettyPrint();
			format.setEncoding("UTF-8");
			XMLWriter xmlWriter = new XMLWriter(out, format);
			xmlWriter.write(document);
			// Push buffered characters to the stream; the stream itself is
			// closed in the finally block so it is released on every path.
			xmlWriter.flush();
		} catch (IOException e) {
			System.out.println(e.getMessage());
		} finally {
			if (out != null) {
				try {
					out.close();
				} catch (IOException e) {
					// A failed close can mean the file was not fully written.
					System.out.println(e.getMessage());
				}
			}
		}

	}


posted on 2014-12-24 11:13  XIAO的博客  阅读(641)  评论(0)  编辑  收藏  举报

导航