读取XML文件

package com.app;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * Loads a hierarchical classification rule file (XML) with dom4j and flattens
 * it into a map from class path to the keyword groups collected along that
 * path.
 *
 * <p>
 * Every non-leaf element below the root is read as a category carrying the
 * attributes {@code name}, {@code feature} (whitespace-separated keywords) and
 * {@code weight}. A leaf element (one without child elements) carries its
 * keywords as element text plus a {@code weight} attribute.
 */
public class Rule {

	/**
	 * Classification rules in insertion order. The key is the class path built
	 * by joining category names with '#'; the value is the list of keyword
	 * groups gathered from the outermost category down to the leaf.
	 */
	private LinkedHashMap<String, ArrayList<KeyWords>> rule = new LinkedHashMap<String, ArrayList<KeyWords>>();

	/**
	 * Creates a rule set and loads the rule file.
	 *
	 * <p>
	 * A {@link DocumentException} raised while reading the file is reported on
	 * standard error and not rethrown; in that case the rule map keeps whatever
	 * was loaded before the failure (normally nothing).
	 *
	 * @param path
	 *            location of the rule XML file, passed to
	 *            {@link #loadXml(String)}
	 * @throws IllegalArgumentException
	 *             if an element lacks a required attribute or a weight is not
	 *             a valid number
	 */
	public Rule(String path) {
		try {
			loadXml(path);
		} catch (DocumentException e) {
			// Best effort: keep the instance usable, but say which file failed.
			System.err.println("Failed to load rule file: " + path);
			e.printStackTrace();
		}
	}

	/**
	 * Reads the XML document with a dom4j {@link SAXReader} and walks it from
	 * the root element, filling the rule map.
	 *
	 * @param rulePath
	 *            location of the rule XML file, handed to
	 *            {@code SAXReader.read(String)}
	 * @throws DocumentException
	 *             if the document cannot be read or parsed
	 * @throws IllegalArgumentException
	 *             if an element lacks a required attribute or a weight is not
	 *             a valid number
	 */
	public void loadXml(String rulePath) throws DocumentException {
		SAXReader reader = new SAXReader();
		Document doc = reader.read(rulePath);
		Element root = doc.getRootElement();
		// The root itself contributes neither a name nor keywords.
		iteElement(root, "", new ArrayList<KeyWords>());
	}

	/**
	 * Recursively visits the child elements of {@code element}.
	 *
	 * <p>
	 * For a non-leaf child, its {@code feature} keywords and {@code weight} are
	 * appended to a copy of {@code keyWords} and the walk descends with the
	 * child's {@code name} appended to {@code className} (separated by '#'
	 * unless {@code className} is empty). For a leaf child, its text keywords
	 * and {@code weight} are appended to a copy of {@code keyWords}, the copy
	 * is stored in the rule map under {@code className}, and the method
	 * returns.
	 *
	 * <p>
	 * NOTE(review): because of that return, siblings following the first leaf
	 * child are not visited. This matches the original behaviour; confirm
	 * against the rule file schema whether several leaves per category are
	 * expected.
	 *
	 * @param element
	 *            element whose children are visited
	 * @param className
	 *            class path accumulated so far; empty at the root
	 * @param keyWords
	 *            keyword groups accumulated so far; never modified, each child
	 *            works on its own copy
	 * @throws IllegalArgumentException
	 *             if a child lacks the {@code weight} attribute, a non-leaf
	 *             child lacks the {@code feature} attribute, or a weight is
	 *             not a valid number
	 */
	public void iteElement(Element element, String className,
			ArrayList<KeyWords> keyWords) {
		Iterator<?> children = element.elementIterator();
		while (children.hasNext()) {
			Element child = (Element) children.next();
			// Each child gets its own copy so siblings do not see each other's
			// keyword groups.
			ArrayList<KeyWords> chain = new ArrayList<KeyWords>(keyWords);

			if (child.elements().isEmpty()) {
				// Leaf: keywords are the element text.
				double weight = parseWeight(child);
				chain.add(new KeyWords(splitWords(child.getText()), weight));
				rule.put(className, chain);
				return;
			}

			// Category: keywords come from the "feature" attribute.
			String name = child.attributeValue("name");
			String feature = requireAttribute(child, "feature");
			double weight = parseWeight(child);
			chain.add(new KeyWords(splitWords(feature), weight));

			String childClassName;
			if (className.length() < 1) {
				childClassName = className + name;
			} else {
				childClassName = className + "#" + name;
			}
			iteElement(child, childClassName, chain);
		}
	}

	/**
	 * Splits text on runs of whitespace into a set of words. Leading and
	 * trailing whitespace is ignored, so indented or blank XML text never
	 * produces an empty-string entry.
	 */
	private static HashSet<String> splitWords(String text) {
		HashSet<String> words = new HashSet<String>();
		if (text == null) {
			return words;
		}
		String trimmed = text.trim();
		if (trimmed.length() > 0) {
			words.addAll(Arrays.asList(trimmed.split("\\s+")));
		}
		return words;
	}

	/**
	 * Returns the value of a mandatory attribute, failing with a descriptive
	 * message instead of a bare NullPointerException when it is absent.
	 */
	private static String requireAttribute(Element element, String attribute) {
		String value = element.attributeValue(attribute);
		if (value == null) {
			throw new IllegalArgumentException("Element <" + element.getName()
					+ "> is missing required attribute '" + attribute + "'");
		}
		return value;
	}

	/**
	 * Parses the mandatory "weight" attribute of an element as a double.
	 */
	private static double parseWeight(Element element) {
		return Double.parseDouble(requireAttribute(element, "weight"));
	}

	/**
	 * One group of rule keywords together with its weight.
	 */
	static class KeyWords {
		/**
		 * Keyword set.
		 */
		HashSet<String> value;
		/**
		 * Weight attached to this keyword set.
		 */
		double weight;

		/**
		 * @param value
		 *            the keyword set
		 * @param weight
		 *            the weight of the keyword set
		 */
		public KeyWords(HashSet<String> value, double weight) {
			this.value = value;
			this.weight = weight;
		}

		/**
		 * @return the keyword set
		 */
		public HashSet<String> getValue() {
			return value;
		}

		/**
		 * @param value
		 *            the keyword set to store
		 */
		public void setValue(HashSet<String> value) {
			this.value = value;
		}

		/**
		 * @return the weight
		 */
		public double getWeight() {
			return weight;
		}

		/**
		 * @param weight
		 *            the weight to store
		 */
		public void setWeight(double weight) {
			this.weight = weight;
		}

	}

	/**
	 * @return the live rule map (class path to keyword groups), not a copy
	 */
	public LinkedHashMap<String, ArrayList<KeyWords>> getRule() {
		return rule;
	}

	/**
	 * Demo entry point: loads "rule2.xml" and prints every class path followed
	 * by the keyword sets collected for it.
	 */
	public static void main(String[] args) {
		Rule r = new Rule("rule2.xml");
		LinkedHashMap<String, ArrayList<KeyWords>> rule = r.getRule();
		for (java.util.Map.Entry<String, ArrayList<KeyWords>> entry : rule
				.entrySet()) {
			System.out.println(entry.getKey() + "---------------");
			for (KeyWords words : entry.getValue()) {
				System.out.println(words.getValue().toString());
			}
		}
	}
}

  

posted on 2014-05-14 15:18  ywf—java  阅读(182)  评论(0)  编辑  收藏  举报

导航