使用SAX解析msn聊天记录文档的java小程序
SAX是一种基于事件驱动模式的XML解析API,好处是快,而且节省内存,坏处是程序编写起来相对复杂,而且给人感觉不够OO。最好用的还是DOM,但是占内存太多,极限的时候会引起OutOfMemory异常。
闲话就说到这里,下面开始说程序。Sun 的 JDK 1.5 已经包含了下面程序中 import 的所有类,所以运行的时候不需要任何第三方 jar 包。
//-------------------------------解析消息的主类----------------------------------------
package sax;
import java.io.File;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX event handler that prints the chat messages of an MSN Messenger
 * history (XML) file to standard output, one message per line.
 *
 * <p>For every {@code Text} element one line is printed. The sender is the
 * first {@code User} element seen since the previous line was printed; any
 * later {@code User} element overwrites the recipient. The time stamp comes
 * from the {@code DateTime} attribute of the most recent {@code Message}
 * element.
 *
 * <p>Instances keep per-parse state and are not safe for concurrent use.
 */
public class SAXParser extends DefaultHandler {
    // Element and attribute names looked up while parsing.
    private static final String MESSAGE = "Message";
    // NOTE(review): the original constant was spelled "Data"; "Date" is
    // assumed to be the real attribute name -- confirm against a log file.
    private static final String DATE = "Date";
    private static final String TIME = "Time";
    private static final String DATE_TIME = "DateTime";
    private static final String FRIENDLY_NAME = "FriendlyName";
    private static final String USER = "User";
    private static final String TEXT = "Text";

    /** Path of the history file to parse. */
    private final String filename;

    // Attributes of the most recent Message element. Only dateTime is
    // printed; date and time are captured but currently unused.
    private String date;
    private String time;
    private String dateTime;

    // Participants of the message currently being assembled.
    private String from;
    private String to;

    /** Character data collected for the current Text element. */
    private final StringBuilder text = new StringBuilder();

    /** True while the parser is between the start and end tag of a Text element. */
    private boolean insideText = false;

    /**
     * Creates a handler for the given history file.
     *
     * @param filename path of the XML file to parse
     */
    public SAXParser(String filename) {
        this.filename = filename;
    }

    /**
     * Parses the file with this object as the event handler and prints every
     * message to standard output.
     *
     * <p>On any parse, I/O or parser-configuration error the problem is
     * written to standard error and the JVM exits with status 1.
     */
    public void showMessage() {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        try {
            // Fully qualified because this class shares the simple name.
            javax.xml.parsers.SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(new File(filename), this);
        } catch (SAXException se) {
            System.err.println(se.getMessage());
            System.exit(1);
        } catch (IOException ioe) {
            System.err.println(ioe);
            System.exit(1);
        } catch (ParserConfigurationException e) {
            System.err.println(e);
            System.exit(1);
        }
    }

    /**
     * Prints the message assembled so far and clears the per-message state.
     * Text that has no sender or recipient is printed without a header.
     */
    private void dealMessage() {
        if (from == null && to == null) {
            log(text.toString());
        } else {
            log("(" + this.dateTime + ")" + from + " 对 " + to + " 说:"
                    + text);
        }
        // Always reset, so that an empty Text element cannot leak its
        // participants into the following message.
        this.from = null;
        this.to = null;
        this.text.setLength(0);
    }

    // The callbacks below fire in document order; startElement, characters
    // and endElement repeat for every element.

    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
        // Start from a clean slate so the handler can be reused.
        this.from = null;
        this.to = null;
        this.text.setLength(0);
        this.insideText = false;
        log("--------开始打印信息---------");
    }

    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        if (MESSAGE.equalsIgnoreCase(qName)) {
            this.date = attributes.getValue(DATE);
            this.time = attributes.getValue(TIME);
            this.dateTime = attributes.getValue(DATE_TIME);
        } else if (USER.equalsIgnoreCase(qName)) {
            // The first user seen is the sender, any later one the recipient.
            if (this.from == null) {
                this.from = attributes.getValue(FRIENDLY_NAME);
            } else {
                this.to = attributes.getValue(FRIENDLY_NAME);
            }
        } else if (TEXT.equalsIgnoreCase(qName)) {
            this.insideText = true;
            this.text.setLength(0);
        }
    }

    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        super.characters(ch, start, length);
        // A parser may deliver one text node in several chunks (for example
        // around entity references such as &amp;), so only accumulate here.
        if (this.insideText) {
            this.text.append(ch, start, length);
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        super.endElement(uri, localName, qName);
        // The message is complete only once its Text element has closed.
        if (this.insideText && TEXT.equalsIgnoreCase(qName)) {
            this.insideText = false;
            dealMessage();
        }
    }

    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
        log("--------打印信息结束---------");
    }

    /** Writes one line to standard output. */
    private void log(String str) {
        System.out.println(str);
    }
}
//-------测试---------
/**
*
*/
package sax;
import java.awt.FileDialog;
import java.awt.Frame;
/**
 * Manual test driver: lets the user pick a chat history file in an AWT file
 * dialog and prints its messages with {@link SAXParser}.
 *
 * @author zhuge
 */
public class XMLParserTest {
    /**
     * Shows the file dialog and parses the chosen file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Frame owner = new Frame();
        try {
            FileDialog dialog = new FileDialog(owner, "Open", FileDialog.LOAD);
            dialog.setVisible(true);

            // getFile() returns null when the user cancels the dialog.
            String file = dialog.getFile();
            if (file == null) {
                System.err.println("No file selected.");
                return;
            }
            String directory = dialog.getDirectory();
            String filename = (directory == null ? "" : directory) + file;

            SAXParser parser = new SAXParser(filename);
            parser.showMessage();
        } finally {
            // Release the native window resources on every exit path.
            owner.dispose();
        }
    }
}
package sax;
import java.io.File;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX event handler that prints the chat messages of an MSN Messenger
 * history (XML) file to standard output, one message per line.
 *
 * <p>For every {@code Text} element one line is printed. The sender is the
 * first {@code User} element seen since the previous line was printed; any
 * later {@code User} element overwrites the recipient. The time stamp comes
 * from the {@code DateTime} attribute of the most recent {@code Message}
 * element.
 *
 * <p>Instances keep per-parse state and are not safe for concurrent use.
 */
public class SAXParser extends DefaultHandler {
    // Element and attribute names looked up while parsing.
    private static final String MESSAGE = "Message";
    // NOTE(review): the original constant was spelled "Data"; "Date" is
    // assumed to be the real attribute name -- confirm against a log file.
    private static final String DATE = "Date";
    private static final String TIME = "Time";
    private static final String DATE_TIME = "DateTime";
    private static final String FRIENDLY_NAME = "FriendlyName";
    private static final String USER = "User";
    private static final String TEXT = "Text";

    /** Path of the history file to parse. */
    private final String filename;

    // Attributes of the most recent Message element. Only dateTime is
    // printed; date and time are captured but currently unused.
    private String date;
    private String time;
    private String dateTime;

    // Participants of the message currently being assembled.
    private String from;
    private String to;

    /** Character data collected for the current Text element. */
    private final StringBuilder text = new StringBuilder();

    /** True while the parser is between the start and end tag of a Text element. */
    private boolean insideText = false;

    /**
     * Creates a handler for the given history file.
     *
     * @param filename path of the XML file to parse
     */
    public SAXParser(String filename) {
        this.filename = filename;
    }

    /**
     * Parses the file with this object as the event handler and prints every
     * message to standard output.
     *
     * <p>On any parse, I/O or parser-configuration error the problem is
     * written to standard error and the JVM exits with status 1.
     */
    public void showMessage() {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        try {
            // Fully qualified because this class shares the simple name.
            javax.xml.parsers.SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(new File(filename), this);
        } catch (SAXException se) {
            System.err.println(se.getMessage());
            System.exit(1);
        } catch (IOException ioe) {
            System.err.println(ioe);
            System.exit(1);
        } catch (ParserConfigurationException e) {
            System.err.println(e);
            System.exit(1);
        }
    }

    /**
     * Prints the message assembled so far and clears the per-message state.
     * Text that has no sender or recipient is printed without a header.
     */
    private void dealMessage() {
        if (from == null && to == null) {
            log(text.toString());
        } else {
            log("(" + this.dateTime + ")" + from + " 对 " + to + " 说:"
                    + text);
        }
        // Always reset, so that an empty Text element cannot leak its
        // participants into the following message.
        this.from = null;
        this.to = null;
        this.text.setLength(0);
    }

    // The callbacks below fire in document order; startElement, characters
    // and endElement repeat for every element.

    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
        // Start from a clean slate so the handler can be reused.
        this.from = null;
        this.to = null;
        this.text.setLength(0);
        this.insideText = false;
        log("--------开始打印信息---------");
    }

    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        if (MESSAGE.equalsIgnoreCase(qName)) {
            this.date = attributes.getValue(DATE);
            this.time = attributes.getValue(TIME);
            this.dateTime = attributes.getValue(DATE_TIME);
        } else if (USER.equalsIgnoreCase(qName)) {
            // The first user seen is the sender, any later one the recipient.
            if (this.from == null) {
                this.from = attributes.getValue(FRIENDLY_NAME);
            } else {
                this.to = attributes.getValue(FRIENDLY_NAME);
            }
        } else if (TEXT.equalsIgnoreCase(qName)) {
            this.insideText = true;
            this.text.setLength(0);
        }
    }

    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        super.characters(ch, start, length);
        // A parser may deliver one text node in several chunks (for example
        // around entity references such as &amp;), so only accumulate here.
        if (this.insideText) {
            this.text.append(ch, start, length);
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        super.endElement(uri, localName, qName);
        // The message is complete only once its Text element has closed.
        if (this.insideText && TEXT.equalsIgnoreCase(qName)) {
            this.insideText = false;
            dealMessage();
        }
    }

    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
        log("--------打印信息结束---------");
    }

    /** Writes one line to standard output. */
    private void log(String str) {
        System.out.println(str);
    }
}
//-------测试---------
/**
*
*/
package sax;
import java.awt.FileDialog;
import java.awt.Frame;
/**
 * Manual test driver: lets the user pick a chat history file in an AWT file
 * dialog and prints its messages with {@link SAXParser}.
 *
 * @author zhuge
 */
public class XMLParserTest {
    /**
     * Shows the file dialog and parses the chosen file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Frame owner = new Frame();
        try {
            FileDialog dialog = new FileDialog(owner, "Open", FileDialog.LOAD);
            dialog.setVisible(true);

            // getFile() returns null when the user cancels the dialog.
            String file = dialog.getFile();
            if (file == null) {
                System.err.println("No file selected.");
                return;
            }
            String directory = dialog.getDirectory();
            String filename = (directory == null ? "" : directory) + file;

            SAXParser parser = new SAXParser(filename);
            parser.showMessage();
        } finally {
            // Release the native window resources on every exit path.
            owner.dispose();
        }
    }
}