将网页中的table导入excel2007

公司为了测试单板的性能,使用firefox工具查看每个请求的,因此使用了firebug的YSlow工具,但是每个选项都必须复制黏贴很是麻烦,因此这里写了一个小程序,将网页中的table导入到excel中

 

以前使用的jxl.jar这个工具包不支持office2007,为了解决这方面的问题,可以使用apache的POI。

 

现在大家已经慢慢的熟悉了office2007的操作,因此我也就用07作为示范了例子

 

模拟一个table例子

View Code
<html> 
<head>
<TITLE>class_obj_js_class</TITLE>
<body >
<table>
<tr>
<td style="color:#00ff00">01</td>
<td>02</td>
<td>03</td>
</tr>
<tr>
<td>11
</td>
<td>12</td>
<td>13</td>
</tr>
<tr>
<td style="color:#ff0000">2
1</td>
<td>22</td>
<td>23</td>
</tr>
</table>
</body>
</html>

读取网页中的table

package hb.com;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class TableToExcel {

//因为网页中可能对标签设置样式,因此只能根据前面的一部分可以判断
private static String ROW_FLAG_BEGIN = "<tr";
private static String ROW_FLAG_END = "</tr";
private static String COL_FLAG_BEGIN = "<td";
private static String COL_FLAG_END = "</td";
private static int NOT_HAS_STRING = -1;

public static void main(String[] args) {

try {
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream("temp.html")));
String temp;
String result = "";
//XSSFWorkbook创建的对象可以操作office2007,而HSSFWorkbook()创建的对象只能控制2003之前的版本
Workbook wb = new XSSFWorkbook();
// Workbook wb = new HSSFWorkbook();
Sheet sheet = wb.createSheet("huangbiao test");
int begin,end;
int rowNum = -1, colNum = -1;
Row row = null;
Cell cell = null;
//判断某一列内容是否已经结束
boolean hasContent = false;

while((temp = br.readLine()) != null){
begin = temp.indexOf(TableToExcel.COL_FLAG_BEGIN);
end = temp.indexOf(TableToExcel.COL_FLAG_END);
if(temp.indexOf(TableToExcel.ROW_FLAG_BEGIN) != TableToExcel.NOT_HAS_STRING){
rowNum = rowNum + 1;
colNum = -1;
row = sheet.createRow(rowNum);
}else{

}
//当前行有<td>标签
if(begin != TableToExcel.NOT_HAS_STRING)
{
colNum = colNum + 1;
hasContent = true;
cell = row.createCell(colNum);
//表明<td></td>两个标签在同一行
if(end != TableToExcel.NOT_HAS_STRING)
{
result = TableToExcel.getInSameRow(temp);
cell.setCellValue(result);
hasContent = false;
}else{
//表明<td></td>两个标签不在同一行,且只有开始标签,没有结束标签
result = TableToExcel.getInDiffRow(temp);
}

}
//当前行没有<td>标签
else{
//当前行有</td>标签且没有开始标签,需要把前面的数据插入到excel中
if(end != TableToExcel.NOT_HAS_STRING)
{
result = result + TableToExcel.getInBeforRow(temp);
cell.setCellValue(result);
hasContent = false;
}
}
}

// Write the output to a file
FileOutputStream fileOut = new FileOutputStream("test.xlsx");
wb.write(fileOut);
fileOut.close();

} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}

//获取<td></td>标签中间的字符串
public static String getInSameRow(String str){
//<(.*)> .* <\/\1>
Pattern p = Pattern.compile("(<td(.*)>)(.*)<\\/td>");
Matcher m = p.matcher(str);
//m.find()方法一定不能少,否则后面的group()方法通不过
System.out.println(m.find());
System.out.println(m.group(3));
return m.group(3);
}

//获取<td标签后面的字符串
public static String getInDiffRow(String str){
//<(.*)> .* <\/\1>
Pattern p = Pattern.compile("(<td(.*)>)(.*)");
Matcher m = p.matcher(str);
//m.find()方法一定不能少,否则后面的group()方法通不过
System.out.println(m.find());
System.out.println(m.group(3));
return m.group(3);
}

//获取结束行前面的字符串
public static String getInBeforRow(String str){
//<(.*)> .* <\/\1>
Pattern p = Pattern.compile("(.*)<\\/td>");
Matcher m = p.matcher(str);
//m.find()方法一定不能少,否则后面的group()方法通不过
System.out.println(m.find());
System.out.println(m.group(1));
return m.group(1);
}
}

读取excel中的数据,然后做相应的修改

View Code
package hb.com;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class ReadExcel {

public static String fileName = "test.xlsx";

public static void main(String[] args) {
ReadExcel.readAndRewrite(fileName);
}

public static void readAndRewrite(String fileName){
InputStream inp;
try {
inp = new FileInputStream(fileName);
Workbook wb = WorkbookFactory.create(inp);
//遍历所有的数据
ReadExcel.IteratorRowCell(wb);
//将excel的第一行第一列的数据改为huangbiao
Sheet sheet = wb.getSheetAt(0);
Row row = sheet.getRow(0);
Cell cell = row.getCell(0);
//给指定的单元格设定值
if (cell != null){
cell.setCellValue("huangbiao");
}
//设定完值之后然后通过数据流写入文件中保存起来
FileOutputStream fileOut = new FileOutputStream(fileName);
wb.write(fileOut);
fileOut.close();
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (InvalidFormatException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}

//遍历所有的数据
public static void IteratorRowCell(Workbook wb){
Sheet sheet = wb.getSheetAt(0);
for (Iterator<Row> rit = sheet.rowIterator(); rit.hasNext(); ) {
Row row = rit.next();
for (Iterator<Cell> cit = row.cellIterator(); cit.hasNext(); ) {
Cell cell = cit.next();
// Do something here
System.out.println(cell);
}
}
}
}

 

posted @ 2012-01-10 20:18  胖鹅  阅读(424)  评论(0编辑  收藏  举报