提取网页的图片链接的Java程序
输入网页文件名,和资源列表文件名
输出资源列表文件供迅雷下载。
适用于批量下载图片。
由两个文件组成。
没有提供网页下载功能,因为我没有时间写,相关的代码以后再贴。
第二个文件时解析文件
输出资源列表文件供迅雷下载。
适用于批量下载图片。
由两个文件组成。
没有提供网页下载功能,因为我没有时间写,相关的代码以后再贴。
1
//AnalizeIMG.java
2![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
3
//主程序
4![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
5
import java.io.BufferedReader;
6
import java.io.File;
7
import java.io.FileReader;
8
import java.io.FileWriter;
9
import java.io.IOException;
10![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
11![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
12
public class AnalizeIMG {
13![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
14
public void p(String s)
15
{
16
System.out.println(s);
17
}
18![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
19
public void analizeFile(String infile,String outfile) throws Exception
20
{
21
File file = new File(infile);
22
if (file == null || !file.exists()) {
23
p("File " + infile + " not exits !");
24
}
25![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
26
if (!file.canRead()) {
27
p("File " + infile + " can't read !");
28![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
29
}
30
31
String strLine = null;
32
FileReader frd = new FileReader(infile);
33
BufferedReader bufferedReader = new BufferedReader(frd);
34
try {
35
AnalizeWebParse parse = new AnalizeWebParse();
36
String s = parse.parse(bufferedReader);
37
38
createFile(outfile,s);
39
40
} catch (Exception ex) {
41
throw ex;
42
} finally {
43
frd.close();
44
bufferedReader.close();
45
}
46
}
47![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
48
private void createFile(String filename, String content) {
49
FileWriter f = null;
50
try {
51
f = new FileWriter(filename);
52
if (f == null || content == null) {
53
return;
54
}
55![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
56
f.write(content);
57
f.flush();
58
f.close();
59![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
60
} catch (Exception e) {
61![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
62
} finally {
63
if (f != null) {
64
try {
65
f.close();
66
} catch (Exception e) {
67![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
68
}
69
}
70
}
71
}
72![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
73
public static void main(String arg[])
74
{
75
AnalizeIMG ana = new AnalizeIMG();
76
try{
77
ana.analizeFile("E:\\1.txt","E:\\out.lst");
78
}catch (Exception ex) {
79
ex.printStackTrace();
80
}
81
}
82
}
83![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
84![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
2
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
3
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
4
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
5
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
6
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
7
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
8
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
9
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
10
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
11
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
12
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedBlockStart.gif)
13
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
14
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
15
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
16
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
17
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
18
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
19
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
20
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
21
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
22
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
23
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
24
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
25
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
26
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
27
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
28
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
29
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
30
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
31
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
32
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
33
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
34
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
35
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
36
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
37
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
38
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
39
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
40
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
41
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
42
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
43
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
44
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
45
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
46
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
47
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
48
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
49
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
50
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
51
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
52
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
53
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
54
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
55
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
56
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
57
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
58
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
59
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
60
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
61
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
62
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
63
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
64
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
65
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
66
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
67
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
68
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
69
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
70
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
71
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
72
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
73
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
74
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
75
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
76
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
77
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
78
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
79
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
80
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
81
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
82
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedBlockEnd.gif)
83
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
84
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
第二个文件时解析文件
1
//AnalizeWebParse.java
2![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
3
//网页分析代码,需要用户根据自己需要做适当修改
4![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
5
import java.io.BufferedReader;
6
import java.io.StringReader;
7
import java.util.regex.Pattern;
8![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
9
import javax.swing.text.MutableAttributeSet;
10
import javax.swing.text.html.HTML;
11
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
12
import javax.swing.text.html.parser.ParserDelegator;
13![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
14
public class AnalizeWebParse extends ParserCallback {
15![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
16
StringBuffer sb = new StringBuffer();
17![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
18
boolean start = false;
19
boolean finished = false;
20![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
21
public void p(String s)
22
{
23
System.out.println(s);
24
}
25![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
26
public void handleStartTag(HTML.Tag tag, MutableAttributeSet attribs,
27
int pos) {
28
29
if(finished == true)
30
{
31
return;
32
}
33
34
if (start == false) {
35
if (tag == HTML.Tag.DIV) {
36
String cla = (String) attribs
37
.getAttribute(HTML.Attribute.CLASS);
38
if (cla == null) {
39
return;
40
}
41![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
42
if (cla.indexOf("body") != -1) {
43
// Start
44
start = true;
45
}
46
}
47
}
48
}
49![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
50
public void handleEndTag(HTML.Tag tag, int pos) {
51
if (tag == HTML.Tag.DIV && start == true && finished == false) {
52
finished = true;
53
}
54
}
55![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
56
public void handleText(char[] text, int pos) {
57![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
58
}
59![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
60
public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
61
if (t == HTML.Tag.IMG) {
62
// get a src
63
String src = (String) a.getAttribute(HTML.Attribute.SRC);
64
if (src == null) {
65
return;
66
}
67
68
if (Pattern.matches("^(http://.+)", src)) {
69
sb.append(src).append("\n");
70
}
71
}
72
}
73![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
74
public String parse(BufferedReader file) throws Exception {
75
if(file==null)
76
{
77
return null;
78
}
79
80
ParserDelegator pd = new ParserDelegator();
81
try {
82
pd.parse(file, this, true);
83
} catch (Exception e) {
84
throw e;
85
}
86
87
return sb.toString();
88
}
89
}
90![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
2
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
3
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
4
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
5
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
6
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
7
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
8
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
9
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
10
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
11
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
12
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
13
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)
14
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedBlockStart.gif)
15
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
16
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
17
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
18
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
19
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
20
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
21
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
22
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
23
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
24
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
25
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
26
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
27
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
28
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
29
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
30
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
31
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
32
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
33
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
34
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
35
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
36
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
37
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
38
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
39
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
40
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
41
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
42
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
43
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
44
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
45
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
46
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
47
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
48
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
49
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
50
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
51
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
52
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
53
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
54
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
55
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
56
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
57
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
58
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
59
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
60
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
61
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
62
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
63
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
64
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
65
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
66
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
67
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
68
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
69
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
70
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
71
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
72
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
73
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
74
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
75
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
76
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
77
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
78
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
79
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
80
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
81
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
82
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
83
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
84
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
85
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
86
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
87
![](https://www.cnblogs.com/Images/OutliningIndicators/InBlock.gif)
88
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
89
![](https://www.cnblogs.com/Images/OutliningIndicators/ExpandedBlockEnd.gif)
90
![](https://www.cnblogs.com/Images/OutliningIndicators/None.gif)