提取网页的图片链接的Java程序
输入网页文件名,和资源列表文件名
输出资源列表文件供迅雷下载。
适用于批量下载图片。
由两个文件组成。
没有提供网页下载功能,因为我没有时间写,相关的代码以后再贴。
第二个文件是解析文件
输出资源列表文件供迅雷下载。
适用于批量下载图片。
由两个文件组成。
没有提供网页下载功能,因为我没有时间写,相关的代码以后再贴。
1//AnalizeIMG.java
2
3//主程序
4
5import java.io.BufferedReader;
6import java.io.File;
7import java.io.FileReader;
8import java.io.FileWriter;
9import java.io.IOException;
10
11
12public class AnalizeIMG {
13
14public void p(String s)
15{
16 System.out.println(s);
17}
18
19public void analizeFile(String infile,String outfile) throws Exception
20{
21 File file = new File(infile);
22 if (file == null || !file.exists()) {
23 p("File " + infile + " not exits !");
24 }
25
26 if (!file.canRead()) {
27 p("File " + infile + " can't read !");
28
29 }
30
31 String strLine = null;
32 FileReader frd = new FileReader(infile);
33 BufferedReader bufferedReader = new BufferedReader(frd);
34 try {
35 AnalizeWebParse parse = new AnalizeWebParse();
36 String s = parse.parse(bufferedReader);
37
38 createFile(outfile,s);
39
40 } catch (Exception ex) {
41 throw ex;
42 } finally {
43 frd.close();
44 bufferedReader.close();
45 }
46}
47
48private void createFile(String filename, String content) {
49 FileWriter f = null;
50 try {
51 f = new FileWriter(filename);
52 if (f == null || content == null) {
53 return;
54 }
55
56 f.write(content);
57 f.flush();
58 f.close();
59
60 } catch (Exception e) {
61
62 } finally {
63 if (f != null) {
64 try {
65 f.close();
66 } catch (Exception e) {
67
68 }
69 }
70 }
71}
72
73public static void main(String arg[])
74{
75 AnalizeIMG ana = new AnalizeIMG();
76 try{
77 ana.analizeFile("E:\\1.txt","E:\\out.lst");
78 }catch (Exception ex) {
79 ex.printStackTrace();
80 }
81}
82}
83
84
2
3//主程序
4
5import java.io.BufferedReader;
6import java.io.File;
7import java.io.FileReader;
8import java.io.FileWriter;
9import java.io.IOException;
10
11
12public class AnalizeIMG {
13
14public void p(String s)
15{
16 System.out.println(s);
17}
18
19public void analizeFile(String infile,String outfile) throws Exception
20{
21 File file = new File(infile);
22 if (file == null || !file.exists()) {
23 p("File " + infile + " not exits !");
24 }
25
26 if (!file.canRead()) {
27 p("File " + infile + " can't read !");
28
29 }
30
31 String strLine = null;
32 FileReader frd = new FileReader(infile);
33 BufferedReader bufferedReader = new BufferedReader(frd);
34 try {
35 AnalizeWebParse parse = new AnalizeWebParse();
36 String s = parse.parse(bufferedReader);
37
38 createFile(outfile,s);
39
40 } catch (Exception ex) {
41 throw ex;
42 } finally {
43 frd.close();
44 bufferedReader.close();
45 }
46}
47
48private void createFile(String filename, String content) {
49 FileWriter f = null;
50 try {
51 f = new FileWriter(filename);
52 if (f == null || content == null) {
53 return;
54 }
55
56 f.write(content);
57 f.flush();
58 f.close();
59
60 } catch (Exception e) {
61
62 } finally {
63 if (f != null) {
64 try {
65 f.close();
66 } catch (Exception e) {
67
68 }
69 }
70 }
71}
72
73public static void main(String arg[])
74{
75 AnalizeIMG ana = new AnalizeIMG();
76 try{
77 ana.analizeFile("E:\\1.txt","E:\\out.lst");
78 }catch (Exception ex) {
79 ex.printStackTrace();
80 }
81}
82}
83
84
第二个文件是解析文件
1//AnalizeWebParse.java
2
3//网页分析代码,需要用户根据自己需要做适当修改
4
5import java.io.BufferedReader;
6import java.io.StringReader;
7import java.util.regex.Pattern;
8
9import javax.swing.text.MutableAttributeSet;
10import javax.swing.text.html.HTML;
11import javax.swing.text.html.HTMLEditorKit.ParserCallback;
12import javax.swing.text.html.parser.ParserDelegator;
13
/**
 * Web page analysis code: an HTML parser callback that collects the absolute
 * URLs of the images in a document. Adapt it to the pages you need to process.
 */
public class AnalizeWebParse extends ParserCallback {

    /** Accepts absolute http and https URLs; compiled once and reused. */
    private static final Pattern ABSOLUTE_URL = Pattern.compile("^(https?://.+)");

    /** Collected image URLs, each followed by a newline. */
    StringBuffer sb = new StringBuffer();

    /*
     * Track whether a DIV whose class contains "body" has been opened and
     * whether a DIV has been closed after that. handleSimpleTag does not
     * consult these flags, so they do not restrict which images are collected.
     */
    boolean start = false;
    boolean finished = false;

    /**
     * Prints a line to standard output.
     *
     * @param s the text to print
     */
    public void p(String s) {
        System.out.println(s);
    }

    /**
     * Sets the start flag on the first DIV whose class attribute contains
     * "body". Does nothing once start or finished is set.
     *
     * @param tag     the tag being opened
     * @param attribs the attributes of the tag
     * @param pos     position of the tag in the document
     */
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attribs,
            int pos) {
        if (finished || start || tag != HTML.Tag.DIV) {
            return;
        }

        // Type-checked rather than cast, so a non-string value is ignored.
        Object cla = attribs.getAttribute(HTML.Attribute.CLASS);
        if (cla instanceof String && ((String) cla).indexOf("body") != -1) {
            start = true;
        }
    }

    /**
     * Sets the finished flag on the first closing DIV seen after start was set.
     *
     * @param tag the tag being closed
     * @param pos position of the tag in the document
     */
    public void handleEndTag(HTML.Tag tag, int pos) {
        if (tag == HTML.Tag.DIV && start && !finished) {
            finished = true;
        }
    }

    /**
     * Text content is ignored.
     *
     * @param text the text found
     * @param pos  position of the text in the document
     */
    public void handleText(char[] text, int pos) {
    }

    /**
     * Records the src of every IMG tag whose value is an absolute http or
     * https URL; other tags and other src values are skipped.
     *
     * @param t   the tag found
     * @param a   the attributes of the tag
     * @param pos position of the tag in the document
     */
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        if (t != HTML.Tag.IMG) {
            return;
        }

        Object src = a.getAttribute(HTML.Attribute.SRC);
        if (src instanceof String && ABSOLUTE_URL.matcher((String) src).matches()) {
            sb.append((String) src).append("\n");
        }
    }

    /**
     * Parses an HTML document and returns the image URLs found in it.
     * Results of an earlier call on the same instance are discarded first.
     *
     * @param file reader positioned at the start of the HTML document
     * @return the collected URLs, one per line, each terminated by a newline;
     *         null when file is null
     * @throws Exception if reading or parsing the document fails
     */
    public String parse(BufferedReader file) throws Exception {
        if (file == null) {
            return null;
        }

        // Reset state so a reused instance does not repeat earlier links.
        sb.setLength(0);
        start = false;
        finished = false;

        ParserDelegator pd = new ParserDelegator();
        pd.parse(file, this, true);

        return sb.toString();
    }
}
90
2
3//网页分析代码,需要用户根据自己需要做适当修改
4
5import java.io.BufferedReader;
6import java.io.StringReader;
7import java.util.regex.Pattern;
8
9import javax.swing.text.MutableAttributeSet;
10import javax.swing.text.html.HTML;
11import javax.swing.text.html.HTMLEditorKit.ParserCallback;
12import javax.swing.text.html.parser.ParserDelegator;
13
/**
 * Web page analysis code: an HTML parser callback that collects the absolute
 * URLs of the images in a document. Adapt it to the pages you need to process.
 */
public class AnalizeWebParse extends ParserCallback {

    /** Accepts absolute http and https URLs; compiled once and reused. */
    private static final Pattern ABSOLUTE_URL = Pattern.compile("^(https?://.+)");

    /** Collected image URLs, each followed by a newline. */
    StringBuffer sb = new StringBuffer();

    /*
     * Track whether a DIV whose class contains "body" has been opened and
     * whether a DIV has been closed after that. handleSimpleTag does not
     * consult these flags, so they do not restrict which images are collected.
     */
    boolean start = false;
    boolean finished = false;

    /**
     * Prints a line to standard output.
     *
     * @param s the text to print
     */
    public void p(String s) {
        System.out.println(s);
    }

    /**
     * Sets the start flag on the first DIV whose class attribute contains
     * "body". Does nothing once start or finished is set.
     *
     * @param tag     the tag being opened
     * @param attribs the attributes of the tag
     * @param pos     position of the tag in the document
     */
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attribs,
            int pos) {
        if (finished || start || tag != HTML.Tag.DIV) {
            return;
        }

        // Type-checked rather than cast, so a non-string value is ignored.
        Object cla = attribs.getAttribute(HTML.Attribute.CLASS);
        if (cla instanceof String && ((String) cla).indexOf("body") != -1) {
            start = true;
        }
    }

    /**
     * Sets the finished flag on the first closing DIV seen after start was set.
     *
     * @param tag the tag being closed
     * @param pos position of the tag in the document
     */
    public void handleEndTag(HTML.Tag tag, int pos) {
        if (tag == HTML.Tag.DIV && start && !finished) {
            finished = true;
        }
    }

    /**
     * Text content is ignored.
     *
     * @param text the text found
     * @param pos  position of the text in the document
     */
    public void handleText(char[] text, int pos) {
    }

    /**
     * Records the src of every IMG tag whose value is an absolute http or
     * https URL; other tags and other src values are skipped.
     *
     * @param t   the tag found
     * @param a   the attributes of the tag
     * @param pos position of the tag in the document
     */
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        if (t != HTML.Tag.IMG) {
            return;
        }

        Object src = a.getAttribute(HTML.Attribute.SRC);
        if (src instanceof String && ABSOLUTE_URL.matcher((String) src).matches()) {
            sb.append((String) src).append("\n");
        }
    }

    /**
     * Parses an HTML document and returns the image URLs found in it.
     * Results of an earlier call on the same instance are discarded first.
     *
     * @param file reader positioned at the start of the HTML document
     * @return the collected URLs, one per line, each terminated by a newline;
     *         null when file is null
     * @throws Exception if reading or parsing the document fails
     */
    public String parse(BufferedReader file) throws Exception {
        if (file == null) {
            return null;
        }

        // Reset state so a reused instance does not repeat earlier links.
        sb.setLength(0);
        start = false;
        finished = false;

        ParserDelegator pd = new ParserDelegator();
        pd.parse(file, this, true);

        return sb.toString();
    }
}
90