原帖:Java 之正则表达式
JDK中的正则表达式处理代码:
JDK正则表达式
1 package example.regularexpressions;
2
3 import java.util.regex.MatchResult;
4 import java.util.regex.Matcher;
5 import java.util.regex.Pattern;
6
7 import junit.framework.TestCase;
8
9 public class Basics extends TestCase {
10
11 /**
12 * Pattern类:
13 * Pattern类的静态方法compile用来编译正则表达式,在此[,\\s]+表示若干个","或者若干个空格匹配
14 * split方法使用正则匹配将字符串切割成各子串并且返回
15 * @throws Exception
16 */
17 public void _test1() throws Exception {
18 Pattern pattern = Pattern.compile("[,\\s]+");
19 String[] result = pattern.split("one two three,four , five,six");
20 for (String str : result) {
21 System.out.println(str);
22 }
23 }
24
25 /**
26 * Matcher类:
27 * 注意,Matcher的获得是通过Pattern.matcher(CharSequence charSequence);输入必须是实现了CharSequence接口的类
28 * 常用方法:
29 * matches()判断整个输入串是否匹配,整个匹配则返回true
30 * lookingAt()从头开始寻找,找到匹配则返回true
31 * @throws Exception
32 */
33 public void _test2() throws Exception {
34 String str1 = "hello";
35 Pattern pattern1 = Pattern.compile("hello");
36 Matcher matcher1 = pattern1.matcher(str1);
37 System.out.println("matcher1.matches()=>" + matcher1.matches());
38
39 String str2 = "hello world";
40 Pattern pattern2 = Pattern.compile("hello");
41 Matcher matcher2 = pattern2.matcher(str2);
42 System.out.println("matcher2.matches()=>" + matcher2.matches());
43 System.out.println("matcher2.lookingAt()=>" + matcher2.lookingAt());
44 }
45
46 /**
47 * find()扫描输入串,寻找下一个匹配子串,存在则返回true
48 * @throws Exception
49 */
50 public void _test3() throws Exception {
51 Pattern pattern = Pattern.compile("hello");
52 Matcher matcher = pattern.matcher("hello world, hello world, hello_world");
53 StringBuffer sb = new StringBuffer();
54 boolean find = matcher.find();
55 while(find) {
56 matcher.appendReplacement(sb, "haha"); //实现非终端添加和替换步骤
57 find = matcher.find();
58 System.out.println("sb=>" + sb);
59 }
60 matcher.appendTail(sb); //实现终端添加和替换步骤
61 System.out.println(sb.toString());
62 }
63
64 /**
65 * 匹配IP地址
66 * IP地址中的句点字符必须进行转义处理(前面加上“\”),因为IP地址中的句点具有它本来的含义,
67 * 而不是采用正则表达式语法中的特殊含义。句点在正则表达式中的特殊含义本文前面已经介绍。
68 * 日志记录的时间部分由一对方括号包围。你可以按照如下思路提取出方括号里面的所有内容:
69 * 首先搜索起始方括号字符(“[”),提取出所有不超过结束方括号字符(“]”)的内容,向前寻找直至找到结束方括号字符。
70 * @throws Exception
71 */
72 public void _test4() throws Exception {
73 String logEntry = "192.168.0.1 - - [26/Feb/2009:14:56:43 -0500]\"GET /lsAlive/ht HTTP/1.0\"200 15\r\n"
74 +"192.168.0.2 - - [25/Feb/2009:14:56:43 -0500]\"GET /lsAlive/ht HTTP/1.0\"200 15";
75 String regexp = "([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})\\s-\\s-\\s\\[([^\\]]+)\\]";
76 Pattern pattern = Pattern.compile(regexp);
77 Matcher matcher = pattern.matcher(logEntry);
78 boolean find = matcher.find();
79 while(find) {
80 MatchResult result = matcher.toMatchResult();
81 System.out.println("IP=>" + result.group(1));
82 System.out.println("Timestamp=>" + result.group(2));
83 find = matcher.find();
84 }
85 }
86
87 /**
88 * HTML处理
89 * 分析HTML页面内FONT标记的所有属性
90 * @throws Exception
91 */
92 public void _test5() throws Exception {
93 String html = "<font face=\"Arial Serif\" size=\"+2\" color=\"red\">";
94 String regexForTag = "<\\s*font\\s+([^>]*)\\s*>";
95
96 Pattern pattern = Pattern.compile(regexForTag, Pattern.CASE_INSENSITIVE);
97 Matcher matcher = pattern.matcher(html);
98
99 boolean find = matcher.find();
100
101 String attribute = matcher.group(1);
102 System.out.println("属性字符串为:" + attribute);
103
104 String regexForAttribute = "([a-z]+)\\s*=\\s*\"([^\"]+)\"";
105 Pattern pattern2 = Pattern.compile(regexForAttribute, Pattern.CASE_INSENSITIVE);
106 Matcher matcher2 = pattern2.matcher(attribute);
107
108 boolean find2 = matcher2.find();
109
110 while(find2) {
111 MatchResult result = matcher2.toMatchResult();
112 System.out.println(result.group(1) + "=" + result.group(2));
113 find2 = matcher2.find();
114 }
115 }
116
117 /**
118 * HTML处理
119 * 修改一些页面中的链接
120 * @throws Exception
121 */
122 public void test6() throws Exception {
123 String url = "<a href=\"http://192.168.0.1:8080/test/index.jsp#test...\">"
124 + "< a href = \"http://192.168.0.1:8080/test/index.jsp#?hahahaha...\">";
125 String regex = "(<\\s*a\\s+href\\s*=\\s*\"http://192.168.0.1:8080/test/index.jsp[^\"]+\">)";
126 Pattern pattern = Pattern.compile(regex);
127 Matcher matcher = pattern.matcher(url);
128 boolean find = matcher.find();
129 System.out.println("find=>" + find);
130 while(find) {
131 MatchResult result = matcher.toMatchResult();
132 String temp = result.group(1);
133 System.out.println("替换前=>" + temp);
134 temp = temp.replace("192.168.0.1", "localhost");
135 System.out.println("替换后=>" + temp);
136 find = matcher.find();
137 }
138 }
139
140 /**
141 * 4种常用功能:
142 * 1、查询:
143 * 如果str中有regEx,那么rs为true,否则为flase。如果想在查找时忽略大小写,
144 * 则可以写成Pattern p=Pattern.compile(regEx,Pattern.CASE_INSENSITIVE);
145 * @throws Exception
146 */
147 public void _testQuery() throws Exception {
148 String str = "abc efg ABC";
149 String regEx = "a|f";
150 Pattern pattern = Pattern.compile(regEx);
151 Matcher matcher = pattern.matcher(str);
152 boolean rs = matcher.find();
153 System.out.println("rs=>" + rs);
154 }
155
156 /**
157 * 2、提取:
158 * 执行结果为name.txt,提取的字符串储存在m.group(i)中,其中i最大值为m.groupCount();
159 * @throws Exception
160 */
161 public void _testGet() throws Exception {
162 String regEx = ".+\\\\(.+)$";
163 String str = "c:\\dir1\\dir2\\name.txt";
164 Pattern pattern = Pattern.compile(regEx);
165 Matcher matcher = pattern.matcher(str);
166 boolean rs = matcher.find();
167 for (int i = 1; i <= matcher.groupCount(); i++) {
168 System.out.println(matcher.group(i));
169 }
170 }
171
172 /**
173 * 3、分割:
174 * @throws Exception
175 */
176 public void _testSplit() throws Exception {
177 String regex = "::";
178 Pattern pattern = Pattern.compile(regex);
179 String[] result = pattern.split("aa::bb::cc");
180 for (String str : result)
181 System.out.println("result=>" + str);
182
183 System.out.println("---------");
184 String[] normal = "aa::bb::cc".split(regex);
185 for (String str : normal)
186 System.out.println("nornal=>" + str);
187 }
188
189 /**
190 * 4、替换(删除):
191 * 如果写成空串,既可达到删除的功能
192 * @throws Exception
193 */
194 public void _testReplaceOrDelete() throws Exception {
195 String regex = "a+";
196 Pattern pattern = Pattern.compile(regex);
197 Matcher matcher = pattern.matcher("aaabbced a ccdeaa");
198 System.out.println("replaceFirst=>" + matcher.replaceFirst("A"));
199 String result = matcher.replaceAll("A");
200 System.out.println("replaceAll=>" + result);
201 String delete = matcher.replaceAll("");
202 System.out.println("替换为空即可达到删除的功能");
203 }
204
205 @Override
206 protected void setUp() throws Exception {
207 // TODO Auto-generated method stub
208 super.setUp();
209 }
210
211 @Override
212 protected void tearDown() throws Exception {
213 // TODO Auto-generated method stub
214 super.tearDown();
215 }
216
217 }
218