正则表达式
所有对于正则表达式的操作位于java.util.regex包下。
两个重要的类：Pattern（编译后的正则表达式）和 Matcher（对输入字符序列执行匹配操作）。
package com.anllin.regex; import java.util.regex.Matcher; import java.util.regex.Pattern; public class Matches { public static void main(String[] args) { Matcher matcher = Pattern.compile("\\w+").matcher( "this is a regex test, the fisrt program"); while (matcher.find()) { System.out.println("[" + matcher.group() + "]"); } System.out.println("---------------------------"); int i = 0; while (matcher.find(i)) { System.out.print("[" + matcher.group() + "]"); i++; } } } |
输出结果
[this] [is] [a] [regex] [test] [the] [fisrt] [program] --------------------------- [this][his][is][s][is][is][s][a][a][regex][regex][egex][gex][ex][x][test][test][est][st][t][the][the][the][he][e][fisrt][fisrt][isrt][srt][rt][t][program][program][rogram][ogram][gram][ram][am][m] |
分组
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Prints every capturing group of each match of a pattern that contains
 * four groups: {@code (\S+)}, the nested pair {@code ((\S+)\s+(\S+))},
 * and its two inner groups.
 */
public class Group {
    public static void main(String[] args) {
        String statement =
                "this is a test about the regex group, the method groupCount is used";
        Matcher m = Pattern.compile("(?m)(\\S+)\\s+((\\S+)\\s+(\\S+))")
                .matcher(statement);

        while (m.find()) {
            // groupCount() does not count group 0 (the whole match), so the
            // valid indices are 0..groupCount() inclusive. An exclusive bound
            // would silently skip the last capturing group.
            for (int i = 0; i <= m.groupCount(); i++) {
                System.out.println("[" + m.group(i) + "]");
            }
        }
    }
}
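输出结果（以下输出对应循环条件 i < m.groupCount()：groupCount() 不包含 group(0)，该模式共有 4 个捕获组，因此每次匹配只打印 group(0) 到 group(3)；循环条件写成 i <= m.groupCount() 时才会同时打印 group(4)）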
[this is a] [this] [is a] [is] [test about the] [test] [about the] [about] [regex group,
the] [regex] [group, the] [group,] [method
groupCount is] [method] [groupCount is] [groupCount] |
start() 和 end() 的使用
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Runs two patterns ({@code re\w*} and {@code java.*}) against three sample
 * sentences and, for every successful {@code find()}, {@code lookingAt()}
 * and {@code matches()} call, prints the matched text with its
 * {@code start()} and {@code end()} offsets.
 */
public class StartEnd {
    public static void main(String[] args) {
        String[] input = {
            "java has regular expressing in 1.4",
            "regular expressing now expressing in java",
            "java represses oracular expressions"
        };
        Pattern p1 = Pattern.compile("re\\w*");
        Pattern p2 = Pattern.compile("java.*");

        for (int i = 0; i < input.length; i++) {
            String text = input[i];
            System.out.println("input" + i + ":" + text);

            Matcher m1 = p1.matcher(text);
            Matcher m2 = p2.matcher(text);

            // Every occurrence anywhere in the text.
            while (m1.find()) {
                report("m1.find()", m1);
            }
            while (m2.find()) {
                report("m2.find()", m2);
            }

            // A match anchored at the beginning of the text.
            if (m1.lookingAt()) {
                report("m1.lookingAt()", m1);
            }
            if (m2.lookingAt()) {
                report("m2.lookingAt()", m2);
            }

            // A match that must cover the entire text.
            if (m1.matches()) {
                report("m1.matches()", m1);
            }
            if (m2.matches()) {
                report("m2.matches()", m2);
            }

            System.out.println();
        }
    }

    /** Prints the current match of {@code m} with its start and end offsets. */
    private static void report(String label, Matcher m) {
        System.out.println(label + " [" + m.group() + "] start = "
                + m.start() + ",end = " + m.end());
    }
}
输出结果:
input0:java has regular expressing in 1.4
m1.find() [regular] start = 9,end = 16
m1.find() [ressing] start = 20,end = 27
m2.find() [java has regular expressing in 1.4] start = 0,end = 34
m2.lookingAt() [java has regular expressing in 1.4] start = 0,end = 34
m2.matches() [java has regular expressing in 1.4] start = 0,end = 34

input1:regular expressing now expressing in java
m1.find() [regular] start = 0,end = 7
m1.find() [ressing] start = 11,end = 18
m1.find() [ressing] start = 26,end = 33
m2.find() [java] start = 37,end = 41
m1.lookingAt() [regular] start = 0,end = 7

input2:java represses oracular expressions
m1.find() [represses] start = 5,end = 14
m1.find() [ressions] start = 27,end = 35
m2.find() [java represses oracular expressions] start = 0,end = 35
m2.lookingAt() [java represses oracular expressions] start = 0,end = 35
m2.matches() [java represses oracular expressions] start = 0,end = 35
模式标记
public static Pattern compile(String regex,int flags)
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Shows the effect of combining {@code CASE_INSENSITIVE} and
 * {@code MULTILINE}: {@code ^java} is searched for at the start of every
 * line of a four-line text, ignoring case.
 */
public class Flag {
    public static void main(String[] args) {
        int flags = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE;
        String text = "java has regex \njava has regex \n"
                + "JAVA has pretty good regular expressions\n"
                + "Regular expressions are in java";

        Matcher lineStarts = Pattern.compile("^java", flags).matcher(text);
        while (lineStarts.find()) {
            System.out.println(lineStarts.group());
        }
    }
}
输出结果: java java JAVA |
split() 方法的使用
public String[] split(CharSequence input,int limit)
public String[] split(CharSequence input)
import java.util.Arrays;
import java.util.regex.Pattern;

/**
 * Splits a sample string on {@code "!!"} with and without a limit using
 * {@code Pattern.split}, then splits a second string on a single space
 * using {@code String.split}.
 */
public class SplitDemo {
    public static void main(String[] args) {
        String input = "This!!unusual use!!of exclamation!!points";
        Pattern delimiter = Pattern.compile("!!");

        // No limit: split at every occurrence of the delimiter.
        String[] allParts = delimiter.split(input);
        // Limit 3: at most three elements; the last keeps the remaining text.
        String[] firstThree = delimiter.split(input, 3);
        // String has its own split() that takes the regex directly.
        String[] words = "Aha! String has a split() built in".split(" ");

        System.out.println(Arrays.asList(allParts));
        System.out.println(Arrays.asList(firstThree));
        System.out.println(Arrays.asList(words));
    }
}
输出结果:
[This, unusual use, of exclamation, points]
[This, unusual use, of exclamation!!points]
[Aha!, String, has, a, split(), built, in]
替换操作
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Demonstrates the replacement operations of the regex API
 * ({@code replaceAll}, {@code replaceFirst}, {@code appendReplacement},
 * {@code appendTail}) on the text of a source file.
 *
 * <p>The file {@code src/com/anllin/regex/ReplaceTest.java} is read relative
 * to the working directory. If it contains a block delimited by
 * {@code /*!} and {@code !*}{@code /}, only the text inside that block is
 * processed; otherwise the whole file is.
 */
public class ReplaceTest {
    public static void main(String[] args) throws Exception {
        String s = readAll("src/com/anllin/regex/ReplaceTest.java");

        // Match the specially-commented block of text, if present.
        Matcher mInput = Pattern.compile("/\\*!(.*)!\\*/", Pattern.DOTALL)
                .matcher(s);
        if (mInput.find()) {
            // Captured by parentheses
            s = mInput.group(1);
        }

        // Replace two or more spaces with a single space.
        s = s.replaceAll(" {2,}", " ");
        // Replace one or more spaces at the beginning of each line with no
        // spaces. MULTILINE mode makes ^ match at every line start; the
        // space before '+' is what is being repeated.
        s = s.replaceAll("(?m)^ +", "");
        System.out.println(s);

        s = s.replaceFirst("[aeiou]", "(VOWEL1)");

        StringBuffer sbuf = new StringBuffer();
        Matcher m = Pattern.compile("[aeiou]").matcher(s);
        // Process the find information as the replacements are performed:
        // every remaining lowercase vowel is replaced by its uppercase form.
        while (m.find()) {
            m.appendReplacement(sbuf, m.group().toUpperCase());
        }
        // Put in the remainder of the text.
        m.appendTail(sbuf);
        System.out.println(sbuf);
    }

    /**
     * Reads the whole file at {@code path} into a string, ending every line
     * with {@code '\n'} so that line-based (MULTILINE) patterns still see
     * the line structure. The reader is closed even if reading fails.
     */
    private static String readAll(String path) throws Exception {
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(path)));
        try {
            StringBuffer sb = new StringBuffer();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the terminator; put one back.
                sb.append(line).append('\n');
            }
            return sb.toString();
        } finally {
            reader.close();
        }
    }
}
输出结果
package
com.anllin.regex;import java.io.BufferedReader;import
java.io.FileInputStream;import java.io.InputStreamReader;import
java.util.regex.Matcher;import java.util.regex.Pattern;public class
ReplaceTest{ public static void
main(String[] args) throws Exception { BufferedReader reader = new
BufferedReader(new InputStreamReader( new
FileInputStream("src/com/anllin/regex/ReplaceTest.java"))); String str = null; StringBuffer sb = new StringBuffer(); while (null != (str = reader.readLine())) { sb.append(str); } String
s = sb.toString(); // Match the
specially-commented block of text above: Matcher
mInput = Pattern.compile("/\\*!(.*)!\\*/", Pattern.DOTALL) .matcher(s); if (mInput.find()) { //
Captured by parentheses s =
mInput.group(1); } // Replace two or more spaces with a
single space: s =
s.replaceAll(" {2,}", " "); // Replace one or more spaces at the beginning of each line
with no // spaces.Must enable
MULTILINE mode. s =
s.replaceAll("(?m)^+", ""); System.out.println(s); s = s.replaceFirst("[aeiou]",
"(VOWEL1)"); StringBuffer
sbuf = new StringBuffer(); Pattern
p = Pattern.compile("[aeiou]"); Matcher m = p.matcher(s); // Process the find information as you
perform the replacements: while
(m.find()) { m.appendReplacement(sbuf,
m.group().toUpperCase()); } // Put in the remainder of ther text: m.appendTail(sbuf); System.out.println(sbuf); }} -------------------------------------------------------------------- p(VOWEL1)ckAgE
cOm.AnllIn.rEgEx;ImpOrt jAvA.IO.BUffErEdREAdEr;ImpOrt
jAvA.IO.FIlEInpUtStrEAm;ImpOrt jAvA.IO.InpUtStrEAmREAdEr;ImpOrt
jAvA.UtIl.rEgEx.MAtchEr;ImpOrt jAvA.UtIl.rEgEx.PAttErn;pUblIc clAss
REplAcETEst{ pUblIc stAtIc vOId
mAIn(StrIng[] Args) thrOws ExcEptIOn { BUffErEdREAdEr rEAdEr = nEw BUffErEdREAdEr(nEw
InpUtStrEAmREAdEr( nEw
FIlEInpUtStrEAm("src/cOm/AnllIn/rEgEx/REplAcETEst.jAvA"))); StrIng str = nUll; StrIngBUffEr sb = nEw StrIngBUffEr(); whIlE (nUll != (str = rEAdEr.rEAdLInE())) { sb.AppEnd(str); } StrIng
s = sb.tOStrIng(); // MAtch thE
spEcIAlly-cOmmEntEd blOck Of tExt AbOvE: MAtchEr
mInpUt = PAttErn.cOmpIlE("/\\*!(.*)!\\*/", PAttErn.DOTALL) .mAtchEr(s); If (mInpUt.fInd()) { //
CAptUrEd by pArEnthEsEs s =
mInpUt.grOUp(1); } // REplAcE twO Or mOrE spAcEs wIth A
sInglE spAcE: s =
s.rEplAcEAll(" {2,}", " "); // REplAcE OnE Or mOrE spAcEs At thE bEgInnIng Of EAch lInE
wIth nO // spAcEs.MUst EnAblE
MULTILINE mOdE. s =
s.rEplAcEAll("(?m)^+", ""); SystEm.OUt.prIntln(s); s
= s.rEplAcEFIrst("[AEIOU]", "(VOWEL1)"); StrIngBUffEr sbUf = nEw StrIngBUffEr(); PAttErn p =
PAttErn.cOmpIlE("[AEIOU]"); MAtchEr m = p.mAtchEr(s); // PrOcEss thE fInd InfOrmAtIOn As yOU
pErfOrm thE rEplAcEmEnts: whIlE
(m.fInd()) { m.AppEndREplAcEmEnt(sbUf,
m.grOUp().tOUppErCAsE()); } // PUt In thE rEmAIndEr Of thEr tExt: m.AppEndTAIl(sbUf); SystEm.OUt.prIntln(sbUf); }} |
reset() 方法的使用
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Shows {@code Matcher.reset(CharSequence)}: one matcher for the pattern
 * {@code [frb][aiu][gx]} is run over a first sentence, then pointed at a
 * second sentence and run again.
 */
public class ResetTest {
    public static void main(String[] args) {
        Pattern threeLetters = Pattern.compile("[frb][aiu][gx]");
        Matcher m = threeLetters.matcher("fix the rug with bags");
        printMatches(m);

        // Reuse the same matcher on new input instead of creating another.
        m.reset("fix the rig with rags");
        printMatches(m);
    }

    /** Prints each remaining match of {@code m} on its own line. */
    private static void printMatches(Matcher m) {
        while (m.find()) {
            System.out.println(m.group());
        }
    }
}
Output:
fix rug bag fix rig rag |
Summary of regular-expression constructs
|