

两个重要的类:Matcher 和 Pattern

package com.anllin.regex;


import java.util.regex.Matcher;

import java.util.regex.Pattern;


/**
 * Demonstrates the two flavours of {@link Matcher#find()}: the no-argument
 * form that walks forward through the input, and the indexed form that
 * restarts the search from a given position.
 */
public class Matches
{
    /**
     * Prints every word of a fixed sample sentence, one per line, and then
     * prints the first word found from each successive start index.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        Matcher matcher = Pattern.compile("\\w+").matcher(
                "this is a regex test, the fisrt program");

        // find() resumes where the previous match ended.
        while (matcher.find())
        {
            System.out.println("[" + matcher.group() + "]");
        }

        // find(int) resets the matcher and searches from the given index,
        // so the index has to advance on every pass; otherwise the same
        // match is returned forever and the loop never terminates.
        int i = 0;
        while (matcher.find(i))
        {
            System.out.print("[" + matcher.group() + "]");
            i++;
        }
    }
}



















import java.util.regex.Matcher;

import java.util.regex.Pattern;


/**
 * Demonstrates capturing groups and {@link Matcher#groupCount()}.
 */
public class Group
{
    /**
     * Splits a fixed sentence into runs of three whitespace-separated tokens
     * and prints every group of every match, one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String statement = "this is a test about the regex group, the method groupCount is used";

        Matcher m = Pattern.compile("(?m)(\\S+)\\s+((\\S+)\\s+(\\S+))")
                .matcher(statement);

        while (m.find())
        {
            // groupCount() does not count group 0 (the whole match), so the
            // valid indices are 0..groupCount() inclusive. Using '<' here
            // would silently skip the last capturing group.
            for (int i = 0; i <= m.groupCount(); i++)
            {
                System.out.println("[" + m.group(i) + "]");
            }
        }
    }
}






[this is a]


[is a]


[test about the]


[about the]


[regex group, the]


[group, the]


[method groupCount is]


[groupCount is]



start() 和 end() 的使用

import java.util.regex.Matcher;

import java.util.regex.Pattern;


/**
 * Demonstrates {@link Matcher#start()} and {@link Matcher#end()} together
 * with the three matching operations {@code find()}, {@code lookingAt()}
 * and {@code matches()}.
 */
public class StartEnd
{
    /**
     * Runs two patterns against three sample sentences and prints the text
     * and the start/end offsets of every successful match.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String[] input = new String[] { "java has regular expressing in 1.4",
                "regular expressing now expressing in java",
                "java represses oracular expressions" };

        Pattern p1 = Pattern.compile("re\\w*");
        Pattern p2 = Pattern.compile("java.*");

        for (int i = 0; i < input.length; i++)
        {
            System.out.println("input" + i + ":" + input[i]);

            Matcher m1 = p1.matcher(input[i]);
            Matcher m2 = p2.matcher(input[i]);

            while (m1.find())
            {
                report("m1.find()", m1);
            }

            while (m2.find())
            {
                report("m2.find()", m2);
            }

            // group()/start()/end() throw IllegalStateException unless the
            // preceding match operation succeeded, so each report below is
            // guarded by the operation it describes.
            if (m1.lookingAt())
            {
                report("m1.lookingAt()", m1);
            }

            if (m2.lookingAt())
            {
                report("m2.lookingAt()", m2);
            }

            if (m1.matches())
            {
                report("m1.matches()", m1);
            }

            if (m2.matches())
            {
                report("m2.matches()", m2);
            }
        }
    }

    /** Prints the current match of {@code m} with its offsets, prefixed by {@code label}. */
    private static void report(String label, Matcher m)
    {
        System.out.println(label + " [" + m.group() + "] start = "
                + m.start() + ",end = " + m.end());
    }
}








input0:java has regular expressing in 1.4

m1.find() [regular] start = 9,end = 16

m1.find() [ressing] start = 20,end = 27

m2.find() [java has regular expressing in 1.4] start = 0,end = 34

m2.lookingAt() [java has regular expressing in 1.4] start = 0,end = 34

m2.matches() [java has regular expressing in 1.4] start = 0,end = 34


input1:regular expressing now expressing in java

m1.find() [regular] start = 0,end = 7

m1.find() [ressing] start = 11,end = 18

m1.find() [ressing] start = 26,end = 33

m2.find() [java] start = 37,end = 41

m1.lookingAt() [regular] start = 0,end = 7


input2:java represses oracular expressions

m1.find() [represses] start = 5,end = 14

m1.find() [ressions] start = 27,end = 35

m2.find() [java represses oracular expressions] start = 0,end = 35

m2.lookingAt() [java represses oracular expressions] start = 0,end = 35

m2.matches() [java represses oracular expressions] start = 0,end = 35




public static Pattern compile(String regex,int flags)

import java.util.regex.Matcher;

import java.util.regex.Pattern;


/**
 * Demonstrates {@link Pattern#compile(String, int)} with combined match flags.
 */
public class Flag
{
    /**
     * Finds every occurrence of "java" at the start of a line, ignoring case,
     * in a fixed multi-line text and prints each match on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        // MULTILINE makes '^' match after every line terminator, not only at
        // the start of the input; CASE_INSENSITIVE lets "JAVA" match too.
        Pattern p = Pattern.compile("^java", Pattern.CASE_INSENSITIVE
                | Pattern.MULTILINE);
        Matcher m = p.matcher("java has regex \njava has regex \n"
                + "JAVA has pretty good regular expressions\n"
                + "Regular expressions are in java");

        while (m.find())
        {
            System.out.println(m.group());
        }
    }
}













public String[] split(CharSequence input,int limit)

public String[] split(CharSequence input)

import java.util.Arrays;

import java.util.regex.Pattern;


/**
 * Demonstrates {@link Pattern#split(CharSequence)},
 * {@link Pattern#split(CharSequence, int)} and {@link String#split(String)}.
 */
public class SplitDemo
{
    /**
     * Splits fixed sample strings and prints the resulting pieces as lists.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String input = "This!!unusual use!!of exclamation!!points";

        // No limit: split at every "!!".
        System.out.println(Arrays.asList(Pattern.compile("!!").split(input)));

        // Limit 3: at most three pieces, the last one keeps the remaining "!!".
        System.out
                .println(Arrays.asList(Pattern.compile("!!").split(input, 3)));

        System.out.println(Arrays.asList("Aha! String has a split() built in"
                .split(" ")));
    }
}




[This, unusual use, of exclamation, points]

[This, unusual use, of exclamation!!points]

[Aha!, String, has, a, split(), built, in]


import java.io.BufferedReader;

import java.io.FileInputStream;

import java.io.InputStreamReader;

import java.util.regex.Matcher;

import java.util.regex.Pattern;


/**
 * Demonstrates the replacement operations {@code replaceAll},
 * {@code replaceFirst}, {@link Matcher#appendReplacement} and
 * {@link Matcher#appendTail} on the text of this source file.
 */
public class ReplaceTest
{
    /**
     * Reads this class's own source file, normalises its spacing, prints it,
     * then prints it again with the first lowercase vowel replaced by
     * "(VOWEL1)" and every remaining lowercase vowel upper-cased.
     *
     * @param args ignored
     * @throws Exception if the source file cannot be opened or read
     */
    public static void main(String[] args) throws Exception
    {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream("src/com/anllin/regex/ReplaceTest.java")));

        StringBuffer sb = new StringBuffer();
        try
        {
            String str = null;
            // readLine() strips line terminators and none are re-added, so
            // the whole file ends up as one single line of text.
            while (null != (str = reader.readLine()))
            {
                sb.append(str);
            }
        }
        finally
        {
            // Release the file handle even when reading fails.
            reader.close();
        }

        String s = sb.toString();

        // Match a specially-commented block (slash-star-bang ... bang-star-slash)
        // if the file contains one; otherwise the whole text is used.
        Matcher mInput = Pattern.compile("/\\*!(.*)!\\*/", Pattern.DOTALL)
                .matcher(s);

        if (mInput.find())
        {
            // Captured by parentheses
            s = mInput.group(1);
        }

        // Replace two or more spaces with a single space:
        s = s.replaceAll(" {2,}", " ");

        // Replace one or more spaces at the beginning of each line with no
        // spaces. Must enable MULTILINE mode. The pattern needs the literal
        // space before '+'; a bare "^+" only quantifies the anchor and
        // therefore never removes anything.
        s = s.replaceAll("(?m)^ +", "");

        System.out.println(s);

        s = s.replaceFirst("[aeiou]", "(VOWEL1)");

        StringBuffer sbuf = new StringBuffer();

        Pattern p = Pattern.compile("[aeiou]");
        Matcher m = p.matcher(s);

        // Process the find information as you perform the replacements:
        while (m.find())
        {
            m.appendReplacement(sbuf, m.group().toUpperCase());
        }

        // Put in the remainder of the text:
        m.appendTail(sbuf);

        System.out.println(sbuf);
    }
}







package com.anllin.regex;import java.io.BufferedReader;import java.io.FileInputStream;import java.io.InputStreamReader;import java.util.regex.Matcher;import java.util.regex.Pattern;public class ReplaceTest{  public static void main(String[] args) throws Exception    {      BufferedReader reader = new BufferedReader(new InputStreamReader(              new FileInputStream("src/com/anllin/regex/ReplaceTest.java")));    String str = null;       StringBuffer sb = new StringBuffer();     while (null != (str = reader.readLine()))       {          sb.append(str);       }      String s = sb.toString();       // Match the specially-commented block of text above:      Matcher mInput = Pattern.compile("/\\*!(.*)!\\*/", Pattern.DOTALL)               .matcher(s);      if (mInput.find())       {          // Captured by parentheses          s = mInput.group(1);     }      // Replace two or more spaces with a single space:       s = s.replaceAll(" {2,}", " ");       // Replace one or more spaces at the beginning of each line with no     // spaces.Must enable MULTILINE mode.     s = s.replaceAll("(?m)^+", "");     System.out.println(s);       s = s.replaceFirst("[aeiou]", "(VOWEL1)");    StringBuffer sbuf = new StringBuffer();      Pattern p = Pattern.compile("[aeiou]");       Matcher m = p.matcher(s);       // Process the find information as you perform the replacements:       while (m.find())     {          m.appendReplacement(sbuf, m.group().toUpperCase());     }      // Put in the remainder of ther text:     m.appendTail(sbuf);      System.out.println(sbuf);   }}


p(VOWEL1)ckAgE cOm.AnllIn.rEgEx;ImpOrt jAvA.IO.BUffErEdREAdEr;ImpOrt jAvA.IO.FIlEInpUtStrEAm;ImpOrt jAvA.IO.InpUtStrEAmREAdEr;ImpOrt jAvA.UtIl.rEgEx.MAtchEr;ImpOrt jAvA.UtIl.rEgEx.PAttErn;pUblIc clAss REplAcETEst{  pUblIc stAtIc vOId mAIn(StrIng[] Args) thrOws ExcEptIOn    {      BUffErEdREAdEr rEAdEr = nEw BUffErEdREAdEr(nEw InpUtStrEAmREAdEr(              nEw FIlEInpUtStrEAm("src/cOm/AnllIn/rEgEx/REplAcETEst.jAvA")));    StrIng str = nUll;       StrIngBUffEr sb = nEw StrIngBUffEr();     whIlE (nUll != (str = rEAdEr.rEAdLInE()))       {          sb.AppEnd(str);       }      StrIng s = sb.tOStrIng();       // MAtch thE spEcIAlly-cOmmEntEd blOck Of tExt AbOvE:      MAtchEr mInpUt = PAttErn.cOmpIlE("/\\*!(.*)!\\*/", PAttErn.DOTALL)               .mAtchEr(s);      If (mInpUt.fInd())       {          // CAptUrEd by pArEnthEsEs          s = mInpUt.grOUp(1);     }      // REplAcE twO Or mOrE spAcEs wIth A sInglE spAcE:       s = s.rEplAcEAll(" {2,}", " ");       // REplAcE OnE Or mOrE spAcEs At thE bEgInnIng Of EAch lInE wIth nO     // spAcEs.MUst EnAblE MULTILINE mOdE.     s = s.rEplAcEAll("(?m)^+", "");     SystEm.OUt.prIntln(s);      s = s.rEplAcEFIrst("[AEIOU]", "(VOWEL1)");    StrIngBUffEr sbUf = nEw StrIngBUffEr();      PAttErn p = PAttErn.cOmpIlE("[AEIOU]");       MAtchEr m = p.mAtchEr(s);       // PrOcEss thE fInd InfOrmAtIOn As yOU pErfOrm thE rEplAcEmEnts:       whIlE (m.fInd())     {          m.AppEndREplAcEmEnt(sbUf, m.grOUp().tOUppErCAsE());     }      // PUt In thE rEmAIndEr Of thEr tExt:     m.AppEndTAIl(sbUf);      SystEm.OUt.prIntln(sbUf);   }}




import java.util.regex.Matcher;

import java.util.regex.Pattern;


/**
 * Demonstrates {@link Matcher#reset(CharSequence)}, which points an existing
 * matcher at a new input without recompiling the pattern.
 */
public class ResetTest
{
    /**
     * Prints every match of a three-letter pattern in one sentence, then
     * resets the same matcher onto a second sentence and prints its matches.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        Matcher m = Pattern.compile("[frb][aiu][gx]").matcher(
                "fix the rug with bags");

        while (m.find())
        {
            System.out.println(m.group());
        }

        // Reuse the matcher on new input; the search starts again from index 0.
        m.reset("fix the rig with rags");

        while (m.find())
        {
            System.out.println(m.group());
        }
    }
}
















Summary of regular-expression constructs







Characters


x    The character x


\\    The backslash character


\0n    The character with octal value 0n (0 <= n <= 7)


\0nn    The character with octal value 0nn (0 <= n <= 7)


\0mnn    The character with octal value 0mnn (0 <= m <= 3, 0 <= n <= 7)


\xhh    The character with hexadecimal value 0xhh


\uhhhh    The character with hexadecimal value 0xhhhh


\t    The tab character ('\u0009')


\n    The newline (line feed) character ('\u000A')


\r    The carriage-return character ('\u000D')


\f    The form-feed character ('\u000C')


\a    The alert (bell) character ('\u0007')


\e    The escape character ('\u001B')


\cx    The control character corresponding to x



Character classes


[abc]    a, b, or c (simple class)


[^abc]    Any character except a, b, or c (negation)


[a-zA-Z]    a through z or A through Z, inclusive (range)


[a-d[m-p]]    a through d, or m through p: [a-dm-p] (union)


[a-z&&[def]]    d, e, or f (intersection)


[a-z&&[^bc]]    a through z, except for b and c: [ad-z] (subtraction)


[a-z&&[^m-p]]    a through z, and not m through p: [a-lq-z] (subtraction)



Predefined character classes


.    Any character (may or may not match line terminators)


\d    A digit: [0-9]


\D    A non-digit: [^0-9]


\s    A whitespace character: [ \t\n\x0B\f\r]


\S    A non-whitespace character: [^\s]


\w    A word character: [a-zA-Z_0-9]


\W    A non-word character: [^\w]



POSIX character classes (US-ASCII only)


\p{Lower}    A lower-case alphabetic character: [a-z]


\p{Upper}    An upper-case alphabetic character: [A-Z]


\p{ASCII}    All ASCII: [\x00-\x7F]


\p{Alpha}    An alphabetic character: [\p{Lower}\p{Upper}]


\p{Digit}    A decimal digit: [0-9]


\p{Alnum}    An alphanumeric character: [\p{Alpha}\p{Digit}]


\p{Punct}    Punctuation: One of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~


\p{Graph}    A visible character: [\p{Alnum}\p{Punct}]


\p{Print}    A printable character: [\p{Graph}\x20]


\p{Blank}    A space or a tab: [ \t]


\p{Cntrl}    A control character: [\x00-\x1F\x7F]


\p{XDigit}    A hexadecimal digit: [0-9a-fA-F]


\p{Space}    A whitespace character: [ \t\n\x0B\f\r]



java.lang.Character classes (simple java character type)


\p{javaLowerCase}    Equivalent to java.lang.Character.isLowerCase()


\p{javaUpperCase}    Equivalent to java.lang.Character.isUpperCase()


\p{javaWhitespace}    Equivalent to java.lang.Character.isWhitespace()


\p{javaMirrored}    Equivalent to java.lang.Character.isMirrored()



Classes for Unicode blocks and categories


\p{InGreek}    A character in the Greek block (simple block)


\p{Lu}    An uppercase letter (simple category)


\p{Sc}    A currency symbol


\P{InGreek}    Any character except one in the Greek block (negation)


[\p{L}&&[^\p{Lu}]]    Any letter except an uppercase letter (subtraction)



Boundary matchers


^    The beginning of a line


$    The end of a line


\b    A word boundary


\B    A non-word boundary


\A    The beginning of the input


\G    The end of the previous match


\Z    The end of the input but for the final terminator, if any


\z    The end of the input



Greedy quantifiers


X?    X, once or not at all


X*    X, zero or more times


X+    X, one or more times


X{n}    X, exactly n times


X{n,}    X, at least n times


X{n,m}    X, at least n but not more than m times



Reluctant quantifiers


X??    X, once or not at all


X*?    X, zero or more times


X+?    X, one or more times


X{n}?    X, exactly n times


X{n,}?    X, at least n times


X{n,m}?    X, at least n but not more than m times



Possessive quantifiers


X?+    X, once or not at all


X*+    X, zero or more times


X++    X, one or more times


X{n}+    X, exactly n times


X{n,}+    X, at least n times


X{n,m}+    X, at least n but not more than m times



Logical operators


XY    X followed by Y


X|Y    Either X or Y


(X)    X, as a capturing group



Back references


\n    Whatever the nth capturing group matched





Quotation


\    Nothing, but quotes the following character


\Q    Nothing, but quotes all characters until \E


\E    Nothing, but ends quoting started by \Q



Special constructs (non-capturing)


(?:X)    X, as a non-capturing group


(?idmsux-idmsux)    Nothing, but turns match flags i d m s u x on - off


(?idmsux-idmsux:X)    X, as a non-capturing group with the given flags i d m s u x on - off


(?=X)    X, via zero-width positive lookahead


(?!X)    X, via zero-width negative lookahead


(?<=X)    X, via zero-width positive lookbehind


(?<!X)    X, via zero-width negative lookbehind


(?>X)    X, as an independent, non-capturing group


