java正则表达式

案例一:对QQ号进行验证

  要求:必须是5-15位,0不能开头,只能是数字

在没学过正则表达式之前,首先想到的是:

/**
 * Validates a hard-coded QQ number without regular expressions and prints the verdict.
 *
 * <p>Rules checked, in order: the length must be between 5 and 15, the number must not
 * start with '0', and every character must be an ASCII digit. Prints "QQ:" followed by
 * the number when all rules hold, otherwise a message for the first rule that failed.
 */
public static void check()
    {
        String qq="0123019";
        int len = qq.length();
        if(len>=5&&len<=15)
        {
            if(!qq.startsWith("0"))
            {
                boolean flag = true;
                for(char c : qq.toCharArray())
                {
                    // Compare against the characters '0' and '9' (not the number 9):
                    // anything outside that range makes the whole number illegal.
                    if(c<'0'||c>'9')
                    {
                        flag=false;
                        break;
                    }
                }
                if(flag)
                {
                    System.out.println("QQ:"+qq);
                }
                else{
                    System.out.println("出现非法字符");
                }
            }
            else{
                System.out.println("不可以以0开头");
            }
        }
        else{
            System.out.println("小于5或者大于15");
        }

    }

再优化一下,可以这么写:

/**
 * Validates a hard-coded QQ number, using {@link Long#parseLong(String)} for the
 * digits-only check, and prints the verdict.
 *
 * <p>Rules checked, in order: the length must be between 5 and 15, the number must not
 * start with '0', and the text must parse as an unsigned number. Prints "QQ:" followed
 * by the number when all rules hold, otherwise a message for the first rule that failed.
 */
public static void check()
    {
        String qq="0123019";
        int len = qq.length();
        if(len>=5&&len<=15)
        {
            if(!qq.startsWith("0"))
            {
                try {
                    // parseLong accepts a leading '+' or '-', which a QQ number must not have.
                    char first = qq.charAt(0);
                    if(first=='+'||first=='-')
                    {
                        throw new NumberFormatException("sign not allowed: "+qq);
                    }
                    // Only the side effect matters: a non-numeric string throws.
                    Long.parseLong(qq);
                    System.out.println("QQ:"+qq);
                } catch (NumberFormatException e) {
                    System.out.println("数字异常");
                }
            }
            else{
                System.out.println("不可以以0开头");
            }
        }
        else{
            System.out.println("小于5或者大于15");
        }
    }

}

接下来,最重要、也最简便的正则表达式派上用场:

/**
 * Checks a hard-coded QQ number against a regular expression and prints the outcome.
 *
 * <p>The pattern {@code [1-9][0-9]{4,14}} requires the first character to be a digit
 * from 1 to 9, followed by 4 to 14 digits from 0 to 9. Prints the boolean match result
 * first, then the number followed by "  is OK" or "  is error!".
 */
public static void regeCheck()
    {
        final String candidate = "012345";
        final String pattern = "[1-9][0-9]{4,14}";
        // String.matches succeeds only when the whole string fits the pattern.
        final boolean ok = candidate.matches(pattern);
        System.out.println(ok);
        final String verdict = ok ? "  is OK" : "  is error!";
        System.out.println(candidate+verdict);
    }

 

/**
 * Downloads a hard-coded web page and prints every e-mail address found in it,
 * one address per line.
 *
 * <p>A malformed URL or any connection/read failure is reported with a stack trace;
 * no exception is propagated to the caller.
 */
public static void getMails()
    {
        // One or more word characters, '@', then a domain made of one or more
        // dot-separated labels (each label at least one word character long).
        String mailreg = "\\w+@\\w+(\\.\\w+)+";
        Pattern p = Pattern.compile(mailreg);
        try {
            URL url = new URL("https://mail.qq.com/cgi-bin/frame_html?sid=tSk9WTbKfPKOCCVy&r=0cf16a3eabaaac83f3df241863066baf");
            URLConnection conn = url.openConnection();
            // try-with-resources closes the reader on every path, including read errors.
            // Note: InputStreamReader without a charset decodes with the platform default.
            try (BufferedReader bufIn = new BufferedReader(
                    new InputStreamReader(conn.getInputStream()))) {
                String line;
                while((line=bufIn.readLine())!=null)
                {
                    Matcher m = p.matcher(line);
                    // A single line may contain several addresses.
                    while(m.find())
                    {
                        System.out.println(m.group());
                    }
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            // Covers both opening the connection and reading from it.
            e.printStackTrace();
        }

    }
  •  正则表达式(替换)
/**
 * Replaces every match of a regular expression in a string and prints the result.
 *
 * @param str    the input text
 * @param reg    the regular expression to search for
 * @param newstr the replacement; may reference capture groups such as {@code $1}
 */
public static void repReplace(String str,String reg,String newstr)
     {
         System.out.println(str.replaceAll(reg,newstr));
     }
String str = "saee1213edfe32fdsfeeeewfersa1232434";
         // A run of three or more digits.
         String digitRun = "\\d{3,}";
         // Any character followed by one or more repetitions of itself (group 1 is the character).
         String repeatedChar = "(.)\\1+";
         // 1. Replace each run of three or more digits with '#'.
         repReplace(str,digitRun,"#");
         // 2. Replace each run of a repeated character with the fixed character '&'.
         repReplace(str,repeatedChar,"&");
         // 3. Collapse each run of a repeated character to a single one, e.g. qqq -> q.
         repReplace(str,repeatedChar,"$1");
  • 正则表达式(获取)

作用:将字符串中符合规则的字符串取出

步骤:1.将正则表达式封装成对象   Pattern

         2.让正则对象和要操作的字符相关联 

         3.关联后,获取正则匹配引擎

    4.通过引擎对符合规则的子串进行操作,比如取出。

 

爬虫,获取网页的信息

package Reptile;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Singleton helper that downloads pages with jsoup, collects the links they contain
 * and writes pages to disk.
 */
public class JsoupDownloader {
    /** Directory used by {@link #savePage} when the caller passes no directory. */
    public static final String DEFAULT_SAVE_DIR = "D:/download/";

    // volatile: getInstance() reads this field outside the lock, so the newly created
    // instance must be safely published to other threads.
    private static volatile JsoupDownloader downloader;

    private JsoupDownloader() {
    }

    /**
     * Returns the single shared instance, creating it on first use.
     *
     * @return the shared downloader
     */
    public static JsoupDownloader getInstance() {
        if (downloader == null) {
            synchronized (JsoupDownloader.class) {
                // Re-check under the lock: another thread may have created it meanwhile.
                if (downloader == null) {
                    downloader = new JsoupDownloader();
                }
            }
        }
        return downloader;
    }

    /**
     * Fetches the page at the given URL.
     *
     * @param url the address to download
     * @return the parsed document, or {@code null} if the download failed with an IOException
     */
    public Document downloadPage(String url) {
        try {
            System.out.println("正在下载" + url);
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Collects the {@code href} values of all {@code a[href]} elements in a document.
     *
     * <p>Only values longer than 6 characters are considered; each new candidate is
     * printed before the regex filter is applied.
     *
     * @param doc   the document to scan; {@code null} yields an empty set
     * @param regex if non-null, only hrefs that match it entirely are kept
     * @return the set of accepted hrefs, never {@code null}
     */
    public Set<String> parsePage(Document doc, String regex) {
        Set<String> urlSet = new HashSet<>();
        if (doc != null) {
            Elements elements = doc.select("a[href]");
            for (Element element : elements) {
                String url = element.attr("href");
                if (url.length() > 6 && !urlSet.contains(url)) {
                    System.out.println(url);

                    if (regex != null && !url.matches(regex)) {
                        continue;
                    }
                    urlSet.add(url);
                }
            }
        }
        return urlSet;
    }

    /**
     * Writes a document's HTML to a file.
     *
     * <p>Nothing is written when {@code doc} is null, or when {@code regex} is given and
     * the document's base URI does not match it. Missing parent directories are created.
     *
     * @param doc      the document to save
     * @param saveDir  target directory, or {@code null} for {@link #DEFAULT_SAVE_DIR}
     * @param saveName file name, or {@code null} to derive one from the page title
     * @param regex    optional filter applied to the document's base URI
     */
    public void savePage(Document doc, String saveDir, String saveName, String regex) {
        if (doc == null) {
            return;
        }
        if (regex != null && doc.baseUri() != null && !doc.baseUri().matches(regex)) {
            return;
        }
        saveDir = saveDir == null ? DEFAULT_SAVE_DIR : saveDir;
        // Default name: the title with characters listed in the pattern replaced by '_',
        // plus a nanoTime suffix so that pages with the same title do not collide.
        saveName = saveName == null ? doc.title().trim().replaceAll("[\\?/:\\*|<>\" ]", "_") + System.nanoTime() + ".html" : saveName;
        File file = new File(saveDir + "/" + saveName);
        File dir = file.getParentFile();
        if (!dir.exists()) {
            dir.mkdirs();
        }
        // try-with-resources closes the writer even if write() throws.
        try (PrintWriter printWriter = new PrintWriter(file)) {
            printWriter.write(doc.toString());
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }
}
package Reptile;
import java.util.*;
/**
 * First-in, first-out queue of URLs that remembers which URLs it has already handed out.
 */
public class UrlQueue {
    /** URLs that {@link #deQueue()} has already returned. */
    private Set<String> visitedSet;
    /** URLs waiting to be returned, in insertion order; may hold duplicates. */
    private LinkedList<String> unvisitedList;

    /**
     * Creates a queue pre-filled with the given seed URLs.
     *
     * @param seeds the initial URLs, in the order they should be returned
     */
    public UrlQueue(String[] seeds) {
        visitedSet = new HashSet<>();
        unvisitedList = new LinkedList<>(Arrays.asList(seeds));
    }

    /**
     * Appends a URL unless it is null or has already been handed out.
     *
     * @param url the URL to add
     */
    public void enQueue(String url) {
        if (url == null || visitedSet.contains(url)) {
            return;
        }
        unvisitedList.add(url);
    }

    /**
     * Appends every URL of a collection, applying the same rules as the single-URL variant.
     *
     * @param urls the URLs to add
     */
    public void enQueue(Collection<String> urls) {
        Iterator<String> it = urls.iterator();
        while (it.hasNext()) {
            enQueue(it.next());
        }
    }

    /**
     * Removes and returns the next URL that has not been handed out before,
     * and records it as visited.
     *
     * @return the next URL, or {@code null} (after printing a notice to stderr)
     *         when no such URL is left
     */
    public String deQueue() {
        while (!unvisitedList.isEmpty()) {
            String candidate = unvisitedList.removeFirst();
            // Set.add returns false for a URL that was already visited: skip it.
            if (visitedSet.add(candidate)) {
                return candidate;
            }
        }
        System.err.println("URL取光了");
        return null;
    }

    /**
     * Returns how many URLs have been handed out so far.
     *
     * @return the number of visited URLs
     */
    public int getVisitedCount() {
        return visitedSet.size();
    }
}
package Reptile;
import java.util.*;
import org.jsoup.*;
import org.jsoup.Jsoup;
import java.util.Set;
import org.jsoup.nodes.Document;
/**
 * Entry point for the regex-replacement demo; also hosts two small crawl loops built on
 * UrlQueue and JsoupDownloader.
 */
public class Main {
    /** Both crawl loops stop once this many URLs have been taken from the queue. */
    private static final int MAX_PAGES = 5;

    /**
     * Runs the three replacement examples on a sample string.
     *
     * <p>To try the crawler instead, call for example
     * {@code StartUrl("http://www.innocellence.com", "[\\?/:\\*|<>\" ]")}.
     */
    public static void main(String[] args) {
        String str = "saee1213edfe32fdsfeeeewfersa1232434";
        // 1. Replace each run of three or more digits with '#'.
        repReplace(str,"\\d{3,}","#");
        // 2. Replace each run of a repeated character with the fixed character '&'.
        repReplace(str,"(.)\\1+","&");
        // 3. Collapse each run of a repeated character to a single one, e.g. qqq -> q.
        repReplace(str,"(.)\\1+","$1");
    }

    /**
     * Replaces every match of a regular expression in a string and prints the result.
     *
     * @param str    the input text
     * @param reg    the regular expression to search for
     * @param newstr the replacement; may reference capture groups such as {@code $1}
     */
    public static void repReplace(String str,String reg,String newstr)
    {
        str = str.replaceAll(reg,newstr);
        System.out.println(str);
    }

    /**
     * Crawls from a seed URL, printing each URL taken from the queue and enqueuing the
     * links of every downloaded page that match {@code \w*}.
     *
     * @param str the seed URL
     * @return always the literal "Start"
     */
    public static String getUrlList(String str)
    {
        UrlQueue urlQueue = new UrlQueue(new String[]{str});
        JsoupDownloader downloader = JsoupDownloader.getInstance();
        while (urlQueue.getVisitedCount() < MAX_PAGES) {
            String url = urlQueue.deQueue();
            // Check for an exhausted queue first so that "null" is never printed.
            if (url == null) {
                break;
            }
            System.out.println(url);
            Document doc = downloader.downloadPage(url);
            if (doc == null) {
                continue;
            }
            Set<String> urlSet = downloader.parsePage(doc,"\\w*");
            urlQueue.enQueue(urlSet);
        }
        return "Start";
    }

    /**
     * Crawls from a seed URL: downloads each queued page, enqueues its links that match
     * {@code reg}, saves the page, and finally prints the page count and elapsed seconds.
     *
     * @param str the seed URL
     * @param reg regular expression passed to both the link filter and the save filter
     */
    public static void StartUrl(String str,String reg)
    {
        UrlQueue urlQueue = new UrlQueue(new String[]{str});
        JsoupDownloader downloader = JsoupDownloader.getInstance();
        long start = System.currentTimeMillis();
        while (urlQueue.getVisitedCount() < MAX_PAGES) {
            String url = urlQueue.deQueue();
            // Check for an exhausted queue first so that "null" is never printed.
            if (url == null) {
                break;
            }
            System.out.println(str+url);
            Document doc = downloader.downloadPage(url);
            if (doc == null) {
                continue;
            }
            Set<String> urlSet = downloader.parsePage(doc,reg);
            urlQueue.enQueue(urlSet);
            downloader.savePage(doc, "C:\\Users\\JiangJianMing\\Desktop\\Test\\", null, reg);

            System.out.println("已请求" + urlQueue.getVisitedCount() + "个页面");
        }
        long end = System.currentTimeMillis();
        System.out.println(">>>>>>>>>>抓去完成,共抓取" + urlQueue.getVisitedCount() + "到个页面,用时" + ((end - start) / 1000) + "s<<<<<<<<<<<<");
    }
}
  •  java连接oracle数据库

            Java连接Oracle数据库大概可以分为以下几个步骤。在做这些步骤之前,我们需要先引入jar包:在项目上点击鼠标右键 -> Properties,在弹出窗口中点击左侧的Java Build Path,在右侧选项卡中选择Libraries,再在右侧的按钮中点击Add External JARs...,然后选择jar包。如果安装了Oracle数据库,jar包就在安装目录下,例如我的是D:\Program\Oracle\product\11.2.0\dbhome_1\jdbc\lib下的ojdbc5.jar。这样,就可以开始我们代码的编写了。

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Minimal JDBC example: connects to a local Oracle database and prints the first column
 * of every row in the {@code emp} table.
 */
public class Link {

    /**
     * Loads the Oracle driver, runs {@code select * from emp} and prints column 1 of each row.
     * Failures are reported with a stack trace; nothing is thrown.
     */
    public static void main(String[] args) {

        // 1. Load the driver class.
        try {
            Class.forName("oracle.jdbc.driver.OracleDriver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        // 2. Open the connection, 3. create a statement, 4. run the query.
        // try-with-resources closes the result set, statement and connection in reverse
        // order on every path, including when the query fails.
        // NOTE(review): the URL and credentials are hard-coded demo values.
        try (Connection ct = DriverManager.getConnection("jdbc:oracle:thin:@127.0.0.1:1521:orcl","scott","tiger");
             Statement sm = ct.createStatement();
             ResultSet rs = sm.executeQuery("select * from emp")) {
            while(rs.next())
            {
                System.out.println(rs.getString(1));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

}

 

posted @ 2016-07-06 21:24  Blue眼泪2016  阅读(398)  评论(0编辑  收藏  举报