java正则表达式
案例一:对QQ号进行验证
要求:必须是5-15位,不能以0开头,只能是数字
在没学过正则表达式之前,首先想到的写法是:
/**
 * Validates the sample QQ number {@code "0123019"} without regular expressions
 * and prints the verdict to standard output.
 *
 * <p>Original zero-argument entry point; it delegates to {@link #check(String)}.
 */
public static void check() {
    check("0123019");
}

/**
 * Validates a QQ number without regular expressions and prints exactly one
 * verdict line to standard output.
 *
 * <p>Rules, checked in this order: the length must be 5-15 characters, the
 * first character must not be {@code '0'}, and every character must be an
 * ASCII digit.
 *
 * @param qq the candidate QQ number; must not be {@code null}
 */
public static void check(String qq) {
    int len = qq.length();
    if (len < 5 || len > 15) {
        System.out.println("小于5或者大于15");
        return;
    }
    if (qq.startsWith("0")) {
        System.out.println("不可以以0开头");
        return;
    }

    boolean allDigits = true;
    for (char c : qq.toCharArray()) {
        // Compare against the character '9', not the int 9, and reject the
        // characters that fall OUTSIDE the digit range. The earlier condition
        // (c >= '0' && c <= 9) could never be true, so nothing was rejected.
        if (c < '0' || c > '9') {
            allDigits = false;
            break;
        }
    }

    if (allDigits) {
        System.out.println("QQ:" + qq);
    } else {
        System.out.println("出现非法字符");
    }
}
再优化一下,可以这么写:
public static void check() { String qq="0123019"; int len = qq.length(); if(len>=5&&len<=15) { if(!qq.startsWith("0")) { try { long l = Long.parseLong(qq); } catch (NumberFormatException e) { // TODO Auto-generated catch block System.out.println("数字异常"); //e.printStackTrace(); } } else{ System.out.println("不可以以0开头"); } } else{ System.out.println("小于5或者大于15"); } } }
接下来,最重要、也最简便的正则表达式派上用场:
public static void regeCheck() { String qq = "012345"; String rege = "[1-9][0-9]{4,14}"; //【1-9】代表第一位只能是1-9的数字 //【0-9】{4,14}表示后边是0-0的数字,并且是在4-14位之间 boolean flag = qq.matches(rege); System.out.println(flag); if(flag){ System.out.println(qq+" is OK"); } else{ System.out.println(qq+" is error!"); } }
public static void getMails() { //System.out.println("getMails"); try { URL url = new URL("https://mail.qq.com/cgi-bin/frame_html?sid=tSk9WTbKfPKOCCVy&r=0cf16a3eabaaac83f3df241863066baf"); URLConnection conn = null; try { conn = url.openConnection(); System.out.println("getMails"); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } BufferedReader bufIn = null; try { bufIn = new BufferedReader( new InputStreamReader(conn.getInputStream())); } catch (IOException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } String line = null; //String mailreg = "\\w+@\\w+(\\.\\w)+"; String mailreg ="[\\d.][\\d]"; Pattern p = Pattern.compile(mailreg); try { while((line=bufIn.readLine())!=null) { //System.out.println("oks"); Matcher m = p.matcher(line); //System.out.println(line); while(m.find()) { System.out.println("getMails"); System.out.println(m.group()); } } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } catch (MalformedURLException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
- 正则表达式(替换)
//这是函数,将字符串,规则,和替换的新字符串传入执行 public static void repReplace(String str,String reg,String newstr) { str = str.replaceAll(reg,newstr); System.out.println(str); }
String str = "saee1213edfe32fdsfeeeewfersa1232434"; //1.连续三个数字就替换成# repReplace(str,"\\d{3,}","#"); //2.将叠词替换成(固定字符)& repReplace(str,"(.)\\1+","&"); //3.将叠词替换成原来的一个字符 例如 qqq--q; repReplace(str,"(.)\\1+","$1");
- 正则表达式(获取)
作用:将字符串中符合规则的字符串取出
步骤:1.将正则表达式封装成对象 Pattern
2.让正则对象和要操作的字符相关联
3.关联后,获取正则匹配引擎
4.通过引擎对符合规则的子串进行操作,比如取出。
爬虫,获取网页的信息
package Reptile;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Singleton helper that downloads pages with jsoup, extracts link targets from
 * them and saves pages to disk.
 */
public class JsoupDownloader {

    /** Directory used by {@link #savePage} when no directory is supplied. */
    public static final String DEFAULT_SAVE_DIR = "D:/download/";

    // volatile is required for the double-checked locking in getInstance():
    // without it another thread may observe a non-null reference before the
    // object's construction is visible to it.
    private static volatile JsoupDownloader downloader;

    private JsoupDownloader() {
    }

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return the single {@code JsoupDownloader} instance
     */
    public static JsoupDownloader getInstance() {
        if (downloader == null) {
            synchronized (JsoupDownloader.class) {
                if (downloader == null) {
                    downloader = new JsoupDownloader();
                }
            }
        }
        return downloader;
    }

    /**
     * Fetches and parses the page at the given URL.
     *
     * @param url the address to download
     * @return the parsed document, or {@code null} if the download failed or
     *         the URL was rejected
     */
    public Document downloadPage(String url) {
        try {
            System.out.println("正在下载" + url);
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (IllegalArgumentException e) {
            // parsePage() collects raw href values, which may be relative or
            // otherwise malformed. jsoup rejects those with an unchecked
            // IllegalArgumentException, which would otherwise abort the crawl
            // instead of skipping the link.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Collects the {@code href} values of all {@code a[href]} elements in a
     * document.
     *
     * <p>Only values longer than 6 characters are considered. Every considered
     * value that is not yet in the result is printed to standard output; it is
     * added to the result only when {@code regex} is {@code null} or the whole
     * value matches {@code regex}.
     *
     * @param doc   the document to scan; {@code null} yields an empty set
     * @param regex filter the whole href must match, or {@code null} for no filter
     * @return the set of accepted href values, never {@code null}
     */
    public Set<String> parsePage(Document doc, String regex) {
        Set<String> urlSet = new HashSet<>();
        if (doc == null) {
            return urlSet;
        }

        // Compile once instead of once per link (String.matches recompiles
        // the pattern on every call).
        Pattern filter = regex == null ? null : Pattern.compile(regex);

        Elements elements = doc.select("a[href]");
        for (Element element : elements) {
            String url = element.attr("href");
            if (url.length() <= 6 || urlSet.contains(url)) {
                continue;
            }
            System.out.println(url);
            if (filter == null || filter.matcher(url).matches()) {
                urlSet.add(url);
            }
        }
        return urlSet;
    }

    /**
     * Writes a document's HTML to a file.
     *
     * <p>Nothing is written when {@code doc} is {@code null}, or when
     * {@code regex} is given and the document's base URI does not match it.
     * Missing parent directories are created.
     *
     * @param doc      the document to save
     * @param saveDir  target directory, or {@code null} for {@link #DEFAULT_SAVE_DIR}
     * @param saveName file name, or {@code null} to derive one from the page
     *                 title (special characters replaced by {@code _}) plus
     *                 {@code System.nanoTime()} and {@code ".html"}
     * @param regex    filter the base URI must match, or {@code null} for no filter
     */
    public void savePage(Document doc, String saveDir, String saveName, String regex) {
        if (doc == null) {
            return;
        }
        if (regex != null && doc.baseUri() != null && !doc.baseUri().matches(regex)) {
            return;
        }

        saveDir = saveDir == null ? DEFAULT_SAVE_DIR : saveDir;
        saveName = saveName == null
                ? doc.title().trim().replaceAll("[\\?/:\\*|<>\" ]", "_") + System.nanoTime() + ".html"
                : saveName;

        File file = new File(saveDir + "/" + saveName);
        File dir = file.getParentFile();
        if (!dir.exists()) {
            dir.mkdirs();
        }

        // try-with-resources closes (and thereby flushes) the writer on every path.
        try (PrintWriter printWriter = new PrintWriter(file)) {
            printWriter.write(doc.toString());
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }
}
package Reptile;

import java.util.*;

/**
 * FIFO queue of URLs still to be visited, together with the set of URLs that
 * have already been handed out.
 */
public class UrlQueue {
    /** URLs that have already been returned by {@link #deQueue()}. */
    private final Set<String> visitedSet;
    /** URLs waiting to be visited, in insertion order. */
    private final LinkedList<String> unvisitedList;

    /**
     * Creates a queue pre-filled with seed URLs.
     *
     * <p>Seeds go through the same filter as {@link #enQueue(String)}, so
     * {@code null} entries are skipped. A {@code null} array is treated as
     * "no seeds".
     *
     * @param seeds the initial URLs, in the order they should be visited
     */
    public UrlQueue(String[] seeds) {
        visitedSet = new HashSet<>();
        unvisitedList = new LinkedList<>();
        if (seeds != null) {
            for (String seed : seeds) {
                addIfUnvisited(seed);
            }
        }
    }

    /**
     * Appends a URL to the queue unless it is {@code null} or already visited.
     *
     * @param url the URL to add
     */
    public void enQueue(String url) {
        addIfUnvisited(url);
    }

    /**
     * Appends each URL of a collection, applying the same filter as
     * {@link #enQueue(String)}.
     *
     * @param urls the URLs to add
     */
    public void enQueue(Collection<String> urls) {
        for (String url : urls) {
            addIfUnvisited(url);
        }
    }

    /**
     * Removes and returns the next URL that has not been visited yet, and
     * marks it as visited.
     *
     * <p>Entries that became visited after they were queued (duplicates) are
     * skipped.
     *
     * @return the next unvisited URL, or {@code null} when the queue is
     *         exhausted (a notice is then written to standard error)
     */
    public String deQueue() {
        // An explicit emptiness check replaces catching NoSuchElementException.
        while (!unvisitedList.isEmpty()) {
            String url = unvisitedList.removeFirst();
            // Set.add returns false for an element that is already present.
            if (visitedSet.add(url)) {
                return url;
            }
        }
        System.err.println("URL取光了");
        return null;
    }

    /**
     * Returns how many URLs have been handed out by {@link #deQueue()}.
     *
     * @return the number of visited URLs
     */
    public int getVisitedCount() {
        return visitedSet.size();
    }

    /** Queues {@code url} at the tail unless it is null or already visited. */
    private void addIfUnvisited(String url) {
        if (url != null && !visitedSet.contains(url)) {
            unvisitedList.addLast(url);
        }
    }
}
package Reptile;

import java.util.*;
import org.jsoup.*;
import org.jsoup.Jsoup;
import java.util.Set;
import org.jsoup.nodes.Document;

/**
 * Entry point for the regex replacement demo and the small jsoup crawler.
 */
public class Main {

    /** A crawl stops once this many URLs have been taken from the queue. */
    private static final int MAX_PAGES = 5;

    /**
     * Runs the three regex replacement examples. The crawler calls are left
     * commented out as usage examples.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        //StartUrl("http://www.cnblogs.com/artech/");
        //StartUrl("http://www.innocellence.com/d/en/");
        // String reg = "\\w*\\:\\/\\/\\w*\\.\\w*\\.\\w*\\/";
        //StartUrl("http://www.stats.dl.gov.cn","[\\?/:\\*|<>\" ]");
        //StartUrl("http://www.innocellence.com","[\\?/:\\*|<>\" ]");
        // String s = getUrlList("http://www.innocellence.com");

        String str = "saee1213edfe32fdsfeeeewfersa1232434";
        // 1. Replace every run of three or more consecutive digits with '#'.
        repReplace(str, "\\d{3,}", "#");
        // 2. Replace every run of a repeated character with the fixed character '&'.
        repReplace(str, "(.)\\1+", "&");
        // 3. Collapse every run of a repeated character to a single copy, e.g. qqq -> q.
        repReplace(str, "(.)\\1+", "$1");
    }

    /**
     * Replaces every match of a regular expression in a string and prints the
     * result to standard output.
     *
     * @param str    the input text
     * @param reg    the regular expression to search for
     * @param newstr the replacement; may reference capture groups such as {@code $1}
     */
    public static void repReplace(String str, String reg, String newstr) {
        str = str.replaceAll(reg, newstr);
        System.out.println(str);
    }

    /**
     * Crawls from a seed URL, printing each URL as it is taken from the queue.
     *
     * <p>Stops after {@code MAX_PAGES} URLs have been taken or when the queue
     * is exhausted. Links are filtered with the regex {@code "\\w*"}.
     *
     * @param str the seed URL
     * @return always the string {@code "Start"}
     */
    public static String getUrlList(String str) {
        String s = "Start";
        UrlQueue urlQueue = new UrlQueue(new String[]{str});
        JsoupDownloader downloader = JsoupDownloader.getInstance();

        while (urlQueue.getVisitedCount() < MAX_PAGES) {
            String url = urlQueue.deQueue();
            // Check for exhaustion before printing, so no "null" line is emitted.
            if (url == null) {
                break;
            }
            System.out.println(url);

            Document doc = downloader.downloadPage(url);
            if (doc == null) {
                continue;
            }
            Set<String> urlSet = downloader.parsePage(doc, "\\w*");
            urlQueue.enQueue(urlSet);
            // downloader.savePage(doc, "C:\\Users\\JiangJianMing\\Desktop\\Test\\", null, "\\w*");
            // System.out.println("已请求" + urlQueue.getVisitedCount() + "个页面");
        }
        return s;
    }

    /**
     * Crawls from a seed URL, saving each downloaded page and following the
     * links that match a regex.
     *
     * <p>Stops after {@code MAX_PAGES} URLs have been taken or when the queue
     * is exhausted, then prints a summary with the page count and the elapsed
     * time in seconds.
     *
     * @param str the seed URL
     * @param reg regex applied both to extracted links and to the saved page's
     *            base URI
     */
    public static void StartUrl(String str, String reg) {
        UrlQueue urlQueue = new UrlQueue(new String[]{str});
        JsoupDownloader downloader = JsoupDownloader.getInstance();
        long start = System.currentTimeMillis();

        while (urlQueue.getVisitedCount() < MAX_PAGES) {
            String url = urlQueue.deQueue();
            // Check for exhaustion before printing, so seed + "null" is never printed.
            if (url == null) {
                break;
            }
            System.out.println(str + url);

            Document doc = downloader.downloadPage(url);
            if (doc == null) {
                continue;
            }
            // Example of a site-restricting filter used during development:
            // "(http://www.cnblogs.com/artech/p|http://www.cnblogs.com/artech/default|http://www.cnblogs.com/artech/archive/\\d{0}/\\d{2}/\\d{2}/).*"
            // NOTE(review): \\d{0} matches zero digits; \\d{4} was presumably intended - confirm.
            Set<String> urlSet = downloader.parsePage(doc, reg);
            urlQueue.enQueue(urlSet);
            downloader.savePage(doc, "C:\\Users\\JiangJianMing\\Desktop\\Test\\", null, reg);
            System.out.println("已请求" + urlQueue.getVisitedCount() + "个页面");
        }

        long end = System.currentTimeMillis();
        // Summary wording corrected: "抓去" -> "抓取", stray "到" removed.
        System.out.println(">>>>>>>>>>抓取完成,共抓取" + urlQueue.getVisitedCount()
                + "个页面,用时" + ((end - start) / 1000) + "s<<<<<<<<<<<<");
    }
}
-
java连接oracle数据库
用Java连接Oracle数据库大致可以分为以下几个步骤。在开始之前,需要先引入驱动jar包:在项目上点击鼠标右键 -> Properties,在弹出的窗口中点击左侧的Java Build Path,在右侧选项卡中选择Libraries,再点击右侧的Add External JARs...按钮,然后选择jar包。如果安装了Oracle数据库,jar包默认在安装目录下,例如我的是D:\Program\Oracle\product\11.2.0\dbhome_1\jdbc\lib下的ojdbc5.jar。这样,就可以开始编写代码了。
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Minimal JDBC example: connects to a local Oracle instance and prints the
 * first column of every row of the {@code emp} table.
 */
public class Link {

    /**
     * Loads the Oracle driver, runs {@code select * from emp} and prints the
     * first column of each row to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // 1. Load the driver. A failure is reported but not fatal here; the
        //    connection attempt below then reports its own SQLException.
        try {
            Class.forName("oracle.jdbc.driver.OracleDriver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }

        // 2. Open the connection, create a statement and run the query.
        //    try-with-resources closes the ResultSet, Statement and Connection
        //    in reverse order on every path, including when an exception is
        //    thrown; previously none of them were ever closed.
        try (Connection ct = DriverManager.getConnection(
                     "jdbc:oracle:thin:@127.0.0.1:1521:orcl", "scott", "tiger");
             Statement sm = ct.createStatement();
             ResultSet rs = sm.executeQuery("select * from emp")) {
            while (rs.next()) {
                System.out.println(rs.getString(1));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}