025.2 正则表达式 练习
内容:对IP地址切割然后按数字排序;判断是不是邮箱;爬一个页面获取里面的邮箱地址
//#########练习1
对一组以空格分隔的IP地址进行切割,并按数值大小排序
private static void ipsort() { String ip_source = "192.168.1.200 17.10.10.12 3.3.46.1 127.0.0.1"; //ip每段前面补两个0 ip_source = ip_source.replaceAll("(\\d+)", "00$1"); System.out.println(ip_source); //保留每段后面的3位 ip_source = ip_source.replaceAll("0*(\\d{3})", "$1"); System.out.println(ip_source); String[] ips = ip_source.split(" +"); Arrays.sort(ips); for(String ip : ips){ System.out.println(ip.replaceAll("0*(\\d+)", "$1")); } }
//##############
判断是不是邮箱地址
/**
 * Checks a sample e-mail address against a simple validation pattern and
 * prints the address followed by {@code :true} or {@code :false}.
 *
 * <p>The pattern requires one or more word characters, an {@code @}, an
 * alphanumeric host name, and then one to three dot-separated suffixes of two
 * or three letters each (for example {@code .com} or {@code .com.cn}).
 */
private static void checkMail() {
    final String address = "abc123@sina.com";
    final String pattern = "\\w+@[a-zA-Z0-9]+(\\.[a-zA-Z]{2,3}){1,3}";

    // String.matches succeeds only when the whole string fits the pattern.
    System.out.println(address + ":" + address.matches(pattern));
}
//########################
网页爬虫
/**
 * Minimal "web spider" exercise: scans a locally saved page for e-mail
 * addresses with a regular expression and prints every address found.
 */
public class NetSpider {

    /**
     * Reads {@code mail.html} from the working directory and prints each
     * e-mail address it contains, one per line, in order of appearance.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // Any local file with plenty of e-mail addresses will do; fetching a
        // real page has not been covered yet, so a saved file stands in for it.
        File file = new File("mail.html");

        String mailRegex = "\\w+@\\w+(\\.\\w+)+";
        List<String> mailList = getMails(file, mailRegex);
        for (String s : mailList) {
            System.out.println(s);
        }
    }

    /**
     * Collects every substring of {@code file} that matches {@code mailRegex}.
     *
     * <p>The file is processed line by line, so a match can never span a line
     * break. Duplicates are kept. The file is decoded by {@link FileReader}
     * using the JVM's default charset.
     *
     * @param file      text file to scan
     * @param mailRegex regular expression describing one address
     * @return all matches in order of appearance; empty if there are none
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> getMails(File file, String mailRegex) throws IOException {
        // Compile first so an invalid pattern fails before any file is opened.
        Pattern pt = Pattern.compile(mailRegex);
        List<String> list = new ArrayList<String>();

        BufferedReader bufr = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = bufr.readLine()) != null) {
                Matcher mt = pt.matcher(line);
                while (mt.find()) {
                    list.add(mt.group());
                }
            }
        } finally {
            // Always release the file handle, even when reading fails.
            bufr.close();
        }
        return list;
    }
}
mail.html 可以是任意一个包含大量邮箱地址的页面(例如保存到本地的贴吧帖子页面)