// go 抓取页面邮箱 (Go: scrape e-mail addresses from a web page)

package main

import (
	"flag"
	"fmt"
	"io/ioutil"
	"net/http"
	"regexp"
)
var (
	// QQEmall is a regular-expression pattern for QQ mail addresses: a run of
	// digits followed by "@qq.com". The dot is escaped so that it matches only
	// a literal "." and not an arbitrary character.
	QQEmall = `\d+@qq\.com`
	// reEmall is the general e-mail pattern: word characters, "@", then a
	// domain of two or more dot-separated labels made of word characters
	// (for example "user@mail.example.com"). Every label is matched in full;
	// the group is non-capturing so the pattern has no submatches.
	reEmall = `\w+@\w+(?:\.\w+)+`
	// url is the address of the page to scan; it is populated from the -url
	// command-line flag.
	url string
)

// GetEmall downloads the page at url and prints every substring of it that
// matches the reEmall pattern to standard output, one match per line.
func GetEmall(url string) {
	// Fetch the raw page text for the target address.
	body := GetPageStr(url)

	// Only the whole match is printed, so the submatch variant is not needed.
	matcher := regexp.MustCompile(reEmall)
	for _, addr := range matcher.FindAllString(body, -1) {
		fmt.Println(addr)
	}
}
// GetPageStr fetches url with an HTTP GET and returns the response body as a
// string.
//
// Failures are reported through HandleError rather than returned. When the
// request itself fails there is no response to read, so the result is the
// empty string. When reading the body fails part-way, the bytes received so
// far are still returned.
func GetPageStr(url string) (pageStr string) {
	resp, err := http.Get(url)
	if err != nil {
		// On error the response is not usable (normally nil), so return
		// before deferring resp.Body.Close(), which would otherwise panic.
		HandleError(err, "http err")
		return ""
	}
	defer resp.Body.Close()

	// Read the whole page into memory.
	respByte, err := ioutil.ReadAll(resp.Body)
	HandleError(err, "ioutil err")
	return string(respByte)
}

// HandleError reports err on standard output, prefixed with message so the
// failing step can be identified. It does nothing when err is nil and never
// stops the program; the caller decides how to continue.
func HandleError(err error, message string) {
	if err == nil {
		return
	}
	fmt.Printf("%s: %v\n", message, err)
}
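// Disabled: package-level declaration of the variable bound to the command-line
// flag. It cannot compile as written because flag.StringVar returns no value.
//var urlName=flag.StringVar(&url,"url","https://tieba.baidu.com/p/6505861166","输入抓取地址")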

// init registers the -url command-line flag and binds it to the package-level
// url variable, so the flag is defined before main calls flag.Parse.
func init() {
	const (
		defaultURL = "https://tieba.baidu.com/p/6505861166"
		usage      = "输入抓取地址"
	)
	flag.StringVar(&url, "url", defaultURL, usage)
}
// main parses the command-line flags and then scans the page named by -url
// for e-mail addresses.
func main() {
	// Until Parse runs, url still holds the default registered in init.
	flag.Parse()

	GetEmall(url)
}

 

 

  

// posted on 2022-01-27 13:59  kevin_yang123  阅读(72)  评论(0)  编辑  收藏  举报