使用正则表达式,取得点击次数,函数抽离

import re
'''
1.用正则表达式判断邮箱是否输入正确
2.用正则表达式识别出全部的电话号码(这里用的是固话模式)
3.用正则表达式进行英文单词的分词
'''


def em_match(e):
    """Check whether *e* is, in its entirety, an email address of the accepted form.

    Accepted form: a local part that starts with an ASCII letter or digit and
    continues with ASCII letters, digits or underscores; an ``@``; a domain
    label of lowercase letters; a dot; and a suffix built from the letters
    c, o, m and n (for example ``com`` or ``cn``).

    Prints ``'Success!'`` when the input matches and ``'Wrong layout'``
    otherwise.

    Args:
        e: The value to validate; it is converted with ``str()`` before
            matching.

    Returns:
        True if the whole input matches the pattern, False otherwise.
    """
    # ``A-Z`` (not ``A-z``) keeps punctuation such as ``^`` and ``[`` out of
    # the local part, and the suffix class lists the letters without commas
    # so a literal ``,`` is not accepted.
    pattern = r'[0-9a-zA-Z][0-9a-zA-Z_]*@[a-z]+\.[comn]+'
    # fullmatch rather than findall: findall returns a list, which never
    # compares equal to the input string, and fullmatch also rejects inputs
    # that merely contain a valid address.
    if re.fullmatch(pattern, str(e)):
        print('Success!')
        return True
    print('Wrong layout')
    return False


def tel_match(n):
    """Check whether *n* is, in its entirety, a landline-style phone number.

    Accepted form: 3 to 5 digits, a hyphen, then 6 to 8 digits.

    Prints ``'Success!'`` when the input matches and ``'Wrong layout'``
    otherwise.

    Args:
        n: The value to validate; it is converted with ``str()`` before
            matching.

    Returns:
        True if the whole input matches the pattern, False otherwise.
    """
    # fullmatch reports "no match" as None, so there is no need to index an
    # empty findall result and catch the resulting exception.
    if re.fullmatch(r'[0-9]{3,5}-[0-9]{6,8}', str(n)):
        print('Success!')
        return True
    print('Wrong layout')
    return False


def word_split(mm):
    """Split text into whitespace-delimited tokens, print the list and return it.

    Tokens are maximal runs of non-whitespace characters, so punctuation
    stays attached to the neighbouring word.

    Args:
        mm: The text to split; it is converted with ``str()`` first.

    Returns:
        The list of tokens (empty when the text has no non-whitespace
        characters).
    """
    # Collecting ``\S+`` runs instead of splitting on a single ``\s`` avoids
    # the empty strings that consecutive, leading or trailing whitespace
    # would otherwise produce.
    mess = re.findall(r"\S+", str(mm))
    print(mess)
    return mess


# Read an email address from standard input and print whether em_match
# accepts it.
e = input("请输入您的email:")
em_match(e)

# Read a phone number from standard input and print whether tel_match
# accepts it.
n = input("请输入您的电话:")
tel_match(n)

# Sample English passage passed to word_split, which prints its tokens.
m = '''
Five score years ago, a great American, in whose symbolic shadow we stand today, signed the Emancipation Proclamation.
 This momentous decree came as a great beacon light of hope to millions of Negro slaves who had been seared in the 
 flames of withering injustice. It came as a joyous daybreak to end the long night of bad captivity.
   '''
word_split(m)
  

  

import re
import requests
from bs4 import BeautifulSoup


def get_ncoding(url):
    """Return the numeric id embedded in a news page URL, as a string.

    The id is the run of digits that follows a ``_<digits>/`` segment and
    precedes ``.html``. When the URL contains several such segments the
    first one is used.

    Raises:
        IndexError: if the URL contains no such segment.
    """
    id_pattern = re.compile(r'_[0-9]+/(\d+)\.html')
    found_ids = id_pattern.findall(str(url))
    first_id = found_ids[0]
    return str(first_id)


def click_count(num, timeout=10):
    """Fetch the click count for a news item from the counter endpoint.

    Requests ``http://oa.gzcc.cn/api.php?op=count&id=<num>&modelid=80`` and
    extracts the digits from the last ``.html('<digits>');`` call found in
    the response body.

    Args:
        num: News id placed in the ``id`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The extracted count as a string.

    Raises:
        IndexError: if the response contains no ``.html('<digits>');`` call.
        requests.exceptions.RequestException: if the HTTP request fails or
            times out.
    """
    c_url = 'http://oa.gzcc.cn/api.php?op=count&id=' + str(num) + '&modelid=80'
    # Without an explicit timeout requests can block indefinitely on an
    # unresponsive server.
    resc = requests.get(c_url, timeout=timeout)
    resc.encoding = 'utf-8'
    soupc = BeautifulSoup(resc.text, 'html.parser')
    # The dot is escaped so only a literal ``.html(`` call is matched; the
    # last match in the response is the one returned.
    count = re.findall(r'\.html\(\'(\d+)\'\);', str(soupc))[-1]
    return str(count)


def get_new_details(url, timeout=10):
    """Download a news page and extract its title, info line and body text.

    Args:
        url: Address of the news page.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A ``(title, info, content)`` tuple holding the text of the first
        elements matching ``.show-title``, ``.show-info`` and
        ``.show-content``; the content text is stripped of surrounding
        whitespace.

    Raises:
        IndexError: if any of the three selectors matches nothing.
        requests.exceptions.RequestException: if the HTTP request fails or
            times out.
    """
    # Without an explicit timeout requests can block indefinitely on an
    # unresponsive server.
    res = requests.get(url, timeout=timeout)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    title = soup.select('.show-title')[0].get_text()
    info = soup.select('.show-info')[0].get_text()
    content = soup.select('.show-content')[0].get_text().strip()
    return title, info, content


if __name__ == '__main__':
    # Demo: print the id, click count, title, info line and body text of one
    # news page.
    n_url = 'http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0404/9183.html'
    num_news = int(get_ncoding(n_url))
    cl_times = int(click_count(num_news))
    # Fetch and parse the page once and unpack all three fields, instead of
    # issuing a separate HTTP request for each field.
    tit, inf, cont = get_new_details(n_url)
    print(num_news, '\n', cl_times, '\n', tit, '\n', inf, '\n', cont)

  

posted on 2018-04-11 13:25  133饶敏  阅读(130)  评论(0)  编辑  收藏  举报

导航