获取免费代理IP库

#!/usr/local/bin/python3
# coding:utf-8

# ====================================================
# Author: chang - EMail:changbo@hmg100.com
# Last modified: 2017-4-22
# Filename: iplibrary.py
# Description: get an IP (proxy) library list, based on urllib and re
# blog:http://www.cnblogs.com/changbo
# ====================================================

import urllib.request
import re


# Patterns stripped by filter_tags, applied in this order.  Raw strings are
# used so that regex escapes such as \[ and \s are not parsed as (invalid)
# string escape sequences.
_STRIP_PATTERNS = (
    re.compile(r'//<!\[CDATA\[[^>]*//\]\]>', re.I),  # CDATA sections
    re.compile(r'<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.I),  # <script> elements
    re.compile(r'<\s*style[^>]*>[^<]*<\s*/\s*style\s*>', re.I),  # <style> elements
    re.compile(r'<!--[^>]*-->'),  # HTML comments
)
_BLANK_LINES = re.compile(r'\n+')


def filter_tags(htmlstr):
    """Remove CDATA sections, scripts, styles and comments from an HTML string.

    Ordinary HTML tags (for example ``<br/>``) are left in place.  Script and
    style elements are only matched when their body contains no ``<``, and
    CDATA sections and comments only when their body contains no ``>``.
    After stripping, every run of consecutive newlines is collapsed into a
    single newline.

    Args:
        htmlstr: The HTML text to clean.

    Returns:
        The cleaned text.
    """
    s = htmlstr
    for pattern in _STRIP_PATTERNS:
        s = pattern.sub('', s)
    return _BLANK_LINES.sub('\n', s)


def getiplist(ipnumber, timeout=10):
    """Fetch a proxy list from the 89ip.cn API and print each entry.

    Each entry is printed as ``<ip> ---- <port>``.  Nothing is returned.

    Args:
        ipnumber: Integer number of entries to request; it is formatted with
            ``%d`` into the ``tqsl`` query parameter of the API URL.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled server would block forever.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the response body is not valid GBK.
        IndexError: If the filtered response contains no ``<br/>`` separator.
    """
    url = 'http://www.89ip.cn/api/?&tqsl=%d&sxa=&sxb=&tta=&ports=&ktip=&cf=1' % ipnumber
    request = urllib.request.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0')
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(request, timeout=timeout) as resp:
        response = resp.read().decode('gbk')
    ipinfo = filter_tags(response)
    # The list is taken from the text after the first '<br/>', with the final
    # 46 characters cut off.
    # NOTE(review): the 46 presumably matches a fixed-length footer in the
    # API response -- confirm against a live response.
    payload = ipinfo.split('<br/>')[1][:-46].strip()
    for entry in payload.split('<BR>'):
        parts = entry.split(':')
        if len(parts) < 2:
            # Skip empty or malformed entries that have no "ip:port" shape.
            continue
        print(parts[0] + ' ---- ' + parts[1])
# Script entry point: request 30 entries and print them.  This is a
# module-level statement, so it also runs when the file is imported.
getiplist(30)

END!

posted @ 2017-04-22 13:38  知_行  阅读(1041)  评论(0)  编辑  收藏  举报