onlyou13

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理
def get_search_github(keyword, language, pageIndex):
    """Build the URL of one page of a GitHub repository search.

    Args:
        keyword: Search query text, sent as the ``q`` parameter
            (e.g. ``"stars:>10000"``).
        language: Language filter, sent as the ``l`` parameter.
        pageIndex: Result page number, sent as the ``p`` parameter.

    Returns:
        The full ``https://github.com/search?...`` URL with every
        parameter percent-encoded by ``urllib.parse.urlencode``.
    """
    # A list of pairs keeps the parameters in a fixed order: q, type, l, p.
    query_string = urllib.parse.urlencode([
        ("q", keyword),
        ("type", "Repositories"),
        ("l", language),
        ("p", pageIndex),
    ])
    return r"https://github.com/search?" + query_string

def _parse_star_count(star):
    """Convert a star label such as ``"987"`` or ``"12.3k"`` to an integer."""
    text = star.strip().lower().replace(",", "")
    multiplier = 1
    if text.endswith("k"):
        multiplier = 1000
        text = text[:-1].strip()
    # round() instead of bare int(): a product that lands just below a whole
    # number because of binary floating point would otherwise be truncated
    # to one less than the intended count.
    return int(round(float(text) * multiplier))


def get_github_source_stars(url, timeout=30):
    """Download a GitHub search-result page and extract its repositories.

    The page is fetched with ``requests.get`` and scraped with a single
    regular expression that is tied to specific CSS class names in the
    markup; if nothing matches, an empty list is returned. The HTTP status
    code is not checked.

    Args:
        url: Address of the search-result page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up, so a stalled connection cannot block the caller forever.

    Returns:
        A list with one dict per matched repository, with the keys:
        ``"title"``, ``"language"``, ``"description"``, ``"stars"`` (the
        label as shown on the page, e.g. ``"12.3k"``) and ``"start_count"``
        (the label converted to an int). The spelling ``"start_count"`` is
        kept as-is because callers may already read that key.

    Raises:
        ValueError: If a matched star label is not numeric.
    """
    reply = requests.get(url, timeout=timeout)
    response = reply.content.decode("utf-8")

    # Capture groups, in order: title, description, star label, language.
    getDataPattern = r'repo-list-item[\s|\S]*?mt-n1[\s|\S]*?f4 text-normal">[\s|\S]*?>([\s|\S]*?)</a>[\s|\S]*?mb-1">([\s|\S]+?)</p>[\s|\S]*?octicon octicon-star[\s|\S]*?</path></svg>\s+(\S+)[\s|\S]*?programmingLanguage">(\S+)</span>'

    SourceList = []
    for title, description, star, language in re.findall(getDataPattern, response):
        star = star.lstrip()
        SourceList.append({
            "title": title.lstrip(),
            "language": language.lstrip(),
            "description": description.lstrip(),
            "stars": star,
            "start_count": _parse_star_count(star),
        })

    return SourceList

 

    # NOTE(review): this loop sits at function-body indentation directly after
    # the `return` above, so as written it can never execute. It looks like a
    # driver snippet that lost its enclosing header (for example an
    # `if __name__ == "__main__":` guard) -- confirm and restore it.
    # Requests pages 1 through 14 of Python repositories matching
    # "stars:>10000" and scrapes each one.
    for page in range(1, 15):
        url = get_search_github(r"stars:>10000", "python", page)
        # The per-page result is only bound to `alist`; nothing here prints,
        # stores or accumulates it.
        alist = get_github_source_stars(url)

 

posted on 2020-07-04 17:52  onlyou13  阅读(129)  评论(0)  编辑  收藏  举报