实现关键词查询并协助Aliexpress产品SEO优化

留作纪念!Python 第一次让我感到兴奋!第一次感受到什么叫科技就是生产力!

还有很多不足的地方,继续学习!

Python 版本:3.5.1

# -*- coding: utf-8 -*-

import openpyxl
from bs4 import BeautifulSoup
import urllib.request
from urllib.parse import quote
from multiprocessing import Pool
import time

def sortXlsx(filename):
    '''Read the keywords from the first worksheet of an .xlsx workbook.

    Values are taken from column 2 (column B) of every row from row 2
    down to the sheet's last used row; row 1 is skipped (presumably a
    header row).

    Args:
        filename: Path of the workbook. It is passed through ``str()``
            before being opened.

    Returns:
        A list with one entry per sheet row, in row order. Empty cells
        are kept as ``None`` so that list position ``i`` always
        corresponds to sheet row ``i + 2``.
    '''
    work_book = openpyxl.load_workbook(str(filename))
    # ``worksheets[0]`` replaces the deprecated get_sheet_names() /
    # get_sheet_by_name() pair.
    work_sheet = work_book.worksheets[0]

    return [
        work_sheet.cell(row=row, column=2).value
        for row in range(2, work_sheet.max_row + 1)
    ]

def parseKeyWords(key_words):
    '''Search AliExpress for a keyword and return its search-result count.

    Args:
        key_words: The keyword (or phrase) to search for, as a string.

    Returns:
        A ``(keyword, count)`` tuple. ``keyword`` is the input with every
        space replaced by ``'+'``; ``count`` is the text of the first
        child of the page's ``<strong class="search-count">`` element,
        converted with ``str()``.

    Raises:
        ValueError: If the page has no non-empty ``search-count`` element.
        urllib.error.URLError: If the page cannot be fetched.
    '''
    # Local import: the file-level import block only brings in ``quote``.
    from urllib.parse import quote_plus

    # quote_plus() encodes spaces as '+' and a literal '+' as '%2B'.
    # Replacing spaces with '+' first and then calling quote() would turn
    # every separator into '%2B', i.e. a literal plus sign in the query.
    aliexpressUrl = ('http://www.aliexpress.com/wholesale?SearchText='
                     + quote_plus(key_words))
    print('Opening Aliexpress URL %s' % aliexpressUrl)

    # The context manager closes the HTTP response even if parsing fails.
    with urllib.request.urlopen(aliexpressUrl) as data:
        soup = BeautifulSoup(data, "html.parser")

    # Parse the html and find the search-count number.
    count_tag = soup.find("strong", "search-count")
    if count_tag is None or not count_tag.contents:
        raise ValueError(
            'no search-count element found at %s' % aliexpressUrl)

    return key_words.replace(' ', '+'), str(count_tag.contents[0])

def writeToXlsx(result_list, key_words_list, filename,
                output_filename='womens_result.xlsx', result_column='K'):
    '''Write each search count next to its keyword and save a new workbook.

    The workbook ``filename`` is loaded, the counts are written into
    ``result_column`` of its first worksheet, and the result is saved as
    ``output_filename``.

    Args:
        result_list: Iterable of ``(keyword, count)`` pairs. The keyword
            may have its spaces replaced by ``'+'``.
        key_words_list: The keywords in sheet order; position ``i``
            corresponds to sheet row ``i + 2``. Non-string entries
            (e.g. ``None`` for empty cells) are ignored.
        filename: Path of the source workbook.
        output_filename: Path the updated workbook is saved to.
        result_column: Column letter that receives the counts.

    Raises:
        ValueError: If a result's keyword does not occur in
            ``key_words_list``. Nothing is saved in that case.
    '''
    work_book = openpyxl.load_workbook(str(filename))
    # ``worksheets[0]`` replaces the deprecated get_sheet_names() /
    # get_sheet_by_name() pair.
    work_sheet = work_book.worksheets[0]

    # Map each keyword to every row it appears in. Both sides are compared
    # with spaces replaced by '+', so a keyword matches whether or not the
    # result carries the '+' form. Keeping all rows (not just the first
    # match) means duplicated keywords are all filled in.
    rows_by_keyword = {}
    for position, key_word in enumerate(key_words_list):
        if not isinstance(key_word, str):
            continue
        normalized = key_word.replace(' ', '+')
        rows_by_keyword.setdefault(normalized, []).append(position + 2)

    for item in result_list:
        normalized = item[0].replace(' ', '+')
        if normalized not in rows_by_keyword:
            raise ValueError('%r is not in key_words_list' % (item[0],))
        for row in rows_by_keyword[normalized]:
            work_sheet[result_column + str(row)] = item[1]

    work_book.save(output_filename)    # save to a new xlsx file

def main():
    '''Prompt for a workbook, look up every keyword and save the counts.

    The keywords are read with sortXlsx(), searched by a pool of four
    worker processes running parseKeyWords(), and written back with
    writeToXlsx(). The elapsed time, measured from just before the pool
    is created, is printed at the end.
    '''
    workbook_path = input('please input the .xlsx filename: ')
    keywords = sortXlsx(workbook_path)

    started_at = time.time()

    worker_pool = Pool(processes=4)
    search_results = worker_pool.map(parseKeyWords, keywords)
    # No more tasks will be submitted; wait for the workers to exit.
    worker_pool.close()
    worker_pool.join()

    writeToXlsx(search_results, keywords, workbook_path)
    print('all result already write to new xlsx file.')

    elapsed = time.time() - started_at
    print('All task runs %0.2f seconds.' % elapsed)


# Run main() only when this file is executed directly, not when it is
# imported. The multiprocessing documentation also asks for this guard so
# that worker processes which import the main module do not re-run it.
if __name__ == '__main__':
    main()

All I want to say: it is just like magic!

posted on 2016-04-21 21:44  NormalProgrammer  阅读(595)  评论(0)  编辑  收藏  举报

导航