网站更新内容:请访问: https://bigdata.ministep.cn/

selenium_base_file

python\小脚本\selenium_base_file.py

# -*- coding: utf-8 -*-
"""
Created on Wed Feb 28 19:40:07 2018

@author: zhoujunqing
"""

# -*- coding: utf-8 -*-
"""
Created on Mon Jan 09 18:08:58 2017

@author: Acer
"""
import os,re
import shutil
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from sqlalchemy import create_engine
#import sys
#reload(sys)
#sys.setdefaultencoding("utf-8")
#################删除xls文件

class Base_file(object):
    """File helpers for a "browser download -> CSV -> MySQL" workflow.

    The methods archive old CSV files out of a download folder, wait for
    Chrome's unfinished ``.crdownload`` files to disappear, load the first
    CSV found into a pandas DataFrame and write a DataFrame to MySQL.
    """

    # NOTE(security): credentials are hard-coded in source. Kept as the
    # default so existing callers keep working; prefer passing ``db_url``
    # to ``to_mysql`` from configuration or an environment variable.
    DEFAULT_DB_URL = "mysql+pymysql://root:kemi336699@127.0.0.1/hujiang?charset=utf8"

    @staticmethod
    def _has_partial_download(path):
        """Return True if any file under *path* ends with ``.crdownload``."""
        return any(
            file_name.endswith('.crdownload')
            for _root, _dirs, files in os.walk(path)
            for file_name in files
        )

    def delete_xlsfile(self, path, backup_dir='E:\\taobao_downloads'):
        """Move every ``.csv`` file under *path* into *backup_dir*.

        Each file is copied to *backup_dir* and then removed from its
        original location. The whole tree below *path* is processed.

        Args:
            path: Folder to clean (walked recursively).
            backup_dir: Folder receiving the copies; created when missing.

        Returns:
            The status message ``u'csv文件清空完毕'``.
        """
        # Create the backup folder *before* touching any file, so a file is
        # never removed without having been copied first.
        os.makedirs(backup_dir, exist_ok=True)
        for root, _dirs, files in os.walk(path):
            for file_name in files:
                # Suffix match only: "x.csv.crdownload" is an unfinished
                # download, not a CSV, and must be left alone.
                if not file_name.endswith('.csv'):
                    continue
                csv_path = os.path.join(root, file_name)
                shutil.copy(csv_path, backup_dir)
                os.remove(csv_path)
        temp_delete_info = u'csv文件清空完毕'
        return temp_delete_info

    def is_crdownload(self, path, wait_seconds=15, max_checks=20):
        """Wait until no unfinished Chrome download remains under *path*.

        The folder is polled up to *max_checks* times, sleeping
        *wait_seconds* between polls while a ``.crdownload`` file exists.

        Args:
            path: Download folder to inspect (walked recursively).
            wait_seconds: Pause between two polls, in seconds.
            max_checks: Maximum number of polls before giving up.

        Returns:
            ``None`` when no ``.crdownload`` file is left, or
            ``u'下载超时'`` when one is still present after the last poll.
        """
        for _ in range(max_checks):
            if not self._has_partial_download(path):
                return None
            print('chrome正在缓冲下载ing,请等待下载完毕')
            time.sleep(wait_seconds)
        # One last look after the final sleep before declaring a timeout.
        if self._has_partial_download(path):
            return u'下载超时'
        return None

    def read_table(self, path, encoding='oem', wait_seconds=15):
        """Load the first ``.csv`` file found under *path*.

        Args:
            path: Folder to search (walked recursively).
            encoding: Text encoding handed to ``pandas.read_csv``. The
                default ``'oem'`` codec exists on Windows only; pass an
                explicit encoding on other platforms.
            wait_seconds: How long to pause once when an unfinished
                ``.crdownload`` file is present before scanning.

        Returns:
            A tuple ``(info, table)``: ``('xls文件读取成功', DataFrame)``
            for the first CSV found, or ``(None, None)`` when there is none.
        """
        temp_read_info = None
        table_sheet = None
        # Pause before listing the folder so a download that finishes during
        # the pause is visible to the scan below.
        if self._has_partial_download(path):
            time.sleep(wait_seconds)
        for root, _dirs, files in os.walk(path):
            for file_name in files:
                if not file_name.endswith('.csv'):
                    continue
                csv_path = os.path.join(root, file_name)
                print('文件下载成功临时存放地址--'+csv_path+'正在向数据库写入ing')
                table_sheet = pd.read_csv(csv_path, encoding=encoding, engine='python')
                temp_read_info = 'xls文件读取成功'
                return (temp_read_info, table_sheet)
        # Always hand back a 2-tuple so callers can unpack the result.
        return (temp_read_info, table_sheet)

    def to_mysql(self, local_table, server_table, db_url=None):
        """Write *local_table* to the MySQL table *server_table*.

        An existing table of that name is replaced. Failures are reported
        through the return value rather than raised (best effort).

        Args:
            local_table: pandas DataFrame to upload.
            server_table: Name of the destination table.
            db_url: SQLAlchemy connection URL; defaults to
                ``DEFAULT_DB_URL``.

        Returns:
            ``'数据库写入成功'`` on success, ``'数据库写入失败'`` otherwise.
        """
        engine = None
        try:
            engine = create_engine(db_url or self.DEFAULT_DB_URL)
            local_table.to_sql(name=server_table, con=engine, if_exists='replace',
                               index=False, chunksize=10000)
            temp_sql_info = '数据库写入成功'
        except Exception as exc:
            # Keep the best-effort contract, but show why the write failed.
            temp_sql_info = '数据库写入失败'
            print(type(exc).__name__ + ': ' + str(exc))
        finally:
            # Release pooled connections whether or not the write succeeded.
            if engine is not None:
                engine.dispose()
        print(temp_sql_info)
        return temp_sql_info
    
    #local=sheet
    #sever_table_name='temp_10'
    #info_sql = to_mysql(local_table=local,server_table= sever_table_name)
    

# Driver: archive old CSV files, wait for pending downloads, load the first
# CSV from the download folder and upload it to the MySQL table "xiaoe".
# NOTE(review): this runs at import time; there is no __main__ guard.
base_file = Base_file()
path = r'E:\selenium_chrome_download'
base_file.delete_xlsfile(path)
base_file.is_crdownload(path)
# read_table may produce nothing when the folder holds no CSV; normalise the
# result so the unpacking below cannot fail.
read_result = base_file.read_table(path)
if read_result is None:
    read_result = (None, None)
temp_read_info, table_sheet = read_result
if table_sheet is not None:
    base_file.to_mysql(local_table=table_sheet, server_table='xiaoe')
else:
    # Nothing was read, so there is nothing to upload.
    print('未找到可读取的csv文件,跳过数据库写入')

posted @ 2021-03-13 10:13  ministep88  阅读(55)  评论(0)  编辑  收藏  举报
网站更新内容:请访问:https://bigdata.ministep.cn/