# selenium_base_file
# Source path: python\小脚本\selenium_base_file.py
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 28 19:40:07 2018
@author: zhoujunqing
"""
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 09 18:08:58 2017
@author: Acer
"""
import os,re
import shutil
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from sqlalchemy import create_engine
#import sys
#reload(sys)
#sys.setdefaultencoding("utf-8")
#################删除xls文件
class Base_file(object):
    """Helpers for a browser-download -> MySQL workflow.

    The methods clean a download folder, wait for Chrome partial downloads
    (``.crdownload`` files), load a downloaded CSV into a pandas DataFrame
    and write a DataFrame to a MySQL table.
    """

    # NOTE(review): credentials are hard-coded in source. Kept as the default
    # for backward compatibility; pass ``db_url`` to ``to_mysql`` to override
    # and consider moving this value to configuration.
    DEFAULT_DB_URL = "mysql+pymysql://root:kemi336699@127.0.0.1/hujiang?charset=utf8"

    def delete_xlsfile(self, path, backup_directory='E:\\taobao_downloads'):
        """Back up and then remove every CSV file found under *path*.

        Args:
            path: Root folder that is walked recursively.
            backup_directory: Folder that receives a copy of each file before
                it is removed. Created on demand when it does not exist.

        Returns:
            The status message ``u'csv文件清空完毕'``.
        """
        for root, _dirs, files in os.walk(path):
            for file_name in files:
                # NOTE(review): substring match kept from the original; it
                # also catches names such as 'x.csv.crdownload'.
                if '.csv' not in file_name:
                    continue
                xls_path = os.path.join(root, file_name)
                # Create the backup folder first and then ALWAYS copy.  The
                # original skipped the copy when it had to create the folder,
                # so that file was removed without any backup.
                if not os.path.isdir(backup_directory):
                    os.makedirs(backup_directory)
                shutil.copy(xls_path, backup_directory)
                os.remove(xls_path)
        return u'csv文件清空完毕'

    def is_crdownload(self, path, wait_seconds=15):
        """Wait for partial Chrome downloads under *path* and report timeouts.

        For every file whose name contains ``.crdownload`` the method waits
        *wait_seconds* and then checks whether the partial file is still
        present.

        Args:
            path: Root folder that is walked recursively.
            wait_seconds: Seconds to wait per partial file (default 15).

        Returns:
            ``u'下载超时'`` when at least one partial file is still present
            after its wait, otherwise ``None``.
        """
        xls_crdownload_info = None
        for root, _dirs, files in os.walk(path):
            for file_name in files:
                if '.crdownload' not in file_name:
                    continue
                print('chrome正在缓冲下载ing,请等待下载完毕')
                time.sleep(wait_seconds)
                # Only report a timeout when the partial file is still there;
                # the original reported one even if the download had finished
                # during the wait.
                if os.path.exists(os.path.join(root, file_name)):
                    xls_crdownload_info = u'下载超时'
        return xls_crdownload_info

    def read_table(self, path, encoding='oem', wait_seconds=15):
        """Load a downloaded CSV file found under *path* into a DataFrame.

        Files whose name contains ``.crdownload`` only trigger a wait; files
        ending in ``.csv`` are read with ``pandas.read_csv``.  When several
        CSV files exist, the one visited last wins.

        Args:
            path: Root folder that is walked recursively.
            encoding: Text encoding passed to ``pandas.read_csv``.  The
                default ``'oem'`` is the value the original hard-coded; that
                codec is only available on Windows, so pass another encoding
                elsewhere.
            wait_seconds: Seconds to wait per partial download (default 15).

        Returns:
            A tuple ``(status, table)``: ``('xls文件读取成功', DataFrame)``
            when a CSV was read, otherwise ``(None, None)``.
        """
        temp_read_info = None
        table_sheet = None
        for root, _dirs, files in os.walk(path):
            for file_name in files:
                if '.crdownload' in file_name:
                    time.sleep(wait_seconds)
                elif file_name.endswith('.csv'):
                    # Use a dedicated name instead of overwriting the ``path``
                    # argument, as the original did.
                    csv_path = os.path.join(root, file_name)
                    print('文件下载成功临时存放地址--' + csv_path + '正在向数据库写入ing')
                    table_sheet = pd.read_csv(csv_path, encoding=encoding, engine='python')
                    temp_read_info = 'xls文件读取成功'
        return (temp_read_info, table_sheet)

    def to_mysql(self, local_table, server_table, db_url=None):
        """Write *local_table* to the MySQL table *server_table*.

        The target table is replaced if it already exists.

        Args:
            local_table: pandas DataFrame to upload.  ``None`` (for example
                when no CSV was read) is reported as a failure.
            server_table: Name of the destination table.
            db_url: SQLAlchemy connection URL; defaults to
                ``DEFAULT_DB_URL``.

        Returns:
            ``'数据库写入成功'`` on success, ``'数据库写入失败'`` otherwise.
        """
        failure_info = '数据库写入失败'
        success_info = '数据库写入成功'

        if local_table is None:
            # Nothing to upload; fail explicitly instead of relying on an
            # AttributeError being swallowed further down.
            print(failure_info)
            return failure_info

        engine = None
        try:
            engine = create_engine(db_url or self.DEFAULT_DB_URL)
            local_table.to_sql(name=server_table, con=engine, if_exists='replace',
                               index=False, chunksize=10000)
        except Exception as exc:
            # Report the cause; the original bare ``except:`` hid it.
            print(failure_info)
            print(repr(exc))
            return failure_info
        finally:
            # The original evaluated ``engine.drop``, which is not an Engine
            # attribute: the resulting AttributeError was swallowed and turned
            # every successful write into a reported failure.  ``dispose()``
            # is the call that releases the connection pool.
            if engine is not None:
                engine.dispose()

        print(success_info)
        return success_info
#local=sheet
#sever_table_name='temp_10'
#info_sql = to_mysql(local_table=local,server_table= sever_table_name)
# Driver script: clean the Chrome download folder, wait for any partial
# downloads, load the CSV found there and upload it to the 'xiaoe' table.
# NOTE(review): no download step is visible between the cleanup and the read;
# presumably the browser automation runs elsewhere -- confirm.
path = r'E:\selenium_chrome_download'
base_file = Base_file()

base_file.delete_xlsfile(path)
base_file.is_crdownload(path)

# read_table returns a (status message, DataFrame-or-None) pair.
temp_read_info, table_sheet = base_file.read_table(path)
base_file.to_mysql(table_sheet, 'xiaoe')