网站更新内容:请访问: https://bigdata.ministep.cn/

base_file文件

@@ base_file文件内容

import os,re
import shutil
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from sqlalchemy import create_engine


class Base_file(object):
    def delete_xlsfile(self, path, backup_dir='C:\\taobao_downloads'):
        """Back up, then delete, the Excel files sitting directly in ``path``.

        Every file in the top level of ``path`` whose name contains ``'.xls'``
        (this also matches ``.xlsx`` and ``.xls.crdownload``) is copied into
        ``backup_dir`` and then removed from ``path``. Sub-directories of
        ``path`` are not visited.

        Args:
            path: Directory to clean.
            backup_dir: Directory that receives a copy of each file before it
                is deleted. It is created on first use if it does not exist.

        Returns:
            The status message ``'xls文件清空完毕'``. It is returned even when
            ``path`` is empty or does not exist.
        """
        # os.walk yields the top directory first; only that entry is used.
        # A missing/unreadable path yields nothing, hence the empty default.
        _root, _dirs, files = next(os.walk(path), (path, [], []))

        for file_name in files:
            if '.xls' not in file_name:
                continue
            xls_path = os.path.join(path, file_name)
            # Make sure the backup folder exists *before* copying, so that the
            # file is never deleted without a backup having been written.
            if not os.path.isdir(backup_dir):
                os.makedirs(backup_dir)
            shutil.copy(xls_path, backup_dir)
            os.remove(xls_path)

        temp_delete_info = u'xls文件清空完毕'
        return temp_delete_info
    #path_downloads = 'C:\\Users\\Acer\\Downloads'
    #info  = delete_xlsfile(path=path_downloads)
    #########################判断xls文件是否下载成功
    ##读取浏览来源数据####################################################################   
    def xls_crdownload(self, path, wait_seconds=15):
        """Pause while Chrome still has unfinished ``.xls`` downloads in ``path``.

        Looks at the files directly inside ``path`` (sub-directories are not
        visited). For each file whose name contains ``'.xls.crdownload'`` a
        notice is printed and the call sleeps for ``wait_seconds``. The
        directory is scanned once; it is not re-checked after sleeping.

        Args:
            path: Download directory to inspect.
            wait_seconds: Seconds to sleep per in-progress download found.

        Returns:
            The message ``'下载超时'``.
            NOTE(review): this message is returned unconditionally, whether or
            not any pending download was seen — presumably callers only
            display it; confirm before relying on it as a status.
        """
        # Set the result up front so that an empty or missing directory no
        # longer hits an unbound local variable.
        xls_crdownload_info = u'下载超时'

        # Only the first entry yielded by os.walk (the top directory) is used.
        _root, _dirs, files = next(os.walk(path), (path, [], []))
        for file_name in files:
            if '.xls.crdownload' in file_name:
                print('chrome正在缓冲下载ing,请等待下载完毕')
                time.sleep(wait_seconds)

        return xls_crdownload_info

    #import os
    #import time 
    #path = 'C:\\Users\\Acer\\Downloads'
    #info  = xls_crdownload(path)    
    #########################读取文件
    ##读取浏览来源数据####################################################################
    def read_table(self, path, sheetname, skiprows, wait_seconds=15):
        """Load one sheet from the first ``*xls`` file found under ``path``.

        Walks ``path`` (including sub-directories) and stops at the first file
        whose name ends with ``'xls'``. The requested sheet is parsed into a
        DataFrame and a ``'日期'`` column is set to the date range found in the
        file's path (``YYYY-MM-DD ... YYYY-MM-DD``). If the sheet already has a
        ``'日期'`` column, its values are kept under ``'date_temp'``.

        Files whose name contains ``'xls.crdownload'`` are skipped after
        sleeping ``wait_seconds``; the directory is not re-scanned afterwards.

        Args:
            path: Directory to search.
            sheetname: Sheet to read, passed to pandas as ``sheet_name``.
            skiprows: Rows to skip at the top of the sheet, passed to pandas.
            wait_seconds: Seconds to sleep per in-progress download found.

        Returns:
            ``('xls文件读取成功', DataFrame)`` for the first matching file, or
            ``(None, None)`` when no matching file exists, so that callers can
            always unpack two values.

        Raises:
            ValueError: If the matching file's path holds no date range.
        """
        date_col = u'日期'
        date_range_pattern = r'\d{4}-\d{2}-\d{2}.*\d{4}-\d{2}-\d{2}'

        for root, _dirs, files in os.walk(path):
            for file_name in files:
                if 'xls.crdownload' in file_name:
                    time.sleep(wait_seconds)
                    continue
                if not file_name.endswith('xls'):
                    continue

                xls_path = os.path.join(root, file_name)
                print('文件下载成功临时存放地址--'+xls_path+'正在向数据库写入ing')

                # The reporting period is taken from the path, not the sheet.
                found = re.search(date_range_pattern, xls_path)
                if found is None:
                    raise ValueError(
                        'no "YYYY-MM-DD ... YYYY-MM-DD" date range in path: '
                        + xls_path
                    )

                # The context manager closes the workbook handle after parsing.
                with pd.ExcelFile(xls_path) as workbook:
                    table_sheet = workbook.parse(sheet_name=sheetname,
                                                 skiprows=skiprows)

                if date_col in table_sheet.columns:
                    # Keep the sheet's own date values under another name.
                    table_sheet['date_temp'] = table_sheet.pop(date_col)
                table_sheet[date_col] = found.group(0)

                temp_read_info = 'xls文件读取成功'
                return (temp_read_info, table_sheet)

        return (None, None)
                            
    
    #path='C:\\Users\\Acer\\Downloads'
    #sheetname = ''
    #sheetname = u''
    #skiprows=0
    #info_read,sheet=Base_file().read_table(path=path,sheetname=sheetname,skiprows=skiprows)
    
    def to_mysql(self, local_table, server_table,
                 db_url="mysql+pymysql://step:123456@172.16.57.72/tmall?charset=utf8"):
        """Append a DataFrame to a MySQL table and report the outcome.

        Args:
            local_table: Object whose ``to_sql`` method is called (a pandas
                DataFrame in the usage shown in this file).
            server_table: Name of the destination table; rows are appended.
            db_url: SQLAlchemy connection URL.
                NOTE(review): the default embeds a user name and password in
                source code; prefer supplying the URL from configuration or
                the environment.

        Returns:
            ``'数据库写入成功'`` if the write completed, otherwise
            ``'数据库写入失败'``. Failures are reported, not raised.
        """
        engine = None
        try:
            engine = create_engine(db_url)
            local_table.to_sql(name=server_table, con=engine,
                               if_exists='append', index=False,
                               chunksize=10000)
        except Exception as exc:
            temp_sql_info = '数据库写入失败'
            print(temp_sql_info)
            # Show the cause so that a failed write can be diagnosed.
            print(repr(exc))
        else:
            temp_sql_info = '数据库写入成功'
            print(temp_sql_info)
        finally:
            # Release pooled connections whether or not the write succeeded.
            if engine is not None:
                engine.dispose()
        return temp_sql_info
    
    #local=sheet
    #sever_table_name='temp_10'
    #info_sql = to_mysql(local_table=local,server_table= sever_table_name)
    
    
    def login_taobao(self, username, password,
                     driver_path="C:\\Users\\Acer\\Desktop\\git\\selenium\\chromedriver_win32\\chromedriver.exe",
                     download_dir='C:\\downloads_taobao'):
        """Open Chrome, submit the sycm.taobao.com login form and open the portal.

        Args:
            username: Text typed into the ``TPL_username_1`` field.
            password: Text typed into the ``TPL_password_1`` field.
            driver_path: Path of the chromedriver executable.
            download_dir: Directory Chrome is configured to download into.

        Returns:
            ``('登陆成功', driver)``. The message is returned whenever no step
            raised; the page is not inspected to confirm the login worked.
            The caller owns ``driver`` and is responsible for quitting it.

        Raises:
            Exception: Any error raised by a login step is re-raised after the
                browser has been closed.

        NOTE(review): ``find_element_by_xpath`` and the ``chrome_options``
        keyword are Selenium 3-era APIs that later Selenium releases dropped —
        confirm the installed Selenium version before upgrading.
        """
        # Chrome preferences: allow popups setting 0 and set the download folder.
        options = webdriver.ChromeOptions()
        prefs = {
            'profile.default_content_settings.popups': 0,
            'download.default_directory': download_dir,
        }
        options.add_experimental_option('prefs', prefs)

        driver = webdriver.Chrome(driver_path, chrome_options=options)
        try:
            # Set the implicit wait first so the login-page lookups below also
            # tolerate elements that render late.
            driver.implicitly_wait(30)

            driver.get('https://sycm.taobao.com/custom/login.htm')

            # The login form lives inside an iframe.
            frame = driver.find_element_by_xpath(
                '//*[@id="app"]/div/section[1]/div/div[1]/iframe')
            driver.switch_to.frame(frame)

            driver.find_element_by_xpath(
                '//*[@id="TPL_username_1"]').send_keys(username)
            driver.find_element_by_xpath(
                '//*[@id="TPL_password_1"]').send_keys(password)
            driver.find_element_by_xpath('//*[@id="J_SubmitStatic"]').submit()
            # Fixed pause after submitting the form.
            time.sleep(5)

            # Go straight to the portal home page.
            driver.get('https://sycm.taobao.com/portal/index.htm?spm=a21ag.8106233.logo.1.kxzbwq')

            driver.maximize_window()
        except Exception:
            # Do not leave an orphaned browser process behind on failure.
            driver.quit()
            raise

        temp_login_info = '登陆成功'
        return (temp_login_info, driver)
posted @ 2021-10-05 11:23  ministep88  阅读(70)  评论(0)  编辑  收藏  举报
网站更新内容:请访问:https://bigdata.ministep.cn/