有 banner 的 uid 数、有 logo 的 uid 数、两者的交集数:

 [len(i) for i in [banners_uid_set,logos_uid_set,banners_uid_set&logos_uid_set]]

 

import glob
import os

# Platform path separator, kept under its own name because the path-handling
# statements further down the script read it.
os_sep = os.sep
# Root directory under which every asset sub-directory lives.
save_dir = '/data/xiaole_dl_img/dlDBimg'
# One sub-directory per asset kind, each directly below save_dir.
mybanner_dir = os.path.join(save_dir, 'mybanner')
mylogo_dir = os.path.join(save_dir, 'mylogo')
mymp3_dir = os.path.join(save_dir, 'mymp3')
myv_dir = os.path.join(save_dir, 'myv')
myhtml_dir = os.path.join(save_dir, 'myhtml')
myv_tmp_dir = os.path.join(save_dir, 'myv_tmp')

# Example MP3 file name: 0428UNIX50005499_32.mp3
# The id kept for each file is the part of its base name before the first '_',
# reduced to the text that follows the last occurrence of today_s.
today_s = '0428UNIX'
f_mp3_d = os_sep.join((mymp3_dir, '*.mp3'))
mp3s = glob.glob(f_mp3_d)
mp3s_uid_set = {
    path.rpartition(os_sep)[2].partition('_')[0].split(today_s)[-1]
    for path in mp3s
}

# Build the username -> uid lookup from a UTF-8 text file whose lines are
# "uid<TAB>username".
f = 'uid.username.txt'
un_uid_d = {}
with open(f, 'r', encoding='utf-8') as fr:
    for line in fr:
        line = line.rstrip('\n')
        # A blank line (for instance a trailing empty line) holds no record;
        # unpacking it below would raise ValueError, so skip it.
        if not line.strip():
            continue
        # Non-blank lines must still contain exactly one tab; anything else
        # raises ValueError rather than being silently dropped.
        uid, un = line.split('\t')
        un_uid_d[un] = uid

# Example banner file name: beijingshifang_BANNER_c2_20161031091307_30624.jpg
# The text before '_BANNER_' is used as the username key into un_uid_d.
f_banner_d = '{}{}{}'.format(mybanner_dir, os_sep, '*.*g')
banners = glob.glob(f_banner_d)
banners_un_set = {path.split(os_sep)[-1].split('_BANNER_')[0] for path in banners}
# Map each known username to its uid and drop duplicates. dict.fromkeys keeps
# the first occurrence of every uid while avoiding the linear list scan that a
# "uid not in list" check would cost on every iteration.
had_banner_uid_l = list(
    dict.fromkeys(un_uid_d[un] for un in banners_un_set if un in un_uid_d)
)
banners_uid_set = set(had_banner_uid_l)
# Example logo file name: 34031923logo.jpg
# The text of the base name before 'logo.' is collected for every match.
f_logo_d = os_sep.join((mylogo_dir, '*.*g'))
logos = glob.glob(f_logo_d)
logos_uid_set = {path.rpartition(os_sep)[2].split('logo.')[0] for path in logos}

# Entries present in both the banner set and the logo set.
banners_logos_set = banners_uid_set.intersection(logos_uid_set)
# The sizes of the three sets can be inspected with:
# [len(s) for s in (banners_uid_set, logos_uid_set, banners_logos_set)]

 

posted @ 2018-04-28 13:21  papering  阅读(257)  评论(0)  编辑  收藏  举报