有 banner 的 uid 数、有 logo 的 uid 数、两者交集的 uid 数
# Sizes of the banner uid set, the logo uid set, and their intersection.
list(map(len, (banners_uid_set, logos_uid_set, banners_uid_set & logos_uid_set)))
import glob
import os

# Gathers, from the download directories under save_dir, the uids that have
# mp3, banner and logo files, plus the uids that have both a banner and a logo.

os_sep = os.sep
save_dir = '/data/xiaole_dl_img/dlDBimg'

# One sub-directory per asset type, all directly under save_dir.
mybanner_dir, mylogo_dir, mymp3_dir, myv_dir, myhtml_dir, myv_tmp_dir = (
    os.path.join(save_dir, sub_dir)
    for sub_dir in ('mybanner', 'mylogo', 'mymp3', 'myv', 'myhtml', 'myv_tmp')
)

# mp3 files look like 0428UNIX50005499_32.mp3: the uid is whatever follows
# today_s in the part of the file name before the first underscore.
today_s = '0428UNIX'
f_mp3_d = os.path.join(mymp3_dir, '*.mp3')
mp3s = glob.glob(f_mp3_d)
mp3s_uid_set = {
    os.path.basename(path).split('_')[0].split(today_s)[-1] for path in mp3s
}

# uid.username.txt is read as one tab-separated "uid<TAB>username" pair per
# line; the mapping is keyed by username so banner file names can be resolved.
f = 'uid.username.txt'
un_uid_d = {}
with open(f, 'r', encoding='utf-8') as fr:
    for line in fr:
        line = line.replace('\n', '')
        if not line:
            # Skip blank lines (e.g. a trailing empty line) instead of
            # failing on the two-value unpack below.
            continue
        uid, un = line.split('\t')
        un_uid_d[un] = uid

# Banner files look like beijingshifang_BANNER_c2_20161031091307_30624.jpg:
# the text before "_BANNER_" is the username. The '*.*g' pattern matches any
# extension ending in "g".
f_banner_d = os.path.join(mybanner_dir, '*.*g')
banners = glob.glob(f_banner_d)
banners_un_set = {os.path.basename(path).split('_BANNER_')[0] for path in banners}

# Resolve usernames to uids, ignoring usernames missing from un_uid_d.
# dict.fromkeys de-duplicates in first-seen order without rescanning a list
# for every uid.
had_banner_uid_l = list(dict.fromkeys(
    un_uid_d[un] for un in banners_un_set if un in un_uid_d
))
banners_uid_set = set(had_banner_uid_l)

# Logo files look like 34031923logo.jpg: the text before "logo." is the uid.
f_logo_d = os.path.join(mylogo_dir, '*.*g')
logos = glob.glob(f_logo_d)
logos_uid_set = {os.path.basename(path).split('logo.')[0] for path in logos}

# uids that have both a banner and a logo.
banners_logos_set = banners_uid_set & logos_uid_set
# To inspect the three sizes interactively:
# [len(i) for i in [banners_uid_set, logos_uid_set, banners_uid_set & logos_uid_set]]