集合运算 字典排序 按值 按键

 

 

Python 集合set()添加删除、交集、并集、集合操作详解 - 玩蛇网 http://www.iplaypy.com/jichu/set.html

 

 

 

# Load the ad feature table into ad_d: row index -> list of raw string fields.
fad = 'adFeature.csv'
ad_d = {}
c = 0
with open(fad, 'r') as fr:
    for line in fr:
        fields = line.replace('\n', '')
        # Skip the header row (it contains the column name 'aid') and blank
        # lines; a blank line would otherwise be stored as [''] and make the
        # per-column pass below fail with IndexError.
        if 'aid' in fields or not fields.strip():
            continue

        # Columns, in order: aid, advertiserId, campaignId, creativeId,
        # creativeSize, adCategoryId, productId, productType
        ad_d[c] = fields.split(',')
        c += 1

# Column names. Each name is stripped so the keys are 'advertiserId' etc.
# rather than ' advertiserId' (splitting on ',' alone keeps the space).
ad_k_l = [name.strip() for name in
          'aid, advertiserId, campaignId, creativeId, creativeSize, adCategoryId, productId, productType'.split(',')]

# ad_k_d: column name -> set of distinct raw values seen in that column.
ad_k_d = {}
for col, name in enumerate(ad_k_l):
    ad_k_d[name] = {row[col] for row in ad_d.values()}

# ad_k_counter_d: one single-entry dict per column holding its distinct-value count.
# Counts recorded on the author's data (keys shown as produced after stripping):
# [{'aid': 173}, {'advertiserId': 79}, {'campaignId': 138}, {'creativeId': 173}, {'creativeSize': 15}, {'adCategoryId': 40}, {'productId': 33}, {'productType': 4}]
ad_k_counter_d = [{name: len(values)} for name, values in ad_k_d.items()]
ftrain, ftest = 'train.csv', 'test1.csv'

# train_d:     row index -> [aid, uid, label] (all parsed as int)
# train_d_aid: aid -> list of uids appearing with that aid
# train_d_uid: uid -> list of aids appearing with that uid
train_d, train_d_aid, train_d_uid = {}, {}, {}
c = 0
with open(ftrain, 'r') as fr:
    for line in fr:
        # Skip the header row (it contains the column name 'aid').
        if 'aid' in line:
            continue
        try:
            # Parse and unpack BEFORE storing anything: a chained assignment
            # `train_d[c] = aid, uid, label = [...]` assigns left to right, so
            # a row with the wrong number of fields would already be stored
            # in train_d[c] by the time the unpack fails.
            row = [int(field) for field in line.replace('\n', '').split(',')]
            aid, uid, label = row
        except ValueError as e:
            # Non-integer field or wrong field count: report the line and skip it.
            print(line)
            print(e)
            continue

        train_d[c] = row
        train_d_aid.setdefault(aid, []).append(uid)
        train_d_uid.setdefault(uid, []).append(aid)
        c += 1

# test_d:     row index -> [aid, uid] (both parsed as int)
# test_d_aid: aid -> list of uids appearing with that aid
# test_d_uid: uid -> list of aids appearing with that uid
test_d, test_d_aid, test_d_uid = {}, {}, {}
c = 0
with open(ftest, 'r') as fr:
    for line in fr:
        # Skip the header row (it contains the column name 'aid').
        if 'aid' in line:
            continue
        try:
            # Parse and unpack BEFORE storing anything: a chained assignment
            # `test_d[c] = aid, uid = [...]` assigns left to right, so a row
            # with the wrong number of fields would already be stored in
            # test_d[c] by the time the unpack fails.
            row = [int(field) for field in line.replace('\n', '').split(',')]
            aid, uid = row
        except ValueError as e:
            # Non-integer field or wrong field count: report the line and skip it.
            print(line)
            print(e)
            continue

        test_d[c] = row
        test_d_aid.setdefault(aid, []).append(uid)
        test_d_uid.setdefault(uid, []).append(aid)
        c += 1

# 测试集人数
# len(set([uid for uid in test_d_uid])) 2195951
# 训练集人数
# len(set([uid for uid in train_d_uid])) 7883466
# 交集人数
# len(set([uid for uid in train_d_uid]) & set([uid for uid in test_d_uid]))  392464


# 测试集广告数
# len(set([aid for aid in test_d_aid])) 173
# 训练集广告数
# len(set([aid for aid in train_d_aid])) 173
# 交集广告数
# len(set([aid for aid in train_d_aid]) & set([aid for aid in test_d_aid]))  173



# 测试集各个广告的人数
# [{i[0]:len(i[1])} for i in sorted(test_d_aid.items(), key=lambda k: k, reverse=False)]
# <class 'list'>: [{6: 2660}, {7: 7298}, {12: 14664}, {18: 3936}, {70: 46015}, {74: 10490}, {86: 3103}, {98: 5032}, {113: 2780}, {117: 32448}, {121: 8967}, {136: 36514}, {145: 7874}, {164: 3749}, {173: 19532}, {174: 5360}, {177: 3705}, {191: 53758}, {205: 2159}, {206: 8249}, {212: 11240}, {231: 15975}, {272: 1926}, {286: 10513}, {302: 23758}, {311: 20792}, {313: 1765}, {336: 1753}, {369: 17441}, {389: 27320}, {404: 15597}, {411: 95782}, {420: 5379}, {432: 19882}, {436: 3320}, {450: 2113}, {454: 3810}, {471: 6691}, {516: 1888}, {519: 8541}, {529: 4894}, {543: 15257}, {561: 63088}, {562: 2959}, {613: 2644}, {624: 3530}, {647: 3112}, {660: 4387}, {671: 2513}, {681: 5323}, {686: 5974}, {688: 2637}, {692: 142959}, {699: 11891}, {725: 17200}, {727: 13257}, {748: 13086}, {765: 9948}, {792: 11373}, {838: 1710}, {846: 30903}, {853: 4647}, {875: 1975}, {886: 2617}, {894: 6680}, {903: 2196}, {914: 46741}, {916: 23986}, {927: 1797}, {932: 5582}, {939: 7720}, {951: 1882}, {960: 5055}, {966: 20422}, {975: 1866}, {977: 2745}, {1017: 22030}, {1021: 9977}, {1023: 8148}, {1027: 2550}, {1044: 7563}, {1057: 1967}, {1085: 1831}, {1107: 5801}, {1119: 101222}, {1140: 11752}, {1171: 3187}, {1182: 3689}, {1201: 13713}, {1202: 5568}, {1215: 6396}, {1230: 3626}, {1242: 1672}, {1254: 20166}, {1277: 3771}, {1284: 4552}, {1291: 70546}, {1317: 6396}, {1335: 2124}, {1338: 13146}, {1350: 3011}, {1351: 1657}, {1375: 8870}, {1377: 21984}, {1379: 36786}, {1407: 30199}, {1415: 46989}, {1429: 1854}, {1449: 3012}, {1468: 84961}, {1483: 10295}, {1496: 8658}, {1503: 5041}, {1507: 2111}, {1508: 8948}, {1512: 3013}, {1530: 29011}, {1566: 54373}, {1580: 2178}, {1596: 20612}, {1599: 2813}, {1605: 41107}, {1621: 3903}, {1622: 14448}, {1635: 4515}, {1671: 3268}, {1672: 2921}, {1714: 12231}, {1716: 11928}, {1728: 14452}, {1746: 1756}, {1749: 21449}, {1781: 12405}, {1790: 3965}, {1819: 7284}, {1827: 20962}, {1841: 5775}, {1842: 2551}, {1847: 2111}, {1871: 2446}, {1894: 4013}, {1904: 5948}, {1910: 2780}, {1918: 
# 62398}, {1925: 1756}, {1930: 4088}, {1931: 3810}, {1940: 9503}, {1950: 23711}, {1957: 2883}, {1962: 4859}, {1966: 2862}, {1970: 3009}, {1991: 3327}, {1998: 2980}, {2013: 19759}, {2031: 32509}, {2044: 3041}, {2047: 3636}, {2048: 8007}, {2050: 10191}, {2054: 6019}, {2066: 3017}, {2068: 3212}, {2112: 4829}, {2118: 23579}, {2154: 1953}, {2169: 2858}, {2196: 2061}, {2197: 2744}, {2201: 6152}, {2205: 5824}, {2216: 3090}]

# 训练集各个广告的人数
# [{i[0]:len(i[1])} for i in sorted(train_d_aid.items(), key=lambda k: k, reverse=False)]
# <class 'list'>: [{6: 10394}, {7: 28699}, {12: 56905}, {18: 15510}, {70: 178444}, {74: 40789}, {86: 11917}, {98: 18973}, {113: 11167}, {117: 126933}, {121: 34926}, {136: 140882}, {145: 30441}, {164: 14805}, {173: 77203}, {174: 20697}, {177: 14377}, {191: 208786}, {205: 8335}, {206: 31650}, {212: 44468}, {231: 62412}, {272: 7442}, {286: 39987}, {302: 92284}, {311: 80505}, {313: 7031}, {336: 7096}, {369: 68098}, {389: 104636}, {404: 60522}, {411: 372787}, {420: 20987}, {432: 77751}, {436: 13599}, {450: 8134}, {454: 14842}, {471: 25530}, {516: 7472}, {519: 33309}, {529: 18208}, {543: 59535}, {561: 244151}, {562: 11656}, {613: 10682}, {624: 13752}, {647: 12157}, {660: 17017}, {671: 9498}, {681: 21103}, {686: 22720}, {688: 10224}, {692: 553109}, {699: 46821}, {725: 65912}, {727: 51375}, {748: 50403}, {765: 38814}, {792: 44790}, {838: 6842}, {846: 119435}, {853: 17958}, {875: 7676}, {886: 10216}, {894: 26056}, {903: 8288}, {914: 179670}, {916: 93276}, {927: 7068}, {932: 21901}, {939: 30477}, {951: 7497}, {960: 19225}, {966: 77966}, {975: 7304}, {977: 10440}, {1017: 86516}, {1021: 38904}, {1023: 31444}, {1027: 9927}, {1044: 29771}, {1057: 7625}, {1085: 7194}, {1107: 22615}, {1119: 392531}, {1140: 46220}, {1171: 12847}, {1182: 14315}, {1201: 52992}, {1202: 21209}, {1215: 24502}, {1230: 13973}, {1242: 6757}, {1254: 78906}, {1277: 14652}, {1284: 17663}, {1291: 276310}, {1317: 24985}, {1335: 8167}, {1338: 51460}, {1350: 11464}, {1351: 6723}, {1375: 35068}, {1377: 86107}, {1379: 141733}, {1407: 118166}, {1415: 182716}, {1429: 7668}, {1449: 11781}, {1468: 329152}, {1483: 40046}, {1496: 33237}, {1503: 19501}, {1507: 7989}, {1508: 34598}, {1512: 11553}, {1530: 112370}, {1566: 211522}, {1580: 8665}, {1596: 80232}, {1599: 10677}, {1605: 160011}, {1621: 14876}, {1622: 56201}, {1635: 17448}, {1671: 12634}, {1672: 10772}, {1714: 47454}, {1716: 46671}, {1728: 56302}, {1746: 6886}, {1749: 82076}, {1781: 47848}, {1790: 15376}, {1819: 28299}, {1827: 81287}, {1841: 22411}, {1842: 9850}, 
# {1847: 8259}, {1871: 9441}, {1894: 15590}, {1904: 23167}, {1910: 10837}, {1918: 241755}, {1925: 6624}, {1930: 15809}, {1931: 14474}, {1940: 36820}, {1950: 91864}, {1957: 11197}, {1962: 18490}, {1966: 10894}, {1970: 12077}, {1991: 12885}, {1998: 11492}, {2013: 76329}, {2031: 126853}, {2044: 11767}, {2047: 14082}, {2048: 31112}, {2050: 40531}, {2054: 23343}, {2066: 11632}, {2068: 12621}, {2112: 19114}, {2118: 91387}, {2154: 7624}, {2169: 10695}, {2196: 8099}, {2197: 10815}, {2201: 23777}, {2205: 22504}, {2216: 12052}]

# 求交集
# ok
# [{i[0]:i[1]} for i in sorted(test_d_aid.items(), key=lambda k: k, reverse=False)]
# [{i[0]:train_d_aid[i[0]]} for i in sorted(test_d_aid.items(), key=lambda k: k, reverse=False)]

# [{i[0]:set(i[1])&set(train_d_aid[i[0]])} for i in sorted(test_d_aid.items(), key=lambda k: k, reverse=False)]
# <class 'list'>: [{6: set()}, {7: set()}, {12: set()}, {18: set()}, {70: set()}, {74: set()}, {86: set()}, {98: set()}, {113: set()}, {117: set()}, {121: set()}, {136: set()}, {145: set()}, {164: set()}, {173: set()}, {174: set()}, {177: set()}, {191: set()}, {205: set()}, {206: set()}, {212: set()}, {231: set()}, {272: set()}, {286: set()}, {302: set()}, {311: set()}, {313: set()}, {336: set()}, {369: set()}, {389: set()}, {404: set()}, {411: set()}, {420: set()}, {432: set()}, {436: set()}, {450: set()}, {454: set()}, {471: set()}, {516: set()}, {519: set()}, {529: set()}, {543: set()}, {561: set()}, {562: set()}, {613: set()}, {624: set()}, {647: set()}, {660: set()}, {671: set()}, {681: set()}, {686: set()}, {688: set()}, {692: set()}, {699: set()}, {725: set()}, {727: set()}, {748: set()}, {765: set()}, {792: set()}, {838: set()}, {846: set()}, {853: set()}, {875: set()}, {886: set()}, {894: set()}, {903: set()}, {914: set()}, {916: set()}, {927: set()}, {932: set()}, {939: set()}, {951: set()}, {960: set()}, {966: set()}, {975: set()}, {977: set()}, {1017: set()}, {1021: set()}, {1023: set()}, {1027: set()}, {1044: set()}, {1057: set()}, {1085: set()}, {1107: set()}, {1119: set()}, {1140: set()}, {1171: set()}, {1182: set()}, {1201: set()}, {1202: set()}, {1215: set()}, {1230: set()}, {1242: set()}, {1254: set()}, {1277: set()}, {1284: set()}, {1291: set()}, {1317: set()}, {1335: set()}, {1338: set()}, {1350: set()}, {1351: set()}, {1375: set()}, {1377: set()}, {1379: set()}, {1407: set()}, {1415: set()}, {1429: set()}, {1449: set()}, {1468: set()}, {1483: set()}, {1496: set()}, {1503: set()}, {1507: set()}, {1508: set()}, {1512: set()}, {1530: set()}, {1566: set()}, {1580: set()}, {1596: set()}, {1599: set()}, {1605: set()}, {1621: set()}, {1622: set()}, {1635: set()}, {1671: set()}, {1672: set()}, {1714: set()}, {1716: set()}, {1728: set()}, {1746: set()}, {1749: set()}, {1781: set()}, {1790: set()}, {1819: set()}, {1827: set()}, {1841: set()}, {1842: 
# set()}, {1847: set()}, {1871: set()}, {1894: set()}, {1904: set()}, {1910: set()}, {1918: set()}, {1925: set()}, {1930: set()}, {1931: set()}, {1940: set()}, {1950: set()}, {1957: set()}, {1962: set()}, {1966: set()}, {1970: set()}, {1991: set()}, {1998: set()}, {2013: set()}, {2031: set()}, {2044: set()}, {2047: set()}, {2048: set()}, {2050: set()}, {2054: set()}, {2066: set()}, {2068: set()}, {2112: set()}, {2118: set()}, {2154: set()}, {2169: set()}, {2196: set()}, {2197: set()}, {2201: set()}, {2205: set()}, {2216: set()}]

  字典排序 按值 按键

      求交集

 

差集 a-b

交集 并集  差集 对称差集

 

# Sample data for the set-difference demo; `a` deliberately contains duplicates.
a = [1, 3, 3, 4, 4, 5, 5, 5, 6, 6]
b = [1, 3]
dd = 0

# Set difference: the distinct elements of a that are not in b -> {4, 5, 6}
set(a) - set(b)




posted @ 2018-01-17 00:16  papering  阅读(292)  评论(0)  编辑  收藏  举报