从 VirusTotal 上下载病毒样本

 

下面是完整的 download_samples.py 脚本:
#!/usr/bin/env python
 
import os
import csv
#import Queue
import zipfile
import requests
import argparse
import multiprocessing
 
# TODO: Don't hardcode the relative path?
# Both paths below are relative, so they resolve against the current working
# directory of the process that runs this script.
# Directory the VirusTotal download workers write samples into; each file is
# named after the sample's sha256.
samples_path = "gym_malware/envs/utils/samples/"
# CSV loaded by get_sample_hashes(); the code in this file reads its "sha256"
# and "md5" columns.
hashes_path = "gym_malware/envs/utils/sample_hashes.csv"
# Endpoint requested with "hash" and "apikey" query parameters to fetch a sample.
vturl = "https://www.virustotal.com/intelligence/download"
 
 
def get_sample_hashes():
    """Load every data row of the CSV file located at ``hashes_path``.

    Returns:
        A list with one mapping per CSV row, exactly as produced by
        ``csv.DictReader`` (keys are taken from the header line).
    """
    with open(hashes_path) as hash_file:
        # DictReader is lazy, so materialize it while the file is still open.
        return list(csv.DictReader(hash_file))
 
 
def vt_download_sample(sha256, sample_path, vtapikey):
    """Download one sample from VirusTotal and write it to ``sample_path``.

    The request is attempted up to 10 times. Both a non-OK HTTP response and
    a ``requests`` transport error (connection failure, timeout, ...) count
    as one failed attempt, so a transient network problem is retried instead
    of propagating out of the function.

    Args:
        sha256: Hash sent as the ``hash`` query parameter.
        sample_path: Destination file path; opened in binary write mode.
        vtapikey: Value sent as the ``apikey`` query parameter.

    Returns:
        True if an OK response was received and its body written to
        ``sample_path``; False if all attempts failed (nothing is written).
    """
    max_tries = 10
    query = {"hash": sha256, "apikey": vtapikey}

    for _ in range(max_tries):
        try:
            # Without a timeout a stalled connection would block forever.
            resp = requests.get(vturl, params=query, timeout=60)
        except requests.RequestException:
            continue

        if resp.ok:
            break
    else:
        # The loop ran out of attempts without ever reaching ``break``.
        return False

    with open(sample_path, "wb") as ofile:
        ofile.write(resp.content)

    return True
 
 
def download_worker_function(download_queue, vtapikey):
    """Worker loop: download every hash taken from ``download_queue``.

    Items are taken one at a time. The string ``"STOP"`` is the shutdown
    sentinel; any other item is treated as a sha256 hash and downloaded to
    ``os.path.join(samples_path, sha256)`` via ``vt_download_sample``.
    ``task_done()`` is called exactly once for every item taken, including
    the sentinel and items whose download raised, so a ``join()`` on the
    queue cannot be left waiting on this worker.

    Args:
        download_queue: Queue-like object providing ``get()`` and
            ``task_done()``.
        vtapikey: VirusTotal API key forwarded to ``vt_download_sample``.

    Returns:
        True once the ``"STOP"`` sentinel has been received.
    """
    while True:
        # get() is called without a timeout; it is expected to block until an
        # item is available, so there is no "queue empty" case to handle.
        sha256 = download_queue.get()

        try:
            if sha256 == "STOP":
                return True

            print("{} downloading".format(sha256))
            sample_path = os.path.join(samples_path, sha256)

            try:
                success = vt_download_sample(sha256, sample_path, vtapikey)
            except Exception as exc:
                # Worker boundary: report the error and keep serving the
                # queue rather than dying and leaving items unacknowledged.
                print("{} raised {!r}".format(sha256, exc))
                success = False

            if not success:
                print("{} had a problem".format(sha256))

            print("{} done".format(sha256))
        finally:
            # Acknowledge the item on every path (including the return above).
            download_queue.task_done()
 
 
def use_virustotal(args):
    """
    Use Virustotal to download the environment malware

    Starts ``args.nconcurrent`` worker processes running
    ``download_worker_function``, feeds them every ``sha256`` value returned
    by ``get_sample_hashes()`` through a bounded queue, then sends one
    ``"STOP"`` sentinel per worker and waits for all of them to finish.

    Args:
        args: Parsed command-line namespace; ``nconcurrent`` and
            ``vtapikey`` are read.

    Raises:
        ValueError: If ``args.nconcurrent`` is less than 1 (with no workers
            the queue would never be drained).
    """
    if args.nconcurrent < 1:
        raise ValueError("nconcurrent must be at least 1")

    # Workers write into this directory; make sure it exists up front.
    os.makedirs(samples_path, exist_ok=True)

    # The context manager shuts the manager process down when we are done.
    with multiprocessing.Manager() as m:
        download_queue = m.JoinableQueue(args.nconcurrent)

        workers = [
            multiprocessing.Process(
                target=download_worker_function,
                args=(download_queue, args.vtapikey))
            for _ in range(args.nconcurrent)
        ]
        for w in workers:
            w.start()

        for row in get_sample_hashes():
            download_queue.put(row["sha256"])

        # Exactly one sentinel per started worker so each of them exits.
        for _ in workers:
            download_queue.put("STOP")

        download_queue.join()
        for w in workers:
            w.join()
 
 
def use_virusshare(args):
    """
    Use VirusShare zip files as the source for the environment malware

    Every member of each archive in ``args.zipfile`` whose name has the form
    ``<prefix>_<md5>...`` and whose md5 appears in the hash CSV is extracted
    to ``os.path.join(samples_path, <sha256>)``, the same location the
    VirusTotal downloader writes to. Members whose names contain no
    underscore, or whose md5 is not listed, are skipped.

    Args:
        args: Parsed command-line namespace; ``zipfile`` (iterable of archive
            paths) and ``zipfilepassword`` (ASCII string) are read.
    """
    pwd = bytes(args.zipfilepassword, "ascii")
    md5_to_sha256_dict = {d["md5"]: d["sha256"] for d in get_sample_hashes()}

    os.makedirs(samples_path, exist_ok=True)

    for path in args.zipfile:
        # Context managers close the archive and each member handle.
        with zipfile.ZipFile(path) as z:
            for f in z.namelist():
                name_parts = f.split("_")
                if len(name_parts) < 2:
                    # No "_<md5>" component in this member name.
                    continue

                sha256 = md5_to_sha256_dict.get(name_parts[1])
                if sha256 is None:
                    continue

                with z.open(f, "r", pwd) as member:
                    sample_bytez = member.read()
                with open(os.path.join(samples_path, sha256), "wb") as ofile:
                    ofile.write(sample_bytez)
                print("Extracted {}".format(sha256))
 
 
if __name__ == '__main__':
    # Command-line entry point: parse the options, validate every selected
    # source, and only then run the VirusShare extraction and/or the
    # VirusTotal download.
    prog = "download_samples"
    descr = "Download the samples that define the malware gym environment"
    parser = argparse.ArgumentParser(prog=prog, description=descr)
    parser.add_argument(
        "--virustotal",
        default=False,
        action="store_true",
        help="Use Virustotal to download malware samples")
    parser.add_argument(
        "--vtapikey", type=str, default=None, help="Virustotal API key")
    parser.add_argument(
        "--nconcurrent",
        type=int,
        default=6,
        help="Maximum concurrent downloads from Virustotal")
    parser.add_argument(
        "--virusshare",
        default=False,
        action="store_true",
        help="Use malware samples from VirusShare torrents")
    parser.add_argument(
        "--zipfile",
        type=str,
        nargs="+",
        help="The path of VirusShare zipfile 290 or 291")
    parser.add_argument(
        "--zipfilepassword",
        type=str,
        default=None,
        help="Password for the VirusShare zipfiles 290 or 291")
    args = parser.parse_args()

    if not args.virustotal and not args.virusshare:
        parser.error("Must use either Virustotal or VirusShare")

    # All validation happens before any work starts, so a missing VirusTotal
    # option is reported before a VirusShare extraction has already run.
    if args.virusshare:
        # --zipfile has no default: it is None (not an empty list) if omitted.
        if not args.zipfile:
            parser.error(
                "Must provide the paths for one or more Virusshare zip files")

        if args.zipfilepassword is None:
            parser.error("Must enter a password for the VirusShare zip files")

    if args.virustotal:
        if args.vtapikey is None:
            parser.error("Must enter a VirusTotal API key")

        # With no worker processes the download queue would never drain.
        if args.nconcurrent < 1:
            parser.error("--nconcurrent must be at least 1")

    if args.virusshare:
        use_virusshare(args)

    if args.virustotal:
        use_virustotal(args)

使用方法(脚本中的样本目录和 CSV 路径都是相对路径,请在包含 gym_malware 目录的项目根目录下运行):

python download_samples.py  --virustotal --vtapikey 1a7b7440ceca037b88fd160ef6c8e04b69ba434bdd76ef2ab0ab52a567xxxxx

CSV 文件格式(即 hashes_path 指向的 sample_hashes.csv;首行为列名,脚本会读取其中的 sha256 和 md5 两列):

sha256,sha1,md5
0007df5e92070f8d12411078070bdcafb24df81c837d8113a1e047ed7ac9fba1,e760b4ae027975928735024273a4240995442e2b,002e5581fabb21af4d4e7ec070561d38
0026b14f896934c621eccca48474353fff08f592ebc2949dde4b881f2353e3d2,f5cc8bd5accc281a8a41a9b13d870734361ec26b,292bd61f51ef0983b058a3b0f16ad263
00341b912ae7a9fc5bd25ac544bb2525cdc10f7dfcf51e6d96e9221a9ca06525,0329a4316eea3cf6d1376ea1eab5e2806258193b,c0370cb71216559beef7fe943b52003e
007792005ee9d835d5d0d4e0d6f7b886605272252a202e97a04bbc30bbbe12ae,f8190fe3936eff91a011901e30d66d0ad96e7e0c,64fed9d345dec9156090832c2b768982
009868767950256d823b0e9c6a89b8a7b2cef63424adc1840d1350ffa0bd3e42,50d4083adcd17910c2889842daf0d5e6ec41ab40,2f7a71e7abfd8536b9dee243656e0a8a
00a52b54695bac31830bdecf1c0e71b10da9bf3e9ff3d52cf1fc90f110458475,26f7549b66b2578112a77ceda7be7647ce5bc763,84b7490cae7fb84010863e006988951a
00c00e802109d0a3cb122c90168380ca23dfd3c28b1f03711b6218a8b1800f7c,eca4d84561c6440975ca64402e92ab01cf1bf4c8,d40cf7ae9174d5dc79c2e9db8cdb1bbb
0105e7aadf4e069b10aea00a43d90b753acfdd81c8db6e37df2c5b563162c30b,310a76a010cb58b510da8eb743f53ff517e441a9,2d4dcc983545014af6c8994ffa478488
0106fb2d96d5643f7ccb4a3e9fe8f3bb34c7d65d03333370648915991a3b200d,ad073b1ada3bb0aff0cae2edd7d41f6f09816cbe,f3e5b6b8c47211d54c2031d7a9a8f54f
012244e5a30708451b0b8b36a45e7d36fc8694f999adec739ef21efbc5f8e922,ef0ce82ca912e79a4fe64879ceb7fc30605367bb,ef24712cdbd8bd210e44d1546f5b91ab
014b392af2230b6275acf08a1384b1dd578e7fa3e7aba70c1b5b2ea6956c2108,43336578eb0efb1f9096836ff420fba635527020,d8f99268a5727a64bbac9a149b169afc
01640574490f32ba3d84bef60bdc30794edacf32932e93bada4d068dc5e27457,320c06678b0253ef5e30933d341a981744702c49,06c7dcdcdc887e052c2b6ee0dc88a2a6

 

posted @   bonelee  阅读(16336)  评论(1编辑  收藏  举报
编辑推荐:
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
阅读排行:
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
· DeepSeek 开源周回顾「GitHub 热点速览」
点击右上角即可分享
微信分享提示