从virustotal上下载病毒样本
#!/usr/bin/env python
"""Download the malware samples that define the malware gym environment.

Samples are fetched either from the VirusTotal download endpoint
(``--virustotal``) or extracted from password-protected VirusShare zip
archives (``--virusshare``).  The samples to fetch are listed in the CSV file
at ``hashes_path``; every sample is stored in ``samples_path`` under its
sha256 hash.
"""
import argparse
import csv
import multiprocessing
import os
import zipfile

import requests

# TODO: Don't hardcode the relative path?
samples_path = "gym_malware/envs/utils/samples/"
hashes_path = "gym_malware/envs/utils/sample_hashes.csv"
vturl = "https://www.virustotal.com/intelligence/download"

# Number of attempts made for one sample before giving up on it.
_MAX_DOWNLOAD_TRIES = 10
# (connect, read) timeouts in seconds, so a stalled connection cannot block a
# worker forever.
_REQUEST_TIMEOUT = (10, 120)
# Sentinel put on the queue once per worker to tell it to exit.
_STOP = "STOP"


def get_sample_hashes():
    """Return the rows of the hash CSV at ``hashes_path`` as a list of dicts.

    Each row is keyed by the CSV header; the callers in this script read the
    ``sha256`` and ``md5`` columns.
    """
    with open(hashes_path, newline="") as csvfile:
        return list(csv.DictReader(csvfile))


def vt_download_sample(sha256, sample_path, vtapikey):
    """Download one sample from VirusTotal and write it to ``sample_path``.

    The request is retried up to ``_MAX_DOWNLOAD_TRIES`` times; both HTTP
    error responses and network-level errors count as a failed attempt.

    Args:
        sha256: Hash of the sample to request.
        sample_path: Destination file path for the sample bytes.
        vtapikey: VirusTotal API key sent with the request.

    Returns:
        True if the sample was downloaded and written, False if every
        attempt failed.
    """
    params = {"hash": sha256, "apikey": vtapikey}
    for _ in range(_MAX_DOWNLOAD_TRIES):
        try:
            resp = requests.get(vturl, params=params, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException:
            # Connection problems are treated like a bad HTTP status: retry.
            continue
        if resp.ok:
            with open(sample_path, "wb") as ofile:
                ofile.write(resp.content)
            return True
    return False


def download_worker_function(download_queue, vtapikey):
    """Worker loop: download every hash taken from ``download_queue``.

    Runs until the ``"STOP"`` sentinel is received.  ``task_done()`` is called
    for every item taken from the queue, including the sentinel and items
    whose download failed, so that ``download_queue.join()`` in the parent
    always returns.

    Args:
        download_queue: Joinable queue of sha256 strings (plus sentinels).
        vtapikey: VirusTotal API key passed on to ``vt_download_sample``.

    Returns:
        True once the sentinel has been received.
    """
    while True:
        # Blocking get: it waits for an item and never raises queue.Empty.
        sha256 = download_queue.get()
        try:
            if sha256 == _STOP:
                return True

            print("{} downloading".format(sha256))
            sample_path = os.path.join(samples_path, sha256)
            try:
                success = vt_download_sample(sha256, sample_path, vtapikey)
            except Exception as err:
                # Worker boundary: a failure on one sample (e.g. a disk error)
                # must not kill the process, otherwise its STOP sentinel is
                # never consumed and the parent blocks in join() forever.
                print("{} raised {!r}".format(sha256, err))
                success = False

            if success:
                print("{} done".format(sha256))
            else:
                print("{} had a problem".format(sha256))
        finally:
            download_queue.task_done()


def use_virustotal(args):
    """
    Use Virustotal to download the environment malware

    Starts ``args.nconcurrent`` worker processes, feeds them every sha256 from
    the hash CSV, then sends one stop sentinel per worker and waits for the
    queue and the workers to finish.
    """
    os.makedirs(samples_path, exist_ok=True)

    with multiprocessing.Manager() as m:
        download_queue = m.JoinableQueue(args.nconcurrent)

        workers = [
            multiprocessing.Process(
                target=download_worker_function,
                args=(download_queue, args.vtapikey))
            for _ in range(args.nconcurrent)
        ]
        for w in workers:
            w.start()

        for row in get_sample_hashes():
            download_queue.put(row["sha256"])

        # Exactly one sentinel per started worker.
        for _ in workers:
            download_queue.put(_STOP)

        download_queue.join()
        for w in workers:
            w.join()


def use_virusshare(args):
    """
    Use VirusShare zip files as the source for the environment malware

    Every archive member whose name has the form ``<prefix>_<md5>...`` and
    whose md5 is listed in the hash CSV is extracted into ``samples_path``
    under its sha256, matching where the VirusTotal mode stores samples.
    """
    pwd = bytes(args.zipfilepassword, "ascii")
    md5_to_sha256_dict = {d["md5"]: d["sha256"] for d in get_sample_hashes()}

    os.makedirs(samples_path, exist_ok=True)

    for path in args.zipfile:
        with zipfile.ZipFile(path) as z:
            for member in z.namelist():
                name_parts = member.split("_")
                if len(name_parts) < 2:
                    # No md5 component in the name; not a sample entry.
                    continue
                sha256 = md5_to_sha256_dict.get(name_parts[1])
                if sha256 is None:
                    continue

                with z.open(member, "r", pwd) as zipped:
                    sample_bytez = zipped.read()
                with open(os.path.join(samples_path, sha256), "wb") as ofile:
                    ofile.write(sample_bytez)
                print("Extracted {}".format(sha256))


def main():
    """Parse the command line, validate it, and run the selected source(s)."""
    prog = "download_samples"
    descr = "Download the samples that define the malware gym environment"
    parser = argparse.ArgumentParser(prog=prog, description=descr)
    parser.add_argument("--virustotal", default=False, action="store_true",
                        help="Use Virustotal to download malware samples")
    parser.add_argument("--vtapikey", type=str, default=None,
                        help="Virustotal API key")
    parser.add_argument("--nconcurrent", type=int, default=6,
                        help="Maximum concurrent downloads from Virustotal")
    parser.add_argument("--virusshare", default=False, action="store_true",
                        help="Use malware samples from VirusShare torrents")
    parser.add_argument("--zipfile", type=str, nargs="+",
                        help="The path of VirusShare zipfile 290 or 291")
    parser.add_argument("--zipfilepassword", type=str, default=None,
                        help="Password for the VirusShare zipfiles 290 or 291")
    args = parser.parse_args()

    if not args.virustotal and not args.virusshare:
        parser.error("Must use either Virustotal or VirusShare")

    # Validate every selected mode before doing any work, so a missing option
    # for one mode is reported before the other mode has started running.
    if args.virusshare:
        # args.zipfile is None (not an empty list) when --zipfile is omitted.
        if not args.zipfile:
            parser.error(
                "Must provide the paths for one or more Virusshare zip files")
        if args.zipfilepassword is None:
            parser.error("Must enter a password for the VirusShare zip files")
    if args.virustotal and args.vtapikey is None:
        parser.error("Must enter a VirusTotal API key")

    if args.virusshare:
        use_virusshare(args)
    if args.virustotal:
        use_virustotal(args)


if __name__ == '__main__':
    main()
使用方法:
python download_samples.py --virustotal --vtapikey 1a7b7440ceca037b88fd160ef6c8e04b69ba434bdd76ef2ab0ab52a567xxxxx
csv文件格式:
sha256,sha1,md5
0007df5e92070f8d12411078070bdcafb24df81c837d8113a1e047ed7ac9fba1,e760b4ae027975928735024273a4240995442e2b,002e5581fabb21af4d4e7ec070561d38
0026b14f896934c621eccca48474353fff08f592ebc2949dde4b881f2353e3d2,f5cc8bd5accc281a8a41a9b13d870734361ec26b,292bd61f51ef0983b058a3b0f16ad263
00341b912ae7a9fc5bd25ac544bb2525cdc10f7dfcf51e6d96e9221a9ca06525,0329a4316eea3cf6d1376ea1eab5e2806258193b,c0370cb71216559beef7fe943b52003e
007792005ee9d835d5d0d4e0d6f7b886605272252a202e97a04bbc30bbbe12ae,f8190fe3936eff91a011901e30d66d0ad96e7e0c,64fed9d345dec9156090832c2b768982
009868767950256d823b0e9c6a89b8a7b2cef63424adc1840d1350ffa0bd3e42,50d4083adcd17910c2889842daf0d5e6ec41ab40,2f7a71e7abfd8536b9dee243656e0a8a
00a52b54695bac31830bdecf1c0e71b10da9bf3e9ff3d52cf1fc90f110458475,26f7549b66b2578112a77ceda7be7647ce5bc763,84b7490cae7fb84010863e006988951a
00c00e802109d0a3cb122c90168380ca23dfd3c28b1f03711b6218a8b1800f7c,eca4d84561c6440975ca64402e92ab01cf1bf4c8,d40cf7ae9174d5dc79c2e9db8cdb1bbb
0105e7aadf4e069b10aea00a43d90b753acfdd81c8db6e37df2c5b563162c30b,310a76a010cb58b510da8eb743f53ff517e441a9,2d4dcc983545014af6c8994ffa478488
0106fb2d96d5643f7ccb4a3e9fe8f3bb34c7d65d03333370648915991a3b200d,ad073b1ada3bb0aff0cae2edd7d41f6f09816cbe,f3e5b6b8c47211d54c2031d7a9a8f54f
012244e5a30708451b0b8b36a45e7d36fc8694f999adec739ef21efbc5f8e922,ef0ce82ca912e79a4fe64879ceb7fc30605367bb,ef24712cdbd8bd210e44d1546f5b91ab
014b392af2230b6275acf08a1384b1dd578e7fa3e7aba70c1b5b2ea6956c2108,43336578eb0efb1f9096836ff420fba635527020,d8f99268a5727a64bbac9a149b169afc
01640574490f32ba3d84bef60bdc30794edacf32932e93bada4d068dc5e27457,320c06678b0253ef5e30933d341a981744702c49,06c7dcdcdc887e052c2b6ee0dc88a2a6
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
· DeepSeek 开源周回顾「GitHub 热点速览」