实现从 HTML 文件中解析出 dcm 信息,并将其转换成 nii 文件。
主要使用 bs4 库来解析 HTML 信息,针对的是 GE 公司生成的 dcm 信息描述文件。运行环境为 Windows;Linux 下同理适用,不过需要更换路径中的斜杠。
'''
author: 龙雪
date: 2020-10-13 1:12 am
'''
from bs4 import BeautifulSoup
import re
import os
import shutil
def next_nine(h5):
    """Return the node reached by following ``next_sibling`` nine times.

    Args:
        h5: Starting node. It and every intermediate node must expose a
            ``next_sibling`` attribute (presumably a BeautifulSoup element,
            given the file's bs4 import -- confirm against callers).

    Returns:
        The object found nine ``next_sibling`` hops after ``h5``. This is
        ``None`` if the ninth hop itself yields ``None``.

    Raises:
        AttributeError: If the sibling chain ends early, i.e. some
            ``next_sibling`` is ``None`` before all nine hops are done.
    """
    h5_next = h5
    # Walk the chain one hop at a time rather than spelling out nine chained
    # attribute accesses; the hop count stays exactly nine.
    for _ in range(9):
        h5_next = h5_next.next_sibling
    return h5_next
# Root directory whose entries are processed; hard-coded drive path written
# with forward slashes.
base_dir = 'E:/data/new_data/GE1.5'
# Root directory for output; the loop below creates one subdirectory here per
# entry name found in base_dir.
out_dir = 'E:/data/out_data/GE1.5'
# Names (not full paths) of every entry directly under base_dir.
# NOTE(review): os.listdir returns entries in arbitrary order and includes
# plain files as well as directories -- confirm base_dir holds only datasets.
niisets = os.listdir(base_dir)
for niiset in niisets:
input_niiset = os.path.join(base_dir, niiset)
out_niiset = os.path.join(out_dir, niiset)
os.makedirs(out_niiset, exist_ok=True)
input_niiset_win = input_niiset.replace('/', '\\')
out_niiset_win = out_niiset.replace('/', '\\')
url = input_niiset + '/ihe_pdi/index.htm'
soup = Be