解析 HTML 文件中的 dcm 信息并将其转换成 nii 文件

主要使用 bs4 库来解析 HTML 信息，针对的是 GE 公司生成的 dcm 信息描述文件。运行环境为 Windows；Linux 下同理适用，不过需要更换路径中的斜杠。

'''
author: 龙雪
date: 2020-10-13 1:12 am
'''

from bs4 import BeautifulSoup
import re
import os
import shutil


def next_nine(h5):
    """Return the node reached by following ``next_sibling`` nine times.

    Args:
        h5: Starting node; any object exposing a ``next_sibling`` attribute
            (in this script, presumably a BeautifulSoup element).

    Returns:
        The ninth sibling after ``h5``. This is ``None`` when the chain ends
        exactly at the ninth hop.

    Raises:
        AttributeError: If the chain ends before the ninth hop, because the
            next hop is then attempted on ``None``.
    """
    node = h5
    for _ in range(9):
        node = node.next_sibling
    return node


# base_dir: directory whose entries are processed one by one below.
# out_dir: root under which a same-named output directory is created per entry.
base_dir = 'E:/data/new_data/GE1.5'
out_dir = 'E:/data/out_data/GE1.5'

# Each entry of base_dir is handled as one dataset.
niisets = os.listdir(base_dir)
for niiset in niisets:
    input_niiset = os.path.join(base_dir, niiset)
    out_niiset = os.path.join(out_dir, niiset)
    # Create the per-dataset output directory; exist_ok=True tolerates an
    # already existing directory.
    os.makedirs(out_niiset, exist_ok=True)
    # Copies of both paths with every '/' replaced by a backslash.
    # NOTE(review): presumably consumed by a Windows command further down
    # in the loop body — not visible in this excerpt; confirm.
    input_niiset_win = input_niiset.replace('/', '\\')
    out_niiset_win = out_niiset.replace('/', '\\')

    # Path of the index page inside this dataset's ihe_pdi folder.
    url = input_niiset + '/ihe_pdi/index.htm'
    soup = Be
posted @ 2023-12-28 16:24  龙雪  阅读(2)  评论(0编辑  收藏  举报  来源