正则表达式捕获并替换
正则表达式捕获并替换
const fs = require('fs');
const path = require('path');
const { spawnSync } = require('child_process');

const pandocPath = 'E:\\pandoc.exe';
// File type to convert from, and file type to convert to.
const inputExtension = '.docx';
const outputExtension = '.md';
const i = "xxx/数据标注--图像数据标注";
const o = 'xxx/label';

/**
 * Converts every `.docx` file in `inputDir` to Markdown with Pandoc.
 *
 * Outputs are written to `outputDir` as `001.md`, `002.md`, ... in directory
 * listing order. Media extracted from the n-th document is moved into
 * `outputDir/media<n>`, and the generated Markdown is post-processed by
 * `insertChar`.
 *
 * @param {string} inputDir - Directory containing the `.docx` files.
 * @param {string} outputDir - Directory that receives the `.md` files and media folders.
 */
function doTransfer(inputDir, outputDir) {
  // `recursive` creates missing parent directories and is a no-op when the
  // directory already exists.
  fs.mkdirSync(outputDir, { recursive: true });

  const files = fs.readdirSync(inputDir).filter((f) => f.endsWith(inputExtension));

  // Named `index` so it does not shadow the module-level `i` (the input path).
  let index = 1;
  for (const file of files) {
    const inputFilePath = path.join(inputDir, file);
    const outputFileName = `${index}`.padStart(3, '0') + outputExtension;
    const outputFilePath = path.join(outputDir, outputFileName);

    // Pass the arguments as an array (no shell) so paths containing spaces or
    // shell metacharacters reach Pandoc unchanged.
    const converted = execSync(pandocPath, [
      inputFilePath,
      '-f', 'docx',
      '-t', 'markdown_strict+pipe_tables',
      '-o', outputFilePath,
      `--extract-media=${outputDir}`,
    ]);

    // Only post-process when Pandoc actually produced the output file.
    if (converted) {
      moveExtractedMedia(outputDir, index);
      insertChar(outputFilePath, index);
    }
    // The counter advances even on failure so numbering follows file position.
    index += 1;
  }
  console.log("----------done----------");
}

/**
 * Moves everything from `outputDir/media` into `outputDir/media<index>` and
 * removes the emptied `media` directory, so each document keeps its own media
 * folder. Failures are logged and do not abort the batch.
 *
 * @param {string} outputDir - Directory Pandoc extracted media into.
 * @param {number} index - 1-based number of the document being processed.
 */
function moveExtractedMedia(outputDir, index) {
  const extractedDir = path.join(outputDir, 'media');
  if (!fs.existsSync(extractedDir)) {
    // Nothing was extracted for this document.
    return;
  }
  try {
    const targetDir = path.join(outputDir, `media${index}`);
    fs.mkdirSync(targetDir, { recursive: true });
    for (const entry of fs.readdirSync(extractedDir)) {
      fs.renameSync(path.join(extractedDir, entry), path.join(targetDir, entry));
    }
    fs.rmdirSync(extractedDir);
  } catch (err) {
    console.error(err);
  }
}

/**
 * Runs a command synchronously and echoes its stdout/stderr to the console.
 *
 * @param {string} cmd - With `args`: the executable to run. Without `args`:
 *   a full command line that is executed through the shell.
 * @param {string[]} [args] - Argument vector. When given, the command is
 *   spawned directly (no shell), so no quoting or escaping is needed.
 * @returns {boolean} `true` when the process started and exited with status 0.
 */
function execSync(cmd, args) {
  let result;
  if (Array.isArray(args)) {
    result = spawnSync(cmd, args, { encoding: 'utf-8' });
  } else {
    // `chcp 65001` switches the Windows console to UTF-8 so Chinese text is
    // displayed correctly; the command only exists on Windows.
    const prefix = process.platform === 'win32' ? 'chcp 65001 && ' : '';
    result = spawnSync(`${prefix}${cmd}`, { shell: true, encoding: 'utf-8' });
  }

  if (result.error) {
    console.error(`执行命令时发生错误: ${result.error.message}`);
    return false;
  }

  // With `encoding` set, stdout/stderr are already strings.
  const { stdout, stderr } = result;
  if (stdout) {
    console.log(`命令输出: ${stdout}`);
  }
  if (stderr) {
    console.error(`命令错误输出: ${stderr}`);
  }
  return result.status === 0;
}

/**
 * Post-processes a generated Markdown file in place:
 *   1. prepends one extra `#` to every line that starts with `#`;
 *   2. rewrites `<img ...>` tags whose `src` contains `imageN.png/jpg/jpeg`
 *      into Markdown image links pointing at `media<i>`;
 *   3. wraps figure caption lines ("图 N ...") in `<div class="center">`.
 *
 * Errors are logged to the console and not rethrown, so one bad file does not
 * stop the batch.
 *
 * @param {string} filepath - Path of the Markdown file to rewrite.
 * @param {number} i - Document number used to build the `media<i>` folder name.
 */
function insertChar(filepath, i) {
  try {
    const data = fs.readFileSync(filepath, 'utf8');

    // Insert one more `#` in front of every line that starts with `#`.
    let modifiedData = data.replace(/^#/gm, '##');

    // Only the file name (image1.png, image1.jpg, image1.jpeg, ...) is
    // captured and carried over into the Markdown image link.
    const regex = /<img[^>]*src=["'].*?(image\d+\.(?:png|jpe?g)).*?["'][^>]*>/g;
    modifiedData = modifiedData.replace(regex, `![](media${i}\\$1)`);

    // NOTE(review): in the original source this pattern was split by a line
    // break right after `\d+ `; it is reconstructed here as a single-line
    // match of "图 N <caption text>" - confirm against real captions.
    const iconMarkRegex = /(图 \d+ .*)/g;
    modifiedData = modifiedData.replace(iconMarkRegex, '<div class="center">$1</div>');

    fs.writeFileSync(filepath, modifiedData, 'utf8');
    console.log(`${filepath} # updated.`);
  } catch (err) {
    console.error(err);
  }
}
// Script entry point: convert the module-level input directory `i` into the output directory `o`.
doTransfer(i, o);