文本文件按行去重,有序输出

有些文本文件中存在重复的行,需要按行去重。LinkedHashMap(也是 LinkedHashSet 的底层实现)会保持元素的插入顺序,重复插入已存在的键不会改变其位置,因此去重后各行仍按首次出现的顺序输出,所以采用此集合。

去重前:
image

去重后:
image

代码如下:

import java.io.*;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

/**
 * Removes duplicate lines from text files while keeping each line at the
 * position where it first appeared.
 *
 * <p>Program: receiveDemo
 *
 * @author huang wei
 * @since 2021-04-09
 */
public class FileTest {
	/**
	 * De-duplicates the hard-coded input path into the hard-coded output directory.
	 *
	 * <p>If the input path is a directory, every entry directly inside it that is not
	 * itself a directory is processed (no recursion); otherwise the path is treated as
	 * a single file. Each result is written to the output directory under the same
	 * file name.
	 *
	 * <p>NOTE: {@link #writeData(String, String)} appends, so running the program twice
	 * against the same output directory adds a second copy of the de-duplicated lines
	 * to each output file.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		String oldFilePath = "H:\\upload\\test\\";
		String newFilePath = "H:\\upload\\test2\\";

		File source = new File(oldFilePath);
		if (!source.exists()) {
			// Bail out early instead of producing an empty output file for a missing input.
			System.err.println("Input path does not exist: " + oldFilePath);
			return;
		}

		// writeData does not create missing directories, so make sure the target exists.
		File outputDir = new File(newFilePath);
		if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
			System.err.println("Cannot create output directory: " + newFilePath);
			return;
		}

		if (!source.isDirectory()) {
			// Not a directory: treat the path itself as the single file to process.
			String result = removeDuplicate(source.getAbsolutePath());
			writeData(newFilePath + source.getName(), result);
			return;
		}

		File[] files = source.listFiles();
		if (files == null) {
			// listFiles() returns null when the directory cannot be read.
			System.err.println("Cannot list directory: " + oldFilePath);
			return;
		}
		for (File file : files) {
			if (!file.isDirectory()) {
				String result = removeDuplicate(file.getAbsolutePath());
				writeData(newFilePath + file.getName(), result);
			}
		}
	}

	/**
	 * De-duplicates the lines of a text file, decoding it with the platform default charset.
	 *
	 * @param filePath path of the file to read
	 * @return the distinct lines in order of first appearance, each terminated by
	 *         {@code "\r\n"}; an empty string if the file cannot be read
	 * @author hw
	 */
	public static String removeDuplicate(String filePath) {
		return removeDuplicate(filePath, Charset.defaultCharset());
	}

	/**
	 * De-duplicates the lines of a text file, decoding it with the given charset.
	 *
	 * <p>Lines are compared exactly (case- and whitespace-sensitive). Whatever line
	 * terminator the input uses, the output always uses {@code "\r\n"}.
	 *
	 * @param filePath path of the file to read
	 * @param charset  charset used to decode the file's bytes
	 * @return the distinct lines in order of first appearance, each terminated by
	 *         {@code "\r\n"}; an empty string if the file cannot be read
	 */
	public static String removeDuplicate(String filePath, Charset charset) {
		// A LinkedHashSet ignores repeated elements and iterates in insertion order,
		// which is exactly "first occurrence wins, original order kept".
		Set<String> uniqueLines = new LinkedHashSet<>();

		try (BufferedReader br = new BufferedReader(
				new InputStreamReader(new FileInputStream(filePath), charset))) {
			String line;
			while ((line = br.readLine()) != null) {
				uniqueLines.add(line);
			}
		} catch (IOException e) {
			System.err.println("Failed to read " + filePath);
			e.printStackTrace();
			// A partially read file yields no output rather than a truncated result.
			return "";
		}

		StringBuilder result = new StringBuilder();
		for (String line : uniqueLines) {
			result.append(line).append("\r\n");
		}
		return result.toString();
	}

	/**
	 * Appends text to a file, encoding it with the platform default charset.
	 *
	 * <p>The file is created if it does not exist; existing content is kept and the
	 * new content is written after it. I/O failures are reported on standard error
	 * and not propagated.
	 *
	 * @param filePath path of the file to append to
	 * @param content  text to append
	 * @author hw
	 */
	public static void writeData(String filePath, String content) {
		writeData(filePath, content, Charset.defaultCharset());
	}

	/**
	 * Appends text to a file, encoding it with the given charset.
	 *
	 * <p>The file is created if it does not exist; existing content is kept and the
	 * new content is written after it. I/O failures are reported on standard error
	 * and not propagated.
	 *
	 * @param filePath path of the file to append to
	 * @param content  text to append
	 * @param charset  charset used to encode {@code content}
	 */
	public static void writeData(String filePath, String content, Charset charset) {
		// The second constructor argument opens the file in append mode.
		try (OutputStream out = new FileOutputStream(filePath, true)) {
			out.write(content.getBytes(charset));
		} catch (IOException e) {
			System.err.println("Failed to write " + filePath);
			e.printStackTrace();
		}
	}
}
posted @ 2021-04-13 16:02  逐梦寻欢  阅读(309)  评论(0编辑  收藏  举报