在同样的环境下,把 Golang 与 Java 做了个对比,差的不是一星半点!先看执行结果:两者在同样环境下读取同一个 400M 左右的数据文件,以下截图是测试结果:

 

 JAVA 代码:

package ccb;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Small benchmark program: counts the lines of a delimited data file, groups the lines
 * by category, writes the grouped lines to {@code <source>.result} and prints how long
 * each phase took.
 *
 * <p>Every data line is expected to look like {@code field0|@|category|@|...}; the second
 * field is the grouping key.
 *
 * <p>Files are read and written with the platform default charset, exactly as
 * {@link FileReader} and {@link FileWriter} do by default.
 */
public class syc
{
    /** Path of the data file to process. */
    static String fp = "D:\\ccb\\ONL_STLM_JNL_410_20190625.dat";
    // Alternative (smaller) sample file: "D:\\ccb\\ONL.dat"

    /** The data file read by {@link #read_data()}; the output is written next to it. */
    static File sourceFile = new File(fp);

    /** Field separator used inside each data line. */
    private static final String DELIMITER = "|@|";

    /**
     * Runs the benchmark: counts lines, groups them, writes the result and prints timings
     * (all durations in milliseconds).
     *
     * @param args unused
     * @throws Exception if counting the lines or writing the result file fails
     */
    public static void main(String[] args) throws Exception
    {
        long s0 = System.currentTimeMillis();
        long lnu = get_line_number();
        System.out.println(String.format("文件共有:%s行", lnu));
        long s1 = System.currentTimeMillis();

        System.out.println(String.format("统计文件行数用时:%s", (s1-s0)));

        long t1 = System.currentTimeMillis();

        // Read the file and group its lines by category.
        Map<String, List<String>> mapList = read_data();

        long t2 = System.currentTimeMillis();

        // Write the grouped lines to disk.
        write_data(mapList);

        long t3 = System.currentTimeMillis();

        System.out.println(String.format("t1=%s, t2=%s, 读取耗时=%s, 写入耗时=%s, 合计耗时=%s",
                t1, t2, (t2-t1), (t3-t2), (t3-t1)));
    }

    /**
     * Reads {@link #sourceFile} through a buffered reader and groups its lines by the
     * second {@code |@|}-separated field.
     *
     * <p>Lines that contain no delimiter at all (including blank lines) have no category
     * and are skipped; the number of skipped lines is reported on standard error. A line
     * whose second field is empty is grouped under the empty string.
     *
     * <p>I/O failures are reported via a stack trace and whatever was grouped so far is
     * returned, so the result may be empty or partial but is never {@code null}.
     *
     * @return map from category to the complete lines of that category, in file order
     */
    public static Map<String, List<String>> read_data() {
        Map<String, List<String>> mapList = new HashMap<>();
        long skipped = 0;

        try (BufferedReader reader = new BufferedReader(new FileReader(sourceFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Locate the second field with indexOf instead of a regex split: only the
                // category is needed, so there is no reason to split the whole line.
                int start = line.indexOf(DELIMITER);
                if (start < 0) {
                    skipped++;
                    continue;
                }
                start += DELIMITER.length();
                int end = line.indexOf(DELIMITER, start);
                String typeName = (end < 0) ? line.substring(start) : line.substring(start, end);

                List<String> group = mapList.get(typeName);
                if (group == null) {
                    group = new ArrayList<>();
                    mapList.put(typeName, group);
                }
                group.add(line);
            }
        } catch (IOException e) {
            // Best effort: report the failure and return what has been read so far.
            e.printStackTrace();
        }

        if (skipped > 0) {
            System.err.println(String.format("跳过%s行缺少分隔符的数据", skipped));
        }
        System.out.println(String.format("找到的%s类别:", mapList.size()));
        return mapList;
    }

    /**
     * Writes all grouped lines, one per line, to {@code <sourceFile>.result} through a
     * buffered writer and prints each category with its line count.
     *
     * @param mapList map from category to the lines of that category
     * @throws IOException if the result file cannot be created, written or closed
     */
    public static void write_data(Map<String, List<String>> mapList) throws IOException {
        // try-with-resources closes (and thereby flushes) the writer even if a write fails.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(sourceFile + ".result"))) {
            for (Map.Entry<String, List<String>> entry : mapList.entrySet()) {
                List<String> lines = entry.getValue();
                System.out.println(String.format("--%s %s", entry.getKey(), lines.size()));
                for (String line : lines) {
                    writer.write(line);
                    writer.write('\n');
                }
            }
        }
    }

    /**
     * Counts the lines of the file at {@link #fp}.
     *
     * @return the total number of lines in the file
     * @throws IOException if the file cannot be opened or read
     */
    public static long get_line_number() throws IOException {
        long lineNu = 0;
        try (LineNumberReader lineReader = new LineNumberReader(new FileReader(new File(fp)))) {
            while (lineReader.readLine() != null) {
                lineNu++;
            }
        }
        return lineNu;
    }
}

  Golang 代码:

package main

import (
	"bufio"
	_ "container/list"
	"fmt"
	"io"
	"os"
	"strings"
	"time"
)

// main groups the lines of the data file by category, writes them to
// "<file>.result" and prints how long reading and writing took.
//
// t1 and t2 are printed as Unix timestamps in microseconds; the three
// durations are printed in milliseconds.
func main() {
	file_path := "ONL_STLM_JNL_410_20190625.dat"
	new_file_path := file_path + ".result"

	t1 := time.Now().UnixNano() / 1e3

	mapList := read_file(file_path)
	if mapList == nil {
		// read_file has already reported why the input could not be opened;
		// do not create an empty result file or print meaningless timings.
		return
	}

	t2 := time.Now().UnixNano() / 1e3

	write_file(mapList, new_file_path)

	t3 := time.Now().UnixNano() / 1e3

	fmt.Printf("t1=%v, t2=%v, 读取耗时=%v, 写入耗时=%v, 合计耗时=%v\n",
		t1, t2, (t2-t1)/1e3, (t3-t2)/1e3, (t3-t1)/1e3)
}

// 逐行读取文件
func read_file(fp string) map[string][]string {
	file, err := os.Open(fp)
	if err != nil {
		fmt.Printf("读取文件错误: %s\n", err)
		return nil
	}
	defer file.Close()

	br := bufio.NewReader(file)

	//声明一个切片
	// var file_list []string
	//声明一个 map
	mapList := make(map[string][]string)

	for {
		b_line, _, err := br.ReadLine()
		if err == io.EOF {
			break
		}
		// 向 list 中追加元素
		// file_list.PushBack(string(line))
		// file_list = append(file_list, string(line)+"\n")

		line := string(b_line)
		// 使用map
		items := strings.Split(line, "|@|")
		type_name := items[1]
		if mapValue, has_key := mapList[type_name]; has_key {
			mapValue := append(mapValue, line)
			mapList[type_name] = mapValue
		} else {
			//创建一个临时切片
			tmp_slice := []string{line}
			mapList[type_name] = tmp_slice
		}
	}
	// 输出 list 长度
	// fmt.Println(file_list)
	return mapList
}

// line2map groups already-loaded lines by their second "|@|"-separated field
// and prints the number of categories found.
//
// The returned map goes from category to the lines of that category, in input
// order. Lines that contain no delimiter have no category and are skipped.
func line2map(lines []string) map[string][]string {
	mapList := make(map[string][]string)

	for _, value := range lines {
		// Only the second field is needed, so stop splitting after it.
		item := strings.SplitN(value, "|@|", 3)
		if len(item) < 2 {
			continue
		}

		// The category name is the map key; append on a missing key starts
		// from a nil slice, so new and existing categories share one path.
		key := item[1]
		mapList[key] = append(mapList[key], value)
	}

	fmt.Printf("找到的%d类别:\n", len(mapList))

	return mapList
}

// in_array searches slice for val.
//
// It returns the index of the first element equal to val together with true,
// or -1 and false when no element matches.
func in_array(slice []string, val string) (int, bool) {
	for idx := 0; idx < len(slice); idx++ {
		if slice[idx] != val {
			continue
		}
		return idx, true
	}
	return -1, false
}

// 写入文件
func write_file(mapList map[string][]string, new_file_path string) {
	file_handle, err := os.OpenFile(new_file_path, os.O_CREATE|os.O_RDWR, 0666)
	buf := bufio.NewWriter(file_handle)

	if err != nil {
		fmt.Println("打开新文件错误:", err)
		return
	}
	defer file_handle.Close()
	//遍历map
	for key, value := range mapList {
		fmt.Printf("--%s %v\n", key, len(value))
		for nu := range value {
			// fmt.Print(value[nu])	//行末带有换行符
			buf.WriteString(value[nu])
		}
	}

	//缓存写入文件
	/*
		fmt.Println(item)
		fmt.Printf("%v | %v", key, string(value))
	*/
	err = buf.Flush()

	if err != nil {
		fmt.Printf("写入磁盘错误: ", err)
	}
}

  

posted on 2021-10-31 00:25  jarod99  阅读(98)  评论(0)  编辑  收藏  举报