博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

去掉txt中的重复标题

Posted on 2013-05-30 12:01  时间之外  阅读(944)  评论(0)  编辑  收藏  举报

想把小说转为mobi格式,找了个软件Txt2eBook.exe,把小说拖进去后,发现标题是重复的,格式如下:

正文 第一章 某某

第一章某某

正文。。。。

去掉多余的标题,思路是逐行读取,把除多余的标题外的内容原格式写到新文本文件中。

代码如下:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.IO;

namespace readText
{
    /// <summary>
    /// Main form of the tool. Clicking the button copies <c>D:\test.txt</c> to
    /// <c>D:\test1.txt</c> line by line, dropping the redundant heading that
    /// directly follows a title line such as "正文 第一章 某某" / "第一章某某".
    /// </summary>
    public partial class Form1 : Form
    {
        // Input file; it is decoded as GB2312 text.
        private const string SourcePath = @"D:\test.txt";

        // Output file; it is written with StreamWriter's default encoding.
        private const string TargetPath = @"D:\test1.txt";

        // A title line is recognised by containing space-separated parts.
        private static readonly char[] TitleSeparators = { ' ' };

        // Total number of lines in the input file, shown as the progress denominator.
        long coun;

        /// <summary>
        /// Counts the input lines once for the progress display, then builds the UI.
        /// </summary>
        public Form1()
        {
            coun = count();
            InitializeComponent();
        }

        /// <summary>
        /// Determines the total number of lines in the input file. The value is only
        /// used for the progress label (the original author notes that the visible
        /// effect of that label is not good).
        /// </summary>
        /// <returns>
        /// The number of lines in the input file, or 0 when the file does not exist,
        /// so that a missing file no longer prevents the form from being constructed.
        /// </returns>
        private long count()
        {
            if (!File.Exists(SourcePath))
            {
                return 0;
            }

            // Count into a local so that calling this method again cannot accumulate
            // on top of the value already stored in the field.
            long total = 0;
            using (StreamReader sr = new StreamReader(SourcePath, Encoding.GetEncoding("gb2312")))
            {
                while (sr.ReadLine() != null)
                {
                    total++;
                }
            }
            return total;
        }

        /// <summary>
        /// Returns the text that a redundant heading following the given line would
        /// have: the last two space-separated parts of the line joined together.
        /// </summary>
        /// <param name="trimmedLine">The current line with surrounding whitespace removed.</param>
        /// <returns>
        /// The expected duplicate heading, or <c>null</c> when the line has fewer than
        /// two space-separated parts and therefore is not in the title format.
        /// </returns>
        private static string GetExpectedDuplicate(string trimmedLine)
        {
            // RemoveEmptyEntries keeps repeated spaces from producing empty parts,
            // which would otherwise corrupt the expected heading.
            string[] parts = trimmedLine.Split(TitleSeparators, StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length < 2)
            {
                return null;
            }
            return (parts[parts.Length - 2] + parts[parts.Length - 1]).Trim();
        }

        /// <summary>
        /// Copies the input file to the output file, skipping every line that merely
        /// repeats the title on the line directly before it. Shows "OK" when done, or
        /// the exception text when anything fails.
        /// </summary>
        /// <param name="sender">The control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            try
            {
                // The using blocks flush and close both files on every path, including
                // when an exception is thrown part-way through.
                using (StreamReader sr = new StreamReader(SourcePath, Encoding.GetEncoding("gb2312")))
                using (StreamWriter sw = new StreamWriter(TargetPath))
                {
                    long lin = 0;

                    // Heading text that would duplicate the previously written title
                    // line; null when the previous line was not a title.
                    string expectedDuplicate = null;

                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        // Exactly one increment per line read keeps "lin/coun" accurate.
                        lin++;
                        string trimmed = line.Trim();

                        if (expectedDuplicate != null && trimmed == expectedDuplicate)
                        {
                            // Redundant heading: drop it and stop expecting another one.
                            expectedDuplicate = null;
                        }
                        else
                        {
                            sw.WriteLine(line);
                            // Every written line is itself examined as a possible title,
                            // so consecutive title/duplicate pairs are handled as well.
                            expectedDuplicate = GetExpectedDuplicate(trimmed);
                        }

                        this.label1.Text = lin.ToString() + "/" + coun.ToString();
                    }
                }
                MessageBox.Show("OK");
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.ToString());
            }
        }
    }
}