Ansi2Utf8 小工具
将 GB2312 编码的文件转换成 Unity 使用的 UTF-8 无 BOM 格式
主要通过批处理调用：Ansi2Utf8.exe XXXXX.txt
源代码
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Ansi2Utf8
{
    /// <summary>
    /// Command-line tool that rewrites a GB2312-encoded text file in place as
    /// UTF-8 without a byte-order mark. Files that already look like UTF-8 are
    /// reported and left untouched.
    /// </summary>
    class Program
    {
        /// <summary>
        /// UTF-8 decoder configured to throw on any ill-formed byte sequence
        /// (no BOM emission, throwOnInvalidBytes = true). Used only for validation.
        /// </summary>
        private static readonly UTF8Encoding StrictUtf8 = new UTF8Encoding(false, true);

        /// <summary>
        /// Entry point. Expects the path of the file to convert as the first argument.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the file path.</param>
        static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                Console.WriteLine("None file path !!!");
                return;
            }

            string fileName = args[0];
            try
            {
                // ReadAllBytes loops until the whole file is read and always releases
                // the handle, even when reading fails part-way.
                byte[] fileBytes = File.ReadAllBytes(fileName);

                if (IsUtf8(fileBytes))
                {
                    Console.WriteLine(fileName + " is utf8 coding");
                    return;
                }

                Encoding ansi = Encoding.GetEncoding("GB2312");

                // Encoding.Convert yields the raw UTF-8 bytes without a preamble, so
                // writing them verbatim produces the BOM-less file directly.
                byte[] utf8Bytes = Encoding.Convert(ansi, Encoding.UTF8, fileBytes);
                File.WriteAllBytes(fileName, utf8Bytes);
            }
            catch (Exception ex)
            {
                Console.WriteLine(fileName + " convert error !!!!!!!!!!!!!!");
                // Surface the reason (missing file, access denied, unknown encoding, ...)
                // instead of discarding it.
                Console.WriteLine(ex.Message);
            }
        }

        /// <summary>
        /// Decides whether a byte buffer should be treated as UTF-8.
        /// </summary>
        /// <param name="bs">Raw file contents.</param>
        /// <returns>
        /// <c>true</c> when the buffer starts with the UTF-8 BOM (EF BB BF) or when
        /// every byte sequence in it is well-formed UTF-8 (this includes empty and
        /// pure-ASCII input); otherwise <c>false</c>.
        /// </returns>
        static bool IsUtf8(byte[] bs)
        {
            // An explicit BOM is taken as proof of UTF-8 without inspecting the rest.
            if (bs.Length >= 3 && bs[0] == 0xEF && bs[1] == 0xBB && bs[2] == 0xBF)
            {
                return true;
            }

            try
            {
                // Strict decoding rejects overlong lead bytes (0xC0/0xC1), the obsolete
                // 5- and 6-byte forms, encoded surrogates and truncated sequences.
                // A purely structural check would accept e.g. C0 B4, which is a valid
                // GB2312 double-byte pair, and the file would then be skipped.
                StrictUtf8.GetCharCount(bs);
                return true;
            }
            catch (DecoderFallbackException)
            {
                return false;
            }
        }
    }
}
附件列表