将GB2312的文件转为Unicode格式
/// <summary>
/// Rewrites a GB2312-encoded file in place as UTF-16 little-endian text
/// (Encoding.Unicode) preceded by the FF FE byte order mark.
/// </summary>
/// <remarks>
/// A file that isUTF8File already reports as starting with FF FE is left untouched.
/// An empty file is rewritten as an empty file (no byte order mark is added).
/// </remarks>
/// <param name="srcFileName">Path of the file to convert; its content is overwritten.</param>
/// <param name="destEncode">Not used by this method; kept so existing callers keep compiling.</param>
/// <returns>
/// true if the file already carried the byte order mark or was converted;
/// false if any exception occurred during the conversion itself.
/// </returns>
public static bool convertFileEncode(string srcFileName,string destEncode)
{
    // Deliberately outside the try block: a failure while probing the file
    // (for example a missing file) still reaches the caller as an exception.
    if (isUTF8File(srcFileName)) return true;

    string destFileName = null;
    try
    {
        destFileName = System.IO.Path.GetTempFileName();

        Encoding gb2312 = Encoding.GetEncoding("gb2312");
        Encoding utf16 = Encoding.Unicode;

        // A stateful decoder keeps a lead byte that ends one chunk and pairs it
        // with the trail byte that starts the next chunk. Converting every chunk
        // independently would corrupt a character split across the boundary.
        Decoder decoder = gb2312.GetDecoder();

        byte[] inBuffer = new byte[1024];
        char[] charBuffer = new char[gb2312.GetMaxCharCount(inBuffer.Length)];

        using (System.IO.FileStream input = System.IO.File.Open(
            srcFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read))
        using (System.IO.FileStream output = System.IO.File.Create(destFileName))
        {
            if (input.Length > 0)
            {
                output.WriteByte(0xFF);
                output.WriteByte(0xFE);
            }

            bool endOfInput = false;
            while (!endOfInput)
            {
                int bytesRead = input.Read(inBuffer, 0, inBuffer.Length);
                endOfInput = bytesRead <= 0;

                // The final call passes no bytes and flush = true so that any
                // byte still buffered inside the decoder is emitted.
                int charCount = decoder.GetChars(
                    inBuffer, 0, endOfInput ? 0 : bytesRead, charBuffer, 0, endOfInput);

                if (charCount > 0)
                {
                    byte[] encoded = utf16.GetBytes(charBuffer, 0, charCount);
                    output.Write(encoded, 0, encoded.Length);
                }
            }
        }

        // Both streams are closed here, so the source can be overwritten.
        System.IO.File.Copy(destFileName, srcFileName, true);
        System.IO.File.Delete(destFileName);
    }
    catch (Exception)
    {
        // Best-effort cleanup so a failed conversion does not leave a temp file behind.
        if (destFileName != null)
        {
            try
            {
                System.IO.File.Delete(destFileName);
            }
            catch (Exception)
            {
                // Nothing more can be done; the method already reports failure.
            }
        }
        return false;
    }
    return true;
}
/// <summary>
/// Reports whether a file starts with the two bytes FF FE.
/// </summary>
/// <remarks>
/// FF FE is the byte order mark of UTF-16 little-endian (Encoding.Unicode),
/// not of UTF-8 (EF BB BF). The method name is kept for existing callers.
/// </remarks>
/// <param name="srcFileName">Path of the file to inspect; opened read-only with shared read access.</param>
/// <returns>true if the first two bytes are FF FE; false otherwise, including for files shorter than two bytes.</returns>
public static bool isUTF8File(string srcFileName)
{
    // "using" guarantees the handle is released even if Read throws.
    using (System.IO.FileStream fs = System.IO.File.Open(srcFileName,
        System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read))
    {
        byte[] mark = new byte[2];
        int filled = 0;

        // Stream.Read may return fewer bytes than requested, so keep reading
        // until both bytes are available or the end of the file is reached.
        while (filled < mark.Length)
        {
            int n = fs.Read(mark, filled, mark.Length - filled);
            if (n <= 0) break;
            filled += n;
        }

        return filled == mark.Length && mark[0] == 0xFF && mark[1] == 0xFE;
    }
}
//public static string GB2312toUnicodeString(string content)
//{
// string gb2312Info = string.Empty;
// Encoding utf8 = Encoding.Unicode;
// Encoding gb2312 = Encoding.GetEncoding("gb2312");
// // Convert the string into a byte[].
// byte[] gb2312Bytes = gb2312.GetBytes(content);
// // Perform the conversion from one encoding to the other.
// byte[] utf8Bytes = Encoding.Convert(gb2312, utf8, gb2312Bytes);
// // Convert the new byte[] into a char[] and then into a string.
// // This is a slightly different approach to converting to illustrate
// // the use of GetCharCount/GetChars.
// char[] utf8Chars = new char[utf8.GetCharCount(utf8Bytes, 0, utf8Bytes.Length)];
// utf8.GetChars(utf8Bytes, 0, utf8Bytes.Length, utf8Chars, 0);
// string utf8info = new string(utf8Chars);
// return utf8info;
//}
/// <summary>
/// Converts a GB2312-encoded byte array into UTF-16 little-endian bytes (Encoding.Unicode).
/// </summary>
/// <remarks>
/// No byte order mark is added to the result.
/// NOTE(review): on .NET Core / .NET 5+ the "gb2312" code page is only available after
/// CodePagesEncodingProvider has been registered - confirm the target runtime.
/// </remarks>
/// <param name="gb2312Bytes">The GB2312-encoded input bytes.</param>
/// <returns>The same text encoded as UTF-16 little-endian.</returns>
public static byte[] GB2312toUnicodeBytes(byte[] gb2312Bytes)
{
    return Encoding.Convert(Encoding.GetEncoding("gb2312"), Encoding.Unicode, gb2312Bytes);
}
}
}
using System;
using System.Collections.Generic;
using System.Text;
namespace CrmTools
{
/// <summary>
/// File helpers for reading text files and for converting GB2312 text files
/// to UTF-16 little-endian (Encoding.Unicode).
/// </summary>
class FileUtil
{
    /// <summary>
    /// Reads a whole file and decodes its bytes with Encoding.Default.
    /// </summary>
    /// <remarks>
    /// No byte order mark detection is performed: every byte of the file is passed
    /// to the decoder. The file is read in one piece so that only bytes that were
    /// actually read are decoded and multi-byte characters are never split.
    /// </remarks>
    /// <param name="fileName">Path of the file to read.</param>
    /// <returns>The decoded file content; an empty string for an empty file.</returns>
    public static string readFile(string fileName)
    {
        byte[] content = System.IO.File.ReadAllBytes(fileName);
        return System.Text.Encoding.Default.GetString(content);
    }

    /// <summary>
    /// Rewrites a GB2312-encoded file in place as UTF-16 little-endian text
    /// (Encoding.Unicode) preceded by the FF FE byte order mark.
    /// </summary>
    /// <remarks>
    /// A file that isUTF8File already reports as starting with FF FE is left untouched.
    /// An empty file is rewritten as an empty file (no byte order mark is added).
    /// </remarks>
    /// <param name="srcFileName">Path of the file to convert; its content is overwritten.</param>
    /// <param name="destEncode">Not used by this method; kept so existing callers keep compiling.</param>
    /// <returns>
    /// true if the file already carried the byte order mark or was converted;
    /// false if any exception occurred during the conversion itself.
    /// </returns>
    public static bool convertFileEncode(string srcFileName,string destEncode)
    {
        // Deliberately outside the try block: a failure while probing the file
        // (for example a missing file) still reaches the caller as an exception.
        if (isUTF8File(srcFileName)) return true;

        string destFileName = null;
        try
        {
            destFileName = System.IO.Path.GetTempFileName();

            Encoding gb2312 = Encoding.GetEncoding("gb2312");
            Encoding utf16 = Encoding.Unicode;

            // A stateful decoder keeps a lead byte that ends one chunk and pairs it
            // with the trail byte that starts the next chunk. Converting every chunk
            // independently would corrupt a character split across the boundary.
            Decoder decoder = gb2312.GetDecoder();

            byte[] inBuffer = new byte[1024];
            char[] charBuffer = new char[gb2312.GetMaxCharCount(inBuffer.Length)];

            using (System.IO.FileStream input = System.IO.File.Open(
                srcFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read))
            using (System.IO.FileStream output = System.IO.File.Create(destFileName))
            {
                if (input.Length > 0)
                {
                    output.WriteByte(0xFF);
                    output.WriteByte(0xFE);
                }

                bool endOfInput = false;
                while (!endOfInput)
                {
                    int bytesRead = input.Read(inBuffer, 0, inBuffer.Length);
                    endOfInput = bytesRead <= 0;

                    // The final call passes no bytes and flush = true so that any
                    // byte still buffered inside the decoder is emitted.
                    int charCount = decoder.GetChars(
                        inBuffer, 0, endOfInput ? 0 : bytesRead, charBuffer, 0, endOfInput);

                    if (charCount > 0)
                    {
                        byte[] encoded = utf16.GetBytes(charBuffer, 0, charCount);
                        output.Write(encoded, 0, encoded.Length);
                    }
                }
            }

            // Both streams are closed here, so the source can be overwritten.
            System.IO.File.Copy(destFileName, srcFileName, true);
            System.IO.File.Delete(destFileName);
        }
        catch (Exception)
        {
            // Best-effort cleanup so a failed conversion does not leave a temp file behind.
            if (destFileName != null)
            {
                try
                {
                    System.IO.File.Delete(destFileName);
                }
                catch (Exception)
                {
                    // Nothing more can be done; the method already reports failure.
                }
            }
            return false;
        }
        return true;
    }

    /// <summary>
    /// Reports whether a file starts with the two bytes FF FE.
    /// </summary>
    /// <remarks>
    /// FF FE is the byte order mark of UTF-16 little-endian (Encoding.Unicode),
    /// not of UTF-8 (EF BB BF). The method name is kept for existing callers.
    /// </remarks>
    /// <param name="srcFileName">Path of the file to inspect; opened read-only with shared read access.</param>
    /// <returns>true if the first two bytes are FF FE; false otherwise, including for files shorter than two bytes.</returns>
    public static bool isUTF8File(string srcFileName)
    {
        // "using" guarantees the handle is released even if Read throws.
        using (System.IO.FileStream fs = System.IO.File.Open(srcFileName,
            System.IO.FileMode.Open, System.IO.FileAccess.Read, System.IO.FileShare.Read))
        {
            byte[] mark = new byte[2];
            int filled = 0;

            // Stream.Read may return fewer bytes than requested, so keep reading
            // until both bytes are available or the end of the file is reached.
            while (filled < mark.Length)
            {
                int n = fs.Read(mark, filled, mark.Length - filled);
                if (n <= 0) break;
                filled += n;
            }

            return filled == mark.Length && mark[0] == 0xFF && mark[1] == 0xFE;
        }
    }

    /// <summary>
    /// Converts a GB2312-encoded byte array into UTF-16 little-endian bytes (Encoding.Unicode).
    /// </summary>
    /// <remarks>
    /// No byte order mark is added to the result. Each call converts its input in
    /// isolation, so callers must not pass a buffer that ends in the middle of a
    /// two-byte character.
    /// </remarks>
    /// <param name="gb2312Bytes">The GB2312-encoded input bytes.</param>
    /// <returns>The same text encoded as UTF-16 little-endian.</returns>
    public static byte[] GB2312toUnicodeBytes(byte[] gb2312Bytes)
    {
        Encoding gb2312 = Encoding.GetEncoding("gb2312");
        Encoding utf16 = Encoding.Unicode;
        return Encoding.Convert(gb2312, utf16, gb2312Bytes);
    }
}
}
antony
:antony1029@163.com
:http://antony1029.cnblogs.com