如何将UTF-8转换为UTF-8n
Encoding 提供一个前导码(即一个字节数组),可以将它作为编码过程中所产生的字节序列的前缀。如果前导码中包含字节顺序标记(在 Unicode 中,码位为 U+FEFF),则它会帮助解码器确定字节顺序和转换格式或 UTF。Unicode 字节顺序标记的序列化结果(十六进制)如下所示:
- UTF-8:EF BB BF
- UTF-16 Big-Endian 字节顺序:FE FF
- UTF-16 Little-Endian 字节顺序:FF FE
- UTF-32 Big-Endian 字节顺序:00 00 FE FF
- UTF-32 Little-Endian 字节顺序:FF FE 00 00
其中,UTF-8的字节顺序标记是EF BB BF,转换成十进制以后分别是239 187 191。
当我们通过自己编写的程序读取文件中的数据时候,如果编码不正确,就会导致读取错误。
例如:现在有一个文件,编码类型为UTF-8n(这种编码也属于UTF-8编码,但是文件中不包含字节顺序标记)。如果我们使用C#提供的StreamReader类进行读取,并且指定的编码为Encoding.Unicode,就会错误地读取该数据。但是如果该文件的编码类型为UTF-8(带字节顺序标记),则不会发生读取错误,因为StreamReader默认会根据文件开头的字节顺序标记自动识别出UTF-8编码。如果想正确地读取UTF-8n文件,我们只需要将StreamReader的编码指定为Encoding.UTF8就可以了。
但是有时候我们需要制作一些数据来进行测试。需要将UTF-8数据转换成UTF-8n或者是将UTF-8n 转换成UTF-8类型。
下边的简单代码演示如何进行转换:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
using IWshRuntimeLibrary;
namespace WindowsApplication7
{
    /// <summary>
    /// Form that converts the file named in <c>textBox1</c> between UTF-8 with a
    /// byte order mark (BOM) and UTF-8 without one ("UTF-8n"). The converted copy
    /// is written next to the source file with a "ClearBom_" or "AddBom_" prefix.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // Not referenced anywhere in this part of the partial class; kept so that
        // any other part that may use it keeps compiling.
        private string name;

        // The three bytes of the UTF-8 byte order mark: EF BB BF.
        private const Byte b1 = 239;
        private const Byte b2 = 187;
        private const Byte b3 = 191;

        // Result of the most recent BOM check on the selected file.
        bool ishavebom = false;

        // Number of bytes in the UTF-8 byte order mark.
        private const int BomLength = 3;

        // Size of the chunk used when copying file content.
        private const int CopyBufferSize = 4096;

        /// <summary>
        /// Writes a copy of the selected file without the UTF-8 BOM
        /// ("ClearBom_" + original file name, in the same directory).
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            ConvertSelectedFile("ClearBom_", false);
        }

        /// <summary>
        /// Writes a copy of the selected file that starts with the UTF-8 BOM
        /// ("AddBom_" + original file name, in the same directory).
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            ConvertSelectedFile("AddBom_", true);
        }

        /// <summary>
        /// Checks whether the file named in <c>textBox1</c> starts with the UTF-8 BOM.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the first three bytes are EF BB BF; <c>false</c> otherwise,
        /// including when the file is shorter than three bytes or cannot be opened
        /// (in the latter case the error message is shown to the user).
        /// </returns>
        private bool HasBom()
        {
            try
            {
                // System.IO.File is fully qualified, as in the original code --
                // presumably because IWshRuntimeLibrary (imported by this file)
                // also declares a File type; confirm before shortening.
                using (FileStream input = System.IO.File.Open(this.textBox1.Text, FileMode.Open, FileAccess.Read, FileShare.Read))
                {
                    byte[] header = new byte[BomLength];
                    int headerLength = ReadHeader(input, header);
                    return IsUtf8Bom(header, headerLength);
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            return false;
        }

        /// <summary>
        /// Lets the user pick a file and stores its path in <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // The dialog is a disposable component; release it once the choice is read.
            using (OpenFileDialog fd = new OpenFileDialog())
            {
                if (fd.ShowDialog(this) == DialogResult.OK)
                {
                    this.textBox1.Text = fd.FileName;
                }
            }
        }

        /// <summary>
        /// Copies the selected file to a prefixed sibling file, either forcing a
        /// leading UTF-8 BOM or stripping it. Shows "Over" on success and the
        /// exception message on failure.
        /// </summary>
        /// <param name="outputPrefix">Prefix prepended to the source file name.</param>
        /// <param name="writeBom">
        /// <c>true</c> to make the output start with the BOM; <c>false</c> to make
        /// the output start without it.
        /// </param>
        private void ConvertSelectedFile(string outputPrefix, bool writeBom)
        {
            try
            {
                FileInfo source = new FileInfo(this.textBox1.Text);
                string targetPath = System.IO.Path.Combine(source.DirectoryName, outputPrefix + source.Name);

                // The source is opened once, read-only, so read-only files can be
                // converted and the BOM check and the copy see the same content.
                using (FileStream input = System.IO.File.Open(this.textBox1.Text, FileMode.Open, FileAccess.Read, FileShare.Read))
                using (FileStream output = System.IO.File.Open(targetPath, FileMode.Create, FileAccess.Write))
                {
                    byte[] header = new byte[BomLength];
                    int headerLength = ReadHeader(input, header);
                    ishavebom = IsUtf8Bom(header, headerLength);

                    if (writeBom)
                    {
                        output.WriteByte(b1);
                        output.WriteByte(b2);
                        output.WriteByte(b3);
                    }

                    // When the header is not a BOM it is ordinary content and must
                    // be kept; when it is a BOM it has either just been written
                    // above or is being stripped.
                    if (!ishavebom)
                    {
                        output.Write(header, 0, headerLength);
                    }

                    // Copy the remainder in chunks; Read returns 0 at end of file.
                    byte[] buffer = new byte[CopyBufferSize];
                    int read;
                    while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        output.Write(buffer, 0, read);
                    }
                }
                MessageBox.Show("Over");
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }

        /// <summary>
        /// Reads bytes from the current position until <paramref name="header"/>
        /// is full or the end of the file is reached.
        /// </summary>
        /// <param name="input">Stream to read from.</param>
        /// <param name="header">Buffer that receives the bytes.</param>
        /// <returns>Number of bytes actually stored in <paramref name="header"/>.</returns>
        private static int ReadHeader(FileStream input, byte[] header)
        {
            int total = 0;
            while (total < header.Length)
            {
                // A single Read call may return fewer bytes than requested.
                int read = input.Read(header, total, header.Length - total);
                if (read == 0)
                {
                    break;
                }
                total += read;
            }
            return total;
        }

        /// <summary>
        /// Tells whether the first <paramref name="length"/> bytes of
        /// <paramref name="header"/> are exactly the UTF-8 BOM.
        /// </summary>
        /// <param name="header">Bytes read from the start of a file.</param>
        /// <param name="length">Number of valid bytes in <paramref name="header"/>.</param>
        /// <returns><c>true</c> when the bytes are EF BB BF.</returns>
        private static bool IsUtf8Bom(byte[] header, int length)
        {
            return length >= BomLength
                && header[0] == b1
                && header[1] == b2
                && header[2] == b3;
        }
    }
}
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
using IWshRuntimeLibrary;
namespace WindowsApplication7
{
    /// <summary>
    /// Form that converts the file named in <c>textBox1</c> between UTF-8 with a
    /// byte order mark (BOM) and UTF-8 without one ("UTF-8n"). The converted copy
    /// is written next to the source file with a "ClearBom_" or "AddBom_" prefix.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // Not referenced anywhere in this part of the partial class; kept so that
        // any other part that may use it keeps compiling.
        private string name;

        // The three bytes of the UTF-8 byte order mark: EF BB BF.
        private const Byte b1 = 239;
        private const Byte b2 = 187;
        private const Byte b3 = 191;

        // Result of the most recent BOM check on the selected file.
        bool ishavebom = false;

        // Number of bytes in the UTF-8 byte order mark.
        private const int BomLength = 3;

        // Size of the chunk used when copying file content.
        private const int CopyBufferSize = 4096;

        /// <summary>
        /// Writes a copy of the selected file without the UTF-8 BOM
        /// ("ClearBom_" + original file name, in the same directory).
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            ConvertSelectedFile("ClearBom_", false);
        }

        /// <summary>
        /// Writes a copy of the selected file that starts with the UTF-8 BOM
        /// ("AddBom_" + original file name, in the same directory).
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            ConvertSelectedFile("AddBom_", true);
        }

        /// <summary>
        /// Checks whether the file named in <c>textBox1</c> starts with the UTF-8 BOM.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the first three bytes are EF BB BF; <c>false</c> otherwise,
        /// including when the file is shorter than three bytes or cannot be opened
        /// (in the latter case the error message is shown to the user).
        /// </returns>
        private bool HasBom()
        {
            try
            {
                // System.IO.File is fully qualified, as in the original code --
                // presumably because IWshRuntimeLibrary (imported by this file)
                // also declares a File type; confirm before shortening.
                using (FileStream input = System.IO.File.Open(this.textBox1.Text, FileMode.Open, FileAccess.Read, FileShare.Read))
                {
                    byte[] header = new byte[BomLength];
                    int headerLength = ReadHeader(input, header);
                    return IsUtf8Bom(header, headerLength);
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            return false;
        }

        /// <summary>
        /// Lets the user pick a file and stores its path in <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // The dialog is a disposable component; release it once the choice is read.
            using (OpenFileDialog fd = new OpenFileDialog())
            {
                if (fd.ShowDialog(this) == DialogResult.OK)
                {
                    this.textBox1.Text = fd.FileName;
                }
            }
        }

        /// <summary>
        /// Copies the selected file to a prefixed sibling file, either forcing a
        /// leading UTF-8 BOM or stripping it. Shows "Over" on success and the
        /// exception message on failure.
        /// </summary>
        /// <param name="outputPrefix">Prefix prepended to the source file name.</param>
        /// <param name="writeBom">
        /// <c>true</c> to make the output start with the BOM; <c>false</c> to make
        /// the output start without it.
        /// </param>
        private void ConvertSelectedFile(string outputPrefix, bool writeBom)
        {
            try
            {
                FileInfo source = new FileInfo(this.textBox1.Text);
                string targetPath = System.IO.Path.Combine(source.DirectoryName, outputPrefix + source.Name);

                // The source is opened once, read-only, so read-only files can be
                // converted and the BOM check and the copy see the same content.
                using (FileStream input = System.IO.File.Open(this.textBox1.Text, FileMode.Open, FileAccess.Read, FileShare.Read))
                using (FileStream output = System.IO.File.Open(targetPath, FileMode.Create, FileAccess.Write))
                {
                    byte[] header = new byte[BomLength];
                    int headerLength = ReadHeader(input, header);
                    ishavebom = IsUtf8Bom(header, headerLength);

                    if (writeBom)
                    {
                        output.WriteByte(b1);
                        output.WriteByte(b2);
                        output.WriteByte(b3);
                    }

                    // When the header is not a BOM it is ordinary content and must
                    // be kept; when it is a BOM it has either just been written
                    // above or is being stripped.
                    if (!ishavebom)
                    {
                        output.Write(header, 0, headerLength);
                    }

                    // Copy the remainder in chunks; Read returns 0 at end of file.
                    byte[] buffer = new byte[CopyBufferSize];
                    int read;
                    while ((read = input.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        output.Write(buffer, 0, read);
                    }
                }
                MessageBox.Show("Over");
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }

        /// <summary>
        /// Reads bytes from the current position until <paramref name="header"/>
        /// is full or the end of the file is reached.
        /// </summary>
        /// <param name="input">Stream to read from.</param>
        /// <param name="header">Buffer that receives the bytes.</param>
        /// <returns>Number of bytes actually stored in <paramref name="header"/>.</returns>
        private static int ReadHeader(FileStream input, byte[] header)
        {
            int total = 0;
            while (total < header.Length)
            {
                // A single Read call may return fewer bytes than requested.
                int read = input.Read(header, total, header.Length - total);
                if (read == 0)
                {
                    break;
                }
                total += read;
            }
            return total;
        }

        /// <summary>
        /// Tells whether the first <paramref name="length"/> bytes of
        /// <paramref name="header"/> are exactly the UTF-8 BOM.
        /// </summary>
        /// <param name="header">Bytes read from the start of a file.</param>
        /// <param name="length">Number of valid bytes in <paramref name="header"/>.</param>
        /// <returns><c>true</c> when the bytes are EF BB BF.</returns>
        private static bool IsUtf8Bom(byte[] header, int length)
        {
            return length >= BomLength
                && header[0] == b1
                && header[1] == b2
                && header[2] == b3;
        }
    }
}