Microsoft.Baidu.Ali.语音识别/人脸识别
在第一篇博客里提过图片识别的底层.最精准的图片识别需要海量的数据磨炼.自己写的底层没有以亿为单位的数据支持其实也是个残废品.
此篇不是为了教学,而是为了在需要的时候抄下来就能用.
在此介绍Microsoft.Baidu.Ali的几个人工智能接口吧.
没啥技术含量.都是HTTP.POST请求一类的.
个人可以申请30天免费试用.
以下是微软的人工智能接口(还尝试了百度.阿里的人工智能.微软识别的是最精准的)
/// <summary>
/// Posts a local image file to the Microsoft Cognitive Services Face "detect"
/// endpoint and writes the pretty-printed JSON response to the console.
/// </summary>
public class FaceHelper
{
    private const string uriBase = "https://westcentralus.api.cognitive.microsoft.com/face/v1.0/detect";

    // Query string sent with every request: asks for the face id and the listed face attributes.
    private const string requestParameters = "returnFaceId=true&returnFaceLandmarks=false&returnFaceAttributes=age,gender,headPose,smile,facialHair,glasses,emotion,hair,makeup,occlusion,accessories,blur,exposure,noise";

    // A single shared client; the subscription key travels on each request message
    // instead of on the client's default headers, so instances with different keys
    // cannot interfere with each other.
    private static readonly HttpClient client = new HttpClient();

    /// <summary>
    /// Task representing the request started by the constructor. It is already
    /// completed when no request was started (blank key). Await it to know when the
    /// response has been printed, or to observe a failure.
    /// </summary>
    public Task Completion { get; private set; }

    /// <summary>
    /// Starts a face-detection request for <paramref name="imageFilePath"/>.
    /// Nothing is sent when <paramref name="Key"/> is null, empty or whitespace.
    /// The constructor does not wait for the response; see <see cref="Completion"/>.
    /// </summary>
    /// <param name="Key">Value sent in the Ocp-Apim-Subscription-Key header.</param>
    /// <param name="imageFilePath">Path of the image file whose bytes are uploaded.</param>
    public FaceHelper(string Key, string imageFilePath)
    {
        if (!String.IsNullOrWhiteSpace(Key))
        {
            Completion = MakeAnalysisRequest(Key, imageFilePath);
        }
        else
        {
            Completion = Task.FromResult(false);
        }
    }

    /// <summary>
    /// Uploads the image as application/octet-stream and prints the formatted response body.
    /// </summary>
    /// <param name="subscriptionKey">Subscription key header value.</param>
    /// <param name="imageFilePath">Path of the image file to upload.</param>
    private static async Task MakeAnalysisRequest(string subscriptionKey, string imageFilePath)
    {
        try
        {
            byte[] byteData = GetImageAsByteArray(imageFilePath);

            // Disposing the request message also disposes its content.
            using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, uriBase + "?" + requestParameters))
            {
                request.Headers.Add("Ocp-Apim-Subscription-Key", subscriptionKey);
                request.Content = new ByteArrayContent(byteData);
                request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");

                using (HttpResponseMessage response = await client.SendAsync(request))
                {
                    string contentString = await response.Content.ReadAsStringAsync();
                    Console.WriteLine("\nResponse:\n");
                    Console.WriteLine(JsonPrettyPrint(contentString));
                }
            }
        }
        catch (Exception ex)
        {
            // The constructor does not await this task, so report the failure here
            // as well as through the faulted task.
            Console.Error.WriteLine("Face detection request failed: " + ex.Message);
            throw;
        }
    }

    /// <summary>
    /// Reads the whole file into memory.
    /// </summary>
    /// <param name="imageFilePath">Path of the file to read.</param>
    /// <returns>The file's bytes.</returns>
    private static byte[] GetImageAsByteArray(string imageFilePath)
    {
        // File.ReadAllBytes closes the file handle; the previous FileStream/BinaryReader pair never did.
        return File.ReadAllBytes(imageFilePath);
    }

    /// <summary>
    /// Re-formats a JSON string with line breaks and a three-space indent per nesting level.
    /// </summary>
    /// <param name="json">Raw JSON text; null or empty yields an empty string.</param>
    /// <returns>The formatted text, trimmed of leading and trailing whitespace.</returns>
    private static string JsonPrettyPrint(string json)
    {
        if (string.IsNullOrEmpty(json))
        {
            return string.Empty;
        }

        const int indentLength = 3;
        StringBuilder sb = new StringBuilder();
        bool inString = false;   // currently between an opening and closing double quote
        bool escaped = false;    // previous character inside the string was a backslash
        int depth = 0;

        foreach (char ch in json)
        {
            if (inString)
            {
                // String contents are copied verbatim. A backslash escapes the next
                // character, so an escaped quote does not terminate the string, and
                // apostrophes have no special meaning.
                sb.Append(ch);
                if (escaped)
                {
                    escaped = false;
                }
                else if (ch == '\\')
                {
                    escaped = true;
                }
                else if (ch == '"')
                {
                    inString = false;
                }
                continue;
            }

            switch (ch)
            {
                case '"':
                    inString = true;
                    sb.Append(ch);
                    break;
                case '{':
                case '[':
                    depth++;
                    sb.Append(ch);
                    sb.Append(Environment.NewLine);
                    sb.Append(' ', depth * indentLength);
                    break;
                case '}':
                case ']':
                    // Clamp so an unbalanced closing bracket cannot produce a negative indent.
                    depth = Math.Max(0, depth - 1);
                    sb.Append(Environment.NewLine);
                    sb.Append(' ', depth * indentLength);
                    sb.Append(ch);
                    break;
                case ',':
                    sb.Append(ch);
                    sb.Append(Environment.NewLine);
                    sb.Append(' ', depth * indentLength);
                    break;
                case ':':
                    sb.Append(ch);
                    sb.Append(' ');
                    break;
                default:
                    // Whitespace outside strings is layout only and is regenerated above.
                    if (!char.IsWhiteSpace(ch))
                    {
                        sb.Append(ch);
                    }
                    break;
            }
        }

        return sb.ToString().Trim();
    }
}
人脸识别 API:检测、识别、分析、组织和标记照片中的人脸.
// Run face detection on the image whose path is stored under the "Face" app setting.
string faceImagePath = ConfigurationManager.AppSettings["Face"];
FaceHelper face = new FaceHelper("你的密钥", faceImagePath);
返回值很多很详细:人脸在图片的哪个区域、性别、有没有头发、有没有胡子、有没有眼镜,都写得很清楚.在此不一一列举.
以下是声音识别.分REST 和SOCKET 语音识别也分中英美法.传递的音频也要分长短.以下配置为英文识别.REST.15秒以下音频
/// <summary>
/// Uploads a WAV file to the Bing Speech REST endpoint and writes the HTTP status
/// code and the response body to the console.
/// </summary>
/// <remarks>
/// The service has three recognition modes: interactive, conversation and dictation.
/// The mode adjusts recognition to how the user speaks; pick the one that suits the
/// application. The URL used here selects the dictation mode with language en-US.
/// </remarks>
public class VoiceHelper
{
    /// <summary>
    /// Sends <paramref name="file"/> for recognition. The whole exchange runs
    /// synchronously inside the constructor.
    /// </summary>
    /// <param name="file">Path of the audio file to upload.</param>
    /// <param name="key">Value sent in the Ocp-Apim-Subscription-Key header.</param>
    public VoiceHelper(string file, string key)
    {
        string url = "https://speech.platform.bing.com/speech/recognition/dictation/cognitiveservices/v1?language=en-US&format=simple";

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.SendChunked = true;
        request.Accept = @"application/json;text/xml";
        request.Method = "POST";
        request.ProtocolVersion = HttpVersion.Version11;
        request.ContentType = @"audio/wav; codec=audio/pcm; samplerate=16000";
        request.Headers["Ocp-Apim-Subscription-Key"] = key;

        using (FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read))
        using (Stream requestStream = request.GetRequestStream())
        {
            // Stream the file in 1 KB pieces. CopyTo replaces a hand-rolled loop whose
            // buffer size was derived from (int)fs.Length and overflowed for large files.
            fs.CopyTo(requestStream, 1024);
            requestStream.Flush();
        }

        string responseString;
        using (WebResponse response = request.GetResponse())
        {
            Console.WriteLine(((HttpWebResponse)response).StatusCode);
            using (StreamReader sr = new StreamReader(response.GetResponseStream()))
            {
                responseString = sr.ReadToEnd();
            }
            Console.WriteLine(responseString);
        }
    }
}
// Run speech recognition on the audio file whose path is stored under the "Voice" app setting.
string voiceFilePath = ConfigurationManager.AppSettings["Voice"];
VoiceHelper voice = new VoiceHelper(voiceFilePath, "你的密钥");
这个语音识别还是可以的.Displaytext就是我在音频中说的话.重复了三遍 TEST.声音很沙哑也很低沉.识别率很赞.
不过要注意:只支持15秒以内、PCM编码、单声道、16 kHz 的WAV文件.
以下是图片识别.这个就可好玩了.我放了一个大飞机.返回的数据中.飞机蓝天都识别了
/// <summary>
/// Posts a local image file to the Microsoft Computer Vision "analyze" endpoint
/// and writes the pretty-printed JSON response to the console.
/// </summary>
public class OCRHelper
{
    const string subscriptionKey = "你的密钥";

    const string uriBase = "https://westcentralus.api.cognitive.microsoft.com/vision/v1.0/analyze";

    // Request parameters. A third optional parameter is "details".
    const string requestParameters = "visualFeatures=Categories,Description,Color&language=en";

    // A single shared client; the subscription key is attached to each request message.
    private static readonly HttpClient client = new HttpClient();

    /// <summary>
    /// Task representing the request started by the constructor. Await it to know
    /// when the response has been printed, or to observe a failure.
    /// </summary>
    public Task Completion { get; private set; }

    /// <summary>
    /// Prints the banner text, starts the analysis request for <paramref name="file"/>
    /// and returns without waiting for the response; see <see cref="Completion"/>.
    /// </summary>
    /// <param name="file">Path of the image file to analyze.</param>
    public OCRHelper(string file)
    {
        Console.WriteLine("Analyze an image:");
        Console.Write("Enter the path to an image you wish to analzye: ");

        // Execute the REST API call.
        Completion = MakeAnalysisRequest(file);

        Console.WriteLine("\nPlease wait a moment for the results to appear. Then, press Enter to exit...\n");
    }

    /// <summary>
    /// Gets the analysis of the specified image file by using the Computer Vision REST API
    /// and prints the formatted response body.
    /// </summary>
    /// <param name="imageFilePath">The image file.</param>
    private static async Task MakeAnalysisRequest(string imageFilePath)
    {
        try
        {
            // Request body: the raw bytes of the local image file.
            byte[] byteData = GetImageAsByteArray(imageFilePath);

            // Disposing the request message also disposes its content.
            using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Post, uriBase + "?" + requestParameters))
            {
                request.Headers.Add("Ocp-Apim-Subscription-Key", subscriptionKey);

                // This example uses content type "application/octet-stream".
                // The other content types you can use are "application/json" and "multipart/form-data".
                request.Content = new ByteArrayContent(byteData);
                request.Content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");

                using (HttpResponseMessage response = await client.SendAsync(request))
                {
                    string contentString = await response.Content.ReadAsStringAsync();

                    // description.captions.text in the response holds the English description of the image.
                    Console.WriteLine("\nResponse:\n");
                    Console.WriteLine(JsonPrettyPrint(contentString));
                }
            }
        }
        catch (Exception ex)
        {
            // The constructor does not await this task, so report the failure here
            // as well as through the faulted task.
            Console.Error.WriteLine("Image analysis request failed: " + ex.Message);
            throw;
        }
    }

    /// <summary>
    /// Returns the contents of the specified file as a byte array.
    /// </summary>
    /// <param name="imageFilePath">The image file to read.</param>
    /// <returns>The byte array of the image data.</returns>
    private static byte[] GetImageAsByteArray(string imageFilePath)
    {
        // File.ReadAllBytes closes the file handle; the previous FileStream/BinaryReader pair never did.
        return File.ReadAllBytes(imageFilePath);
    }

    /// <summary>
    /// Formats the given JSON string by adding line breaks and a three-space indent per nesting level.
    /// </summary>
    /// <param name="json">The raw JSON string to format; null or empty yields an empty string.</param>
    /// <returns>The formatted JSON string, trimmed of leading and trailing whitespace.</returns>
    private static string JsonPrettyPrint(string json)
    {
        if (string.IsNullOrEmpty(json))
        {
            return string.Empty;
        }

        const int indentLength = 3;
        StringBuilder sb = new StringBuilder();
        bool inString = false;   // currently between an opening and closing double quote
        bool escaped = false;    // previous character inside the string was a backslash
        int depth = 0;

        foreach (char ch in json)
        {
            if (inString)
            {
                // String contents are copied verbatim. A backslash escapes the next
                // character, so an escaped quote does not terminate the string, and
                // apostrophes have no special meaning.
                sb.Append(ch);
                if (escaped)
                {
                    escaped = false;
                }
                else if (ch == '\\')
                {
                    escaped = true;
                }
                else if (ch == '"')
                {
                    inString = false;
                }
                continue;
            }

            switch (ch)
            {
                case '"':
                    inString = true;
                    sb.Append(ch);
                    break;
                case '{':
                case '[':
                    depth++;
                    sb.Append(ch);
                    sb.Append(Environment.NewLine);
                    sb.Append(' ', depth * indentLength);
                    break;
                case '}':
                case ']':
                    // Clamp so an unbalanced closing bracket cannot produce a negative indent.
                    depth = Math.Max(0, depth - 1);
                    sb.Append(Environment.NewLine);
                    sb.Append(' ', depth * indentLength);
                    sb.Append(ch);
                    break;
                case ',':
                    sb.Append(ch);
                    sb.Append(Environment.NewLine);
                    sb.Append(' ', depth * indentLength);
                    break;
                case ':':
                    sb.Append(ch);
                    sb.Append(' ');
                    break;
                default:
                    // Whitespace outside strings is layout only and is regenerated above.
                    if (!char.IsWhiteSpace(ch))
                    {
                        sb.Append(ch);
                    }
                    break;
            }
        }

        return sb.ToString().Trim();
    }
}
// Analyze a sample image stored on the local desktop.
string sampleImagePath = @"C:\Users\Administrator\Desktop\test2.png";
OCRHelper ocr = new OCRHelper(sampleImagePath);
下图是输入参数
下面是输出参数
以下是阿里的人工智能接口
/// <summary>
/// Face attributes: posts a local image (base64-encoded) to the Alibaba Cloud market
/// "/face/attribute" API and writes the status code, method, headers and response
/// body to the console.
/// </summary>
/// <param name="file">Path of the image file to analyze.</param>
private static void Face(string file)
{
    string host = "http://rlsxsb.market.alicloudapi.com";
    string path = "/face/attribute";
    string method = "POST";
    // SECURITY(review): credential committed in source; consider loading it from configuration.
    string appcode = "b009c20b62664344a794fe0a4535b2ab";
    string querys = "";

    string base64 = ImageHelper.ImgToBase64String(file);
    // NOTE(review): ImgToBase64String returns null for a missing file, in which case the
    // request below is sent with blank content - confirm whether that is intended.

    // Body fields: "type" 0 = recognize by URL ("image_url" must be set),
    // 1 = recognize by image content ("content" must be set, base64-encoded).
    string bodys = "{\"type\":1,\"image_url\":\" \",\"content\":\"" + base64 + " \"}";

    string url = host + path;
    if (0 < querys.Length)
    {
        url = url + "?" + querys;
    }

    HttpWebRequest httpRequest;
    if (host.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
    {
        // NOTE(review): installs a process-wide certificate validation callback defined
        // elsewhere in this class - verify that it does not accept every certificate.
        ServicePointManager.ServerCertificateValidationCallback = new RemoteCertificateValidationCallback(CheckValidationResult);
        httpRequest = (HttpWebRequest)WebRequest.CreateDefault(new Uri(url));
    }
    else
    {
        httpRequest = (HttpWebRequest)WebRequest.Create(url);
    }

    httpRequest.Method = method;
    httpRequest.Headers.Add("Authorization", "APPCODE " + appcode);
    // Set the Content-Type required by the API.
    httpRequest.ContentType = "application/json; charset=UTF-8";

    if (0 < bodys.Length)
    {
        byte[] data = Encoding.UTF8.GetBytes(bodys);
        using (Stream stream = httpRequest.GetRequestStream())
        {
            stream.Write(data, 0, data.Length);
        }
    }

    HttpWebResponse httpResponse;
    try
    {
        httpResponse = (HttpWebResponse)httpRequest.GetResponse();
    }
    catch (WebException ex)
    {
        // An HTTP error status still carries a response worth printing. A transport
        // failure has no response at all, so rethrow it instead of dereferencing null.
        httpResponse = ex.Response as HttpWebResponse;
        if (httpResponse == null)
        {
            throw;
        }
    }

    using (httpResponse)
    {
        Console.WriteLine(httpResponse.StatusCode);
        Console.WriteLine(httpResponse.Method);
        Console.WriteLine(httpResponse.Headers);

        using (Stream st = httpResponse.GetResponseStream())
        using (StreamReader reader = new StreamReader(st, Encoding.GetEncoding("utf-8")))
        {
            string result = reader.ReadToEnd();
            Console.WriteLine(result);
            Console.WriteLine("\n");
        }
    }
}
再放一个图片转Base64的帮助类
using System;
using System.Drawing;
using System.Drawing.Drawing2D;
using System.Drawing.Imaging;
using System.IO;

namespace Microsofot.Core
{
    /// <summary>
    /// Image helpers: base64 conversion and thumbnail generation.
    /// </summary>
    public static class ImageHelper
    {
        /// <summary>
        /// Loads an image file, re-encodes it as JPEG and returns the JPEG bytes as base64.
        /// </summary>
        /// <param name="filename">Path of the image file.</param>
        /// <returns>
        /// The base64 string, or null when <paramref name="filename"/> is null, blank
        /// or does not name an existing file.
        /// </returns>
        public static string ImgToBase64String(string filename)
        {
            if (String.IsNullOrWhiteSpace(filename) || !File.Exists(filename))
            {
                return null;
            }

            // Dispose the bitmap so the source file is not left locked.
            using (Bitmap bmp = new Bitmap(filename))
            using (MemoryStream ms = new MemoryStream())
            {
                bmp.Save(ms, ImageFormat.Jpeg);
                return Convert.ToBase64String(ms.ToArray());
            }
        }

        /// <summary>
        /// Decodes a base64 string into a bitmap.
        /// </summary>
        /// <param name="base64Code">Base64-encoded image bytes.</param>
        /// <returns>The decoded bitmap; the caller owns it and should dispose it.</returns>
        public static Bitmap Base64StringToImage(string base64Code)
        {
            byte[] arr = Convert.FromBase64String(base64Code);

            // The stream is deliberately left open: a Bitmap created from a stream needs
            // that stream for its whole lifetime, and closing it early makes later
            // operations on the bitmap fail. A MemoryStream holds only managed memory.
            MemoryStream ms = new MemoryStream(arr);
            return new Bitmap(ms);
        }

        /// <summary>
        /// Draws <paramref name="img"/> scaled to fit, centered on a white canvas of
        /// <paramref name="width"/> x <paramref name="height"/> pixels, and saves the
        /// result as JPEG. <paramref name="img"/> is disposed before the method returns.
        /// </summary>
        /// <param name="img">Source image; disposed by this method.</param>
        /// <param name="thumbImagePath">Path the thumbnail is written to.</param>
        /// <param name="width">Canvas width in pixels.</param>
        /// <param name="height">Canvas height in pixels.</param>
        public static void GenerateThumbImage(System.Drawing.Image img, string thumbImagePath, int width, int height)
        {
            if (img == null)
            {
                throw new ArgumentNullException("img");
            }

            try
            {
                // Source size in pixels.
                int ow = img.Width;
                int oh = img.Height;

                // Pick the axis that limits the scale so the scaled image always fits
                // inside the canvas. Comparing cross products avoids floating point;
                // long avoids overflow.
                int towidth;
                int toheight;
                if ((long)ow * height > (long)oh * width)
                {
                    // Width is the limiting side.
                    towidth = width;
                    toheight = (int)((long)oh * width / ow);
                }
                else
                {
                    // Height is the limiting side (or the aspect ratios match).
                    toheight = height;
                    towidth = (int)((long)ow * height / oh);
                }

                using (Bitmap bm = new Bitmap(width, height))
                using (Graphics g = Graphics.FromImage(bm))
                {
                    g.InterpolationMode = InterpolationMode.High;
                    g.SmoothingMode = SmoothingMode.HighQuality;

                    // Fill the canvas with white before drawing.
                    g.Clear(Color.White);

                    // Draw the whole source image, scaled and centered on the canvas.
                    g.DrawImage(
                        img,
                        new Rectangle((width - towidth) / 2, (height - toheight) / 2, towidth, toheight),
                        0, 0, ow, oh,
                        GraphicsUnit.Pixel);

                    // Save the thumbnail in JPEG format.
                    bm.Save(thumbImagePath, ImageFormat.Jpeg);
                }
            }
            finally
            {
                img.Dispose();
            }
        }
    }
}
在此就不放阿里的语音识别接口了.
因为
(至2018/1/10免费版本仅支持小量的调用,首先请手动将音频文件放到阿里服务器上,然后将生成的URL拉下来作为发起识别的参数.并不能支持并发)
https://help.aliyun.com/document_detail/32378.html?spm=5176.product30413.3.4.IegjQU
以下是百度的人工智能接口
在使用百度的接口时.需要先获取Token
/// <summary>
/// Requests an access token for the Baidu AI services.
/// </summary>
public static class AccessToken
{
    // The token returned by getAccessToken() should be cached according to its expires_in value.
    // Example of a token:
    // "24.adda70c11b9786206253ddb70affdc46.2592000.1493524354.282335-1234567"

    // SECURITY(review): credentials committed in source; consider loading them from configuration.

    // API Key of the Baidu Cloud application that has the required services enabled
    // (enable several services when creating the application).
    private static readonly String clientId = "bQWhFMDTvIZpHXr8ZYwT0r9d";

    // Secret Key of the same Baidu Cloud application.
    private static readonly String clientSecret = "EQGW33PLeYnWozRzafAcpiMBdxH8fLs2";

    /// <summary>
    /// Posts the client credentials to the OAuth token endpoint, echoes the response
    /// body to the console and returns it.
    /// </summary>
    /// <returns>
    /// The response body exactly as received. This method does not extract any field from it.
    /// </returns>
    public static String getAccessToken()
    {
        String authHost = "https://aip.baidubce.com/oauth/2.0/token";

        List<KeyValuePair<String, String>> paraList = new List<KeyValuePair<String, String>>
        {
            new KeyValuePair<String, String>("grant_type", "client_credentials"),
            new KeyValuePair<String, String>("client_id", clientId),
            new KeyValuePair<String, String>("client_secret", clientSecret)
        };

        // The method stays synchronous for existing callers, so it still blocks on
        // .Result, but the client, form content and response are now released.
        using (HttpClient client = new HttpClient())
        using (FormUrlEncodedContent form = new FormUrlEncodedContent(paraList))
        using (HttpResponseMessage response = client.PostAsync(authHost, form).Result)
        {
            String result = response.Content.ReadAsStringAsync().Result;
            Console.WriteLine(result);
            return result;
        }
    }
}
然后调用人脸识别
/// <summary>
/// Face detection through the Baidu AI REST API.
/// </summary>
public class FaceDetect
{
    /// <summary>
    /// Posts the image (re-encoded and base64-encoded by ImageHelper.ImgToBase64String)
    /// to the face "detect" endpoint, echoes the response body to the console and returns it.
    /// </summary>
    /// <param name="imageFile">Path of the image file to analyze.</param>
    /// <param name="token">Access token appended to the URL as access_token.</param>
    /// <returns>The response body as text.</returns>
    public static string detect(string imageFile, string token)
    {
        string host = "https://aip.baidubce.com/rest/2.0/face/v1/detect?access_token=" + token;
        // NOTE(review): Encoding.Default is platform dependent; it is kept for both the
        // request and the response - confirm the charset the service actually returns.
        Encoding encoding = Encoding.Default;

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
        request.Method = "post";
        request.KeepAlive = true;

        // Base64 encoding of the image, URL-encoded into the form body.
        string base64 = ImageHelper.ImgToBase64String(imageFile);
        String str = "max_face_num=" + 5
            + "&face_fields=" + "age,beauty,expression,faceshape,gender,glasses,landmark,race,qualities"
            + "&image=" + HttpUtility.UrlEncode(base64);

        byte[] buffer = encoding.GetBytes(str);
        request.ContentLength = buffer.Length;

        // Close the request stream once the body is written; it was previously left open.
        using (Stream requestStream = request.GetRequestStream())
        {
            requestStream.Write(buffer, 0, buffer.Length);
        }

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
        {
            string result = reader.ReadToEnd();
            Console.WriteLine("人脸探测:");
            Console.WriteLine(result);
            return result;
        }
    }
}
百度的语音识别就很简单了.在NUGET上搜索baidu.ai安装就行了
// Speech recognition (Asr) and speech synthesis (Tts) clients.
// NOTE(review): both types presumably come from the Baidu.AI NuGet package - confirm.
private readonly Asr _asrClient;
private readonly Tts _ttsClient;

/// <summary>
/// Creates both clients with placeholder credentials; replace them with your own.
/// </summary>
public SpeechDemo()
{
    // NOTE(review): the arguments are presumably (API key, secret key) - confirm against the SDK.
    _asrClient = new Asr("你的KEY", "你的密钥");
    // Uses the same placeholder as the Asr client; a literal secret was previously embedded here.
    _ttsClient = new Tts("你的KEY", "你的密钥");
}

/// <summary>
/// Recognizes a local audio file and writes the recognition result to the console.
/// </summary>
/// <param name="file">Path of the audio file to read.</param>
public void AsrData(string file)
{
    var data = File.ReadAllBytes(file);
    // NOTE(review): "wav" and 16000 are presumably the audio format and sample rate - confirm against the SDK.
    var result = _asrClient.Recognize(data, "wav", 16000);
    Console.Write(result);
}
(至2018/01/09百度语音服务保持免费.但是在调试过程中同一段音频.时而能识别.时而不能.并且存在漏词现象.)