CefSharp使用记录
一点经验
- 缺少c++运行库也会报错。直接全装就好了。
本次使用的一些技巧
- HttpClient请求不了的页面换用Chromium(CefSharp)。各种浏览器内核里还是Chromium比较全能,之前用miniblink会有些页面打不开。
- SemaphoreSlim设置一个最大并发数。
- 用Task.WhenAny配合Task.Delay给异步操作设置一个超时时间。
- 等待document.readyState为complete后,再读取html内容。
// Number of links processed so far. Incremented from concurrently running
// tasks, so it must only be changed through Interlocked.
int iCount = 0;

// Start time of the current run; assigned in Run().
DateTime CurStartTime = DateTime.Now;

// One HttpClient shared by every request: creating a client per request can
// exhaust sockets when many links are checked in a batch.
static readonly HttpClient SharedHttpClient = new HttpClient
{
    Timeout = TimeSpan.FromSeconds(30),
};

/// <summary>
/// Entry point: loads every external link, checks them concurrently
/// (bounded by the "MaxRequest" app setting) and then calls ZongJie().
/// </summary>
public async Task Run()
{
    "检查外链-开始执行".Dump();
    CurStartTime = DateTime.Now;

    // Load all external links.
    var dtWaiLian = GetDtWaiLian();
    dtWaiLian.Dump();
    iCount = 0;

    // Limit concurrency with a SemaphoreSlim. A missing/zero setting would make
    // every WaitAsync block forever, so clamp the limit to at least 1.
    var maxRequest = Math.Max(1, SysVar.GetAppSetting("MaxRequest").CToInt());
    using (var semaphore = new SemaphoreSlim(maxRequest))
    {
        var tasks = dtWaiLian.Rows.Cast<DataRow>().Select(async r =>
        {
            await semaphore.WaitAsync(); // wait for a free slot
            try
            {
                await HandleOneCheck(r);
            }
            finally
            {
                semaphore.Release(); // give the slot back
                // NOTE(review): forced collection after every link, presumably to
                // curb off-screen browser memory growth - confirm it is still needed.
                GC.Collect();
            }
        }).ToList();

        // Wait for every check to finish before the semaphore is disposed.
        await Task.WhenAll(tasks);
    }

    // Summary step (ZongJie is defined outside this section).
    ZongJie();
    "检查外链-执行完毕".Dump();
}

/// <summary>
/// Loads the external-link rows. Uses the SQL from the "GetDtWaiLianSQL"
/// app setting when present, otherwise a built-in default query.
/// </summary>
/// <returns>A table with the columns Id, Link, domain, linkState, linkErrorDate.</returns>
DataTable GetDtWaiLian()
{
    var sqlWaiLian = SysVar.GetAppSetting("GetDtWaiLianSQL");
    if (sqlWaiLian.IsNullOrEmpty())
    {
        sqlWaiLian = @"
            select wl.Id, wl.Link, d.domain, linkState, linkErrorDate
            from SEO_WaiLian wl
            left join SEO_Domain d on d.Id = wl.DomainId
        ";
    }

    return SqlSs.GetDataTable(sqlWaiLian);
}

/// <summary>
/// Checks one external-link row: requests the page (HttpClient first, falling
/// back to CefSharp), then records whether the page is reachable and still
/// contains the expected domain.
/// </summary>
/// <param name="r">Row with the columns Id, Link, domain and linkState.</param>
private async Task HandleOneCheck(DataRow r)
{
    var dtWaiLian = r.Table;
    var Id = r["Id"].ToString();
    var link = r["Link"].ToString().Trim();
    var domain = r["domain"].ToString().Trim();
    var linkState0 = r["linkState"].ToString().Trim();

    // Start with the cheap HttpClient request.
    Func<string, Task<ResponseObj>> GetRes = GetUrlResByHttpClientAsync;
    var usingCef = false;
    var res = await GetRes(link);

    // Pages that refuse or time out on a direct request (401/403/-1) are retried in Chromium.
    if (res.code == "401" || res.code == "403" || res.code == "-1")
    {
        GetRes = GetUrlResByCefAsync;
        usingCef = true;
        res = await GetRes(link);
    }

    // A 404 may just mean the stored scheme is wrong: retry with http <-> https swapped.
    if (res.code == "404")
    {
        link = SwapHttpScheme(link);
        res = await GetRes(link);
    }

    // HttpClient returned 200 but the domain is missing: the page may be
    // rendered by script, so try again in Chromium.
    if (!usingCef && res.code == "200" && !res.html.Contains(domain))
    {
        GetRes = GetUrlResByCefAsync;
        usingCef = true;
        res = await GetRes(link);
    }

    $"{res.code}---{link}".Dump();

    // Three outcomes: non-200 status; 200 without the domain; healthy.
    if (res.code != "200")
    {
        UpdateWaiLianInfo(linkState0, new { Id = Id, linkState = res.code.ToString(), linkErrorDate = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") });
    }
    else if (!res.html.Contains(domain))
    {
        UpdateWaiLianInfo(linkState0, new { Id = Id, linkState = "被删除", linkErrorDate = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") });
    }
    else
    {
        UpdateWaiLianInfo(linkState0, new { Id = Id, linkState = "", linkErrorDate = DBNull.Value });
    }

    // Checks run concurrently, so the progress counter must be incremented atomically.
    $">>>{Interlocked.Increment(ref iCount)}/{dtWaiLian.Rows.Count}".Dump();
}

/// <summary>
/// Swaps a leading "https" for "http" or a leading "http" for "https".
/// Only the scheme prefix is touched; any later "http" text in the path or
/// query string is left alone. Other links are returned unchanged.
/// </summary>
static string SwapHttpScheme(string link)
{
    if (link.StartsWith("https", StringComparison.Ordinal))
    {
        return "http" + link.Substring("https".Length);
    }

    if (link.StartsWith("http", StringComparison.Ordinal))
    {
        return "https" + link.Substring("http".Length);
    }

    return link;
}

/// <summary>Result of fetching a URL: status code (as text) and page HTML.</summary>
public class ResponseObj
{
    // HTTP status code as a string; "-1" is used for timeouts and failed requests.
    public string code;

    // Page source; empty when it was not read.
    public string html;
}

/// <summary>
/// Fetches a URL with HttpClient (30 s timeout).
/// </summary>
/// <param name="url">Address to request.</param>
/// <returns>
/// The status code and, for a 200 response only, the body. Any exception
/// (timeout, DNS failure, invalid URL, ...) is reported as code "-1".
/// </returns>
public async Task<ResponseObj> GetUrlResByHttpClientAsync(string url)
{
    try
    {
        using (HttpResponseMessage response = await SharedHttpClient.GetAsync(url))
        {
            int statusCode = (int)response.StatusCode;
            var html = "";
            if (statusCode == 200)
            {
                html = await response.Content.ReadAsStringAsync();
            }

            return new ResponseObj() { code = statusCode.ToString(), html = html };
        }
    }
    catch (Exception)
    {
        // Best effort by design: callers treat "-1" as "could not open".
        return new ResponseObj() { code = "-1", html = "" };
    }
}

/// <summary>
/// Fetches a URL with an off-screen CefSharp browser, with time limits on each step.
/// Cef must already be initialized (see InitCef); it can only be initialized once.
/// </summary>
/// <param name="url">Address to load.</param>
/// <returns>
/// The main-frame HTTP status code and page source, or code "-1" when the
/// initial load (30 s) or the source retrieval (15 s) times out.
/// </returns>
public async Task<ResponseObj> GetUrlResByCefAsync(string url)
{
    // Written against CefSharp.OffScreen.114.2.120.0
    using (var browser = new ChromiumWebBrowser(url))
    {
        // Wait for the main frame to load, but no longer than 30 s.
        var taskLoad = browser.WaitForInitialLoadAsync();
        var taskLoadTimeout = Task.Delay(TimeSpan.FromSeconds(30));
        if (await Task.WhenAny(taskLoad, taskLoadTimeout) == taskLoadTimeout)
        {
            return new ResponseObj { code = "-1", html = "" };
        }

        var loadResponse = await taskLoad;

        // Poll document.readyState once per second until it is "complete" or
        // 15 s have passed. A failed evaluation counts as "not ready yet".
        Func<Task<string>> GetDocReadyState = async () =>
        {
            try
            {
                var jsRes = await browser.EvaluateScriptAsync("document.readyState");
                if (jsRes.Success == false)
                {
                    return "";
                }

                return jsRes.Result.ToString();
            }
            catch
            {
                return "";
            }
        };

        var readyState = "";
        var maxWaitTime = TimeSpan.FromSeconds(15);
        // UtcNow: an elapsed-time measurement must not be affected by local clock shifts.
        var startTime = DateTime.UtcNow;
        while (readyState != "complete" && (DateTime.UtcNow - startTime) < maxWaitTime)
        {
            await Task.Delay(TimeSpan.FromSeconds(1));
            readyState = await GetDocReadyState();
        }

        // Read the page source, again with a 15 s limit.
        var taskContent = browser.GetSourceAsync();
        var taskContentTimeout = Task.Delay(TimeSpan.FromSeconds(15));
        if (await Task.WhenAny(taskContent, taskContentTimeout) == taskContentTimeout)
        {
            return new ResponseObj { code = "-1", html = "" };
        }

        return new ResponseObj
        {
            code = loadResponse.HttpStatusCode.ToString(),
            html = await taskContent,
        };
    }
}

/// <summary>
/// Initializes CefSharp with the cookie, locale and user-agent settings used by this script.
/// </summary>
public void InitCef()
{
    var settings = new CefSettings()
    {
        CookieableSchemesExcludeDefaults = false,
        PersistSessionCookies = true,
        Locale = "zh-CN",
        UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
    };
    Cef.Initialize(settings);
}

/// <summary>
/// Writes the new link state to SEO_WaiLian, unless the state is unchanged.
/// </summary>
/// <param name="linkState0">The state currently stored for the row.</param>
/// <param name="param">
/// A JObject or plain object with an "Id" property, a "linkState" property and
/// any further properties to write; every property except Id becomes a SET clause.
/// </param>
void UpdateWaiLianInfo(string linkState0, object param)
{
    JObject jp = param as JObject ?? JObject.FromObject(param);

    // "-1" is stored as the "open timed out" text; the earlier logic uses -1
    // to stay consistent with Chromium.
    if (jp["linkState"].ToString() == "-1")
    {
        jp["linkState"] = "打开超时";
    }

    // Nothing to write when the state did not change.
    if (jp["linkState"].ToString() == linkState0)
    {
        return;
    }

    // Values are embedded in the statement text, so single quotes must be
    // doubled to keep the SQL valid.
    // NOTE(review): switch to parameters if SqlSs offers a parameterized overload.
    Func<string, string> escape = s => s.Replace("'", "''");

    var Id = escape(jp["Id"].ToString());
    var setFieldsList = jp.Properties()
        .Where(p => !string.Equals(p.Name, "id", StringComparison.OrdinalIgnoreCase))
        .Select(p => p.Value.Type == JTokenType.Null
            ? $"{p.Name} = null"
            : $"{p.Name} = '{escape(p.Value.ToString())}'")
        .ToList();
    var setFieldsStr = string.Join(",", setFieldsList);

    var sql = $@"
        update SEO_WaiLian
        set {setFieldsStr}
        where Id = '{Id}'
    ";
    SqlSs.GetNonQuery(sql);
}