CefSharp使用记录

一点经验

  • 缺少c++运行库也会报错。直接全装就好了。

本次使用的一些技巧

  • httpClient不行的换用chromium。各种浏览器还是chromium比较全能,之前用miniblink会有些页面打不开。
  • SemaphoreSlim设置一个最大并发数。
  • Task.WhenAny配合Task.Delay设置一个超时时间。
  • 等待document.readyState为complete后,再读取html内容。
        // Number of rows processed so far; reset to 0 at the start of Run and
        // incremented once per row at the end of HandleOneCheck (used for progress output).
        int iCount = 0;
        // Start time of the current run; reassigned at the start of Run.
        DateTime CurStartTime = DateTime.Now;
        /// <summary>
        /// Checks every external link: loads the link rows, checks them concurrently
        /// (concurrency bounded by the "MaxRequest" app setting), then runs the summary step.
        /// </summary>
        /// <returns>A task that completes once every row has been checked and the summary has run.</returns>
        public async Task Run()
        {
            "检查外链-开始执行".Dump();
            CurStartTime = DateTime.Now;

            // Load all external links.
            var dtWaiLian = GetDtWaiLian();
            dtWaiLian.Dump();
            iCount = 0;

            // A SemaphoreSlim created with 0 permits makes every WaitAsync block forever, and a
            // negative count throws; fall back to one-at-a-time processing when the setting is
            // missing or invalid.
            var maxRequest = Math.Max(1, SysVar.GetAppSetting("MaxRequest").CToInt());
            using (var semaphore = new SemaphoreSlim(maxRequest))
            {
                // ToList starts every task exactly once, up front; each one then queues on the semaphore.
                var tasks = dtWaiLian.Rows.Cast<DataRow>().Select(async r =>
                {
                    await semaphore.WaitAsync(); // wait for a free slot
                    try
                    {
                        await HandleOneCheck(r);
                    }
                    finally
                    {
                        semaphore.Release(); // give the slot back
                        // NOTE(review): forced collection after every row is kept from the original;
                        // presumably it reclaims browser memory — confirm it is still needed.
                        GC.Collect();
                    }
                }).ToList();

                // Task.WhenAll only completes after every task has finished, so no task can
                // still be using the semaphore when the using block disposes it.
                await Task.WhenAll(tasks);
            }

            // Summary step.
            ZongJie();
            "检查外链-执行完毕".Dump();
        }

        /// <summary>
        /// Loads the external-link rows to check.
        /// </summary>
        /// <remarks>
        /// The query comes from the "GetDtWaiLianSQL" app setting; when that setting is empty,
        /// a built-in query joining SEO_WaiLian with SEO_Domain is used instead.
        /// </remarks>
        /// <returns>The table returned by <c>SqlSs.GetDataTable</c> for the chosen query.</returns>
        DataTable GetDtWaiLian()
        {
            // Built-in fallback query.
            const string defaultQuery = @"
select
    wl.Id,
    wl.Link,
    d.domain,
    linkState,
    linkErrorDate
from SEO_WaiLian wl
left join SEO_Domain d
    on d.Id = wl.DomainId 
";

            var configuredQuery = SysVar.GetAppSetting("GetDtWaiLianSQL");
            if (!configuredQuery.IsNullOrEmpty())
            {
                return SqlSs.GetDataTable(configuredQuery);
            }

            return SqlSs.GetDataTable(defaultQuery);
        }

        /// <summary>
        /// Checks one external-link row and records the outcome via <c>UpdateWaiLianInfo</c>.
        /// </summary>
        /// <remarks>
        /// The link is fetched with HttpClient first. A 401/403/-1 result is retried with the
        /// Chromium-based fetcher. A 404 is retried once with the URL scheme swapped
        /// (http &lt;-&gt; https) using whichever fetcher is current. A 200 from HttpClient whose
        /// HTML does not contain the domain is fetched again with Chromium.
        /// The recorded state is: the status code when it is not 200, "被删除" when the page
        /// loaded but does not contain the domain, or an empty state when everything is fine.
        /// </remarks>
        /// <param name="r">Row providing the Id, Link, domain and linkState columns.</param>
        /// <returns>A task that completes when the row has been checked and recorded.</returns>
        private async Task HandleOneCheck(DataRow r)
        {
            var dtWaiLian = r.Table;
            var Id = r["Id"].ToString();
            var link = r["Link"].ToString().Trim();
            var domain = r["domain"].ToString().Trim();
            var linkState0 = r["linkState"].ToString().Trim();

            // Start with the cheap fetcher; switch to Chromium only when needed.
            Func<string, Task<ResponseObj>> GetRes = GetUrlResByHttpClientAsync;
            var usingHttpClient = true;

            var res = await GetRes(link);

            // Sites that reject or time out on direct requests: retry with Chromium.
            if (res.code == "401" || res.code == "403" || res.code == "-1")
            {
                GetRes = GetUrlResByCefAsync;
                usingHttpClient = false;
                res = await GetRes(link);
            }

            // 404 may just mean the stored scheme is wrong: retry with the other scheme.
            if (res.code == "404")
            {
                link = SwapHttpScheme(link);
                res = await GetRes(link);
            }

            // Page loaded over HttpClient but the domain is missing: try Chromium once more.
            if (usingHttpClient && res.code == "200" && !res.html.Contains(domain))
            {
                res = await GetUrlResByCefAsync(link);
            }

            $"{res.code}---{link}".Dump();

            // Three outcomes: non-200 status; 200 without the domain; everything fine.
            if (res.code != "200")
            {
                UpdateWaiLianInfo(linkState0, new { Id = Id, linkState = res.code, linkErrorDate = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") });
            }
            else if (!res.html.Contains(domain))
            {
                UpdateWaiLianInfo(linkState0, new { Id = Id, linkState = "被删除", linkErrorDate = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") });
            }
            else
            {
                UpdateWaiLianInfo(linkState0, new { Id = Id, linkState = "", linkErrorDate = DBNull.Value });
            }

            // Rows are checked concurrently, so the shared counter must be incremented atomically.
            var processed = Interlocked.Increment(ref iCount);
            $">>>{processed}/{dtWaiLian.Rows.Count}".Dump();
        }

        // Swaps a leading "http://" for "https://" (or the reverse); any other text in the URL is left untouched.
        private static string SwapHttpScheme(string link)
        {
            if (link.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            {
                return "http" + link.Substring("https".Length);
            }

            if (link.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
            {
                return "https" + link.Substring("http".Length);
            }

            return link;
        }
         
        // ResponseObj: the parts of a fetch result that the link checker needs.
        public class ResponseObj
        {
            // Status code as text; the fetchers in this file use "-1" for a failed or timed-out request.
            public string code;
            // Page HTML; the fetchers in this file use an empty string when no content was read.
            public string html;
        }

        // One shared client for all requests: creating an HttpClient per request can exhaust
        // sockets under load. The 30-second timeout matches the previous per-request client.
        private static readonly HttpClient s_linkCheckHttpClient = new HttpClient
        {
            Timeout = TimeSpan.FromSeconds(30),
        };

        /// <summary>
        /// Fetches a URL with HttpClient and returns its status code and HTML.
        /// </summary>
        /// <param name="url">The URL to request.</param>
        /// <returns>
        /// The numeric status code as text, with the response body when the status is 200 and an
        /// empty string otherwise. Any exception (including a timeout) yields code "-1" and empty HTML.
        /// </returns>
        public async Task<ResponseObj> GetUrlResByHttpClientAsync(string url)
        {
            try
            {
                // Dispose the response so its connection is released promptly.
                using (HttpResponseMessage response = await s_linkCheckHttpClient.GetAsync(url))
                {
                    int statusCode = (int)response.StatusCode;
                    var html = "";
                    if (statusCode == 200)
                    {
                        html = await response.Content.ReadAsStringAsync();
                    }

                    return new ResponseObj
                    {
                        code = statusCode.ToString(),
                        html = html,
                    };
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: every failure is reported as "-1" so the caller
                // can retry with the Chromium-based fetcher.
                return new ResponseObj
                {
                    code = "-1",
                    html = "",
                };
            }
        }

        /// <summary>
        /// Fetches a URL with an off-screen CefSharp browser, with time limits on each stage,
        /// and returns its status code and HTML.
        /// </summary>
        /// <param name="url">The URL to load.</param>
        /// <returns>
        /// The status code reported by the initial load (as text) with the page source, or code
        /// "-1" with empty HTML when the initial load or the source retrieval times out.
        /// </returns>
        public async Task<ResponseObj> GetUrlResByCefAsync(string url)
        {
            // Written against CefSharp.OffScreen 114.2.120.0.
            // Cef may only be initialized once, so initialization is not done here.

            // Result used for both timeout cases.
            Func<ResponseObj> timedOut = () => new ResponseObj
            {
                code = "-1",
                html = "",
            };

            // Off-screen browser instance for this single request.
            using (var browser = new ChromiumWebBrowser(url))
            {
                // Stage 1: wait for the main frame to load, at most 30 seconds.
                var taskLoad = browser.WaitForInitialLoadAsync();
                var loadDeadline = Task.Delay(TimeSpan.FromSeconds(30));
                var firstFinished = await Task.WhenAny(taskLoad, loadDeadline);
                if (firstFinished == loadDeadline)
                {
                    return timedOut();
                }

                // Stage 2: poll document.readyState once per second until it is "complete",
                // giving up after 15 seconds. A failed or throwing evaluation counts as "not ready".
                var readyState = "";
                var pollStart = DateTime.Now;
                var pollDeadline = pollStart + TimeSpan.FromSeconds(15);
                while (readyState != "complete" && DateTime.Now < pollDeadline)
                {
                    await Task.Delay(TimeSpan.FromSeconds(1));
                    try
                    {
                        var jsRes = await browser.EvaluateScriptAsync("document.readyState");
                        readyState = jsRes.Success ? jsRes.Result.ToString() : "";
                    }
                    catch
                    {
                        readyState = "";
                    }
                }

                // Stage 3: read the page source, at most 15 seconds.
                var taskContent = browser.GetSourceAsync();
                var contentDeadline = Task.Delay(TimeSpan.FromSeconds(15));
                var secondFinished = await Task.WhenAny(taskContent, contentDeadline);
                if (secondFinished == contentDeadline)
                {
                    return timedOut();
                }

                // Both tasks have finished by now, so reading Result does not block.
                var result = new ResponseObj();
                result.code = taskLoad.Result.HttpStatusCode.ToString();
                result.html = taskContent.Result;
                return result;
            }
        }

        /// <summary>
        /// Initializes CefSharp with the settings used by the link checker:
        /// persisted session cookies, the zh-CN locale and a desktop Edge user agent.
        /// </summary>
        public void InitCef()
        {
            var cefSettings = new CefSettings();
            cefSettings.CookieableSchemesExcludeDefaults = false;
            cefSettings.PersistSessionCookies = true;
            cefSettings.Locale = "zh-CN";
            cefSettings.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0";

            Cef.Initialize(cefSettings);
        }

        /// <summary>
        /// Writes a link's new state to SEO_WaiLian, unless the state has not changed.
        /// </summary>
        /// <param name="linkState0">The state currently stored for the link.</param>
        /// <param name="param">
        /// A JObject, or any object convertible with <c>JObject.FromObject</c>, carrying an
        /// <c>Id</c> and a <c>linkState</c> property. Every property except <c>Id</c> becomes a
        /// column assignment; null values are written as SQL null.
        /// </param>
        void UpdateWaiLianInfo(string linkState0, object param)
        {
            JObject jp = param as JObject ?? JObject.FromObject(param);

            // "-1" means the page could not be opened in time; it is stored as "打开超时".
            // The fetchers use "-1" so the HttpClient path matches the Chromium path.
            if (jp["linkState"].ToString() == "-1")
            {
                jp["linkState"] = "打开超时";
            }

            // Nothing to write when the state is unchanged.
            if (jp["linkState"].ToString() == linkState0) return;

            // Values are embedded in SQL text, so single quotes must be doubled to keep the
            // statement well-formed (and to avoid injection through a value).
            Func<string, string> escape = value => value.Replace("'", "''");

            var Id = escape(jp["Id"].ToString());
            var setFieldsList = jp.Properties()
                // Ordinal comparison: a culture-sensitive ToLower() can turn "Id" into "ıd" (tr-TR).
                .Where(p => !string.Equals(p.Name, "id", StringComparison.OrdinalIgnoreCase))
                .Select(p => p.Value.Type == JTokenType.Null
                    ? $"{p.Name} = null"
                    : $"{p.Name} = '{escape(p.Value.ToString())}'")
                .ToList();
            var setFieldsStr = string.Join(",", setFieldsList);
            var sql = $@"
update SEO_WaiLian
set {setFieldsStr}
where Id = '{Id}'            
";
            SqlSs.GetNonQuery(sql);
        }

 

posted @ 2024-08-15 17:50  法宝  阅读(25)  评论(0)  编辑  收藏  举报