【.net】获取网页CDM的下载链接的地址
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Framework.Core.Crawl;
using HtmlAgilityPack;

namespace WebCaptureSolution
{
    /// <summary>
    /// Small crawler: logs in to a forum through an embedded browser form, reads the
    /// thread links from an archive page, visits each thread, and writes the thread
    /// title plus the download links found on it to a text file.
    /// </summary>
    static class Program
    {
        // urlmon.dll session option setter; used here only to override the User-Agent
        // string for this process.
        [DllImport("urlmon.dll", CharSet = CharSet.Ansi)]
        private static extern int UrlMkSetSessionOption(int dwOption, string pBuffer, int dwBufferLength, int dwReserved);

        // Option id passed to UrlMkSetSessionOption to set the User-Agent.
        const int URLMON_OPTION_USERAGENT = 0x10000001;

        // User-Agent string presented by the embedded browser.
        const string SPUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36";

        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <exception cref="InvalidOperationException">
        /// The login page did not expose the expected "content" element.
        /// </exception>
        [STAThread]
        static void Main(string[] args)
        {
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            string log_url = "http://www.handsupowo.pl/member.php?action=login";
            string url1 = "http://www.handsupowo.pl/archive/index.php?forum-13.html";
            string outputPath = @"D:\Nodejs\myjs\DownLoadUrl.txt";

            List<string> lastinfo = new List<string>();

            // NOTE(review): WebCapture is declared outside this file; if it is a Form
            // (IDisposable) it should be disposed when Main finishes - confirm.
            var form = new WebCapture();
            form.DocumentOK = false;

            if (!string.IsNullOrEmpty(SPUserAgent))
            {
                UrlMkSetSessionOption(URLMON_OPTION_USERAGENT, SPUserAgent, SPUserAgent.Length, 0);
            }

            // Step 1: log in.
            Login(form, log_url);

            // Step 2: collect the thread links from the archive page.
            NavigateAndWait(form, url1);
            HtmlAgilityPack.HtmlDocument htmldoc = new HtmlAgilityPack.HtmlDocument();
            htmldoc.LoadHtml(GetRenderedHtml(form));
            List<string> threadUrls = CollectHrefs(htmldoc.DocumentNode.SelectNodes("//div[@class='threadlist']//a"));

            // Step 3: visit every thread and record its title and download links.
            foreach (var threadUrl in threadUrls)
            {
                NavigateAndWait(form, threadUrl);

                // Extra grace period after the load signal before snapshotting the page.
                PumpMessagesFor(5);

                htmldoc.LoadHtml(GetRenderedHtml(form));

                // Prefer rel="nofollow" anchors; fall back to target="_blank" anchors.
                var linkNodes = htmldoc.DocumentNode.SelectNodes("//a[@rel='nofollow']");
                if (linkNodes == null)
                {
                    linkNodes = htmldoc.DocumentNode.SelectNodes("//a[@target='_blank']");
                }

                var titleNode = htmldoc.DocumentNode.SelectSingleNode("//div[@id='fullversion']//a");

                // First line of a record is the title; when the title anchor is missing
                // the thread URL is used so the record stays identifiable.
                List<string> record = new List<string>();
                record.Add(titleNode != null ? titleNode.InnerText : threadUrl);

                // Every link is recorded, including the first one (the previous loop
                // replaced the first link with the title when several were present).
                record.AddRange(CollectHrefs(linkNodes));

                lastinfo.Add(string.Join(Environment.NewLine, record.ToArray()));
                lastinfo.Add(Environment.NewLine);
            }

            // Make sure the target folder exists before writing the result file.
            string outputDirectory = System.IO.Path.GetDirectoryName(outputPath);
            if (!string.IsNullOrEmpty(outputDirectory))
            {
                System.IO.Directory.CreateDirectory(outputDirectory);
            }

            System.IO.File.WriteAllLines(outputPath, lastinfo.ToArray(), Encoding.UTF8);
        }

        /// <summary>
        /// Opens the login page, fills the input fields inside the "content" element,
        /// clicks the submit input, then pumps messages for 10 seconds.
        /// </summary>
        /// <param name="form">Browser form used for navigation.</param>
        /// <param name="loginUrl">Address of the login page.</param>
        /// <exception cref="InvalidOperationException">
        /// No document was loaded or the "content" element is missing.
        /// </exception>
        private static void Login(WebCapture form, string loginUrl)
        {
            NavigateAndWait(form, loginUrl);

            var document = form.WebBrowser.Document;
            var content = document != null ? document.GetElementById("content") : null;
            if (content == null)
            {
                throw new InvalidOperationException("Login page does not contain an element with id 'content': " + loginUrl);
            }

            var inputs = content.GetElementsByTagName("input");
            for (int i = 0; i < inputs.Count; i++)
            {
                var field = inputs[i];
                string markup = field.OuterHtml ?? string.Empty;

                // Inputs are recognised by substrings of their markup, checked in this order.
                if (markup.Contains("username"))
                {
                    field.SetAttribute("value", "id");
                }
                else if (markup.Contains("pass"))
                {
                    field.SetAttribute("value", "password");
                }
                else if (markup.Contains("submit"))
                {
                    field.InvokeMember("Click");
                    break;
                }
            }

            // No load signal is awaited after the click; a fixed wait is used instead.
            PumpMessagesFor(10);
        }

        /// <summary>
        /// Navigates the form to <paramref name="url"/> and pumps the message loop
        /// until <c>form.DocumentOK</c> becomes true.
        /// </summary>
        /// <param name="form">Browser form used for navigation.</param>
        /// <param name="url">Address to load.</param>
        private static void NavigateAndWait(WebCapture form, string url)
        {
            // Presumably WebCapture sets DocumentOK once the page has loaded - confirm.
            // There is no timeout: a page that never signals completion blocks here.
            form.DocumentOK = false;
            form.Navigate(url);
            while (!form.DocumentOK)
            {
                Application.DoEvents();
            }
        }

        /// <summary>
        /// Keeps processing window messages for the given number of seconds.
        /// </summary>
        /// <param name="seconds">How long to keep pumping messages.</param>
        private static void PumpMessagesFor(double seconds)
        {
            // Stopwatch is monotonic, so clock or DST adjustments cannot stretch the wait.
            var watch = System.Diagnostics.Stopwatch.StartNew();
            while (watch.Elapsed.TotalSeconds <= seconds)
            {
                Application.DoEvents();
            }
        }

        /// <summary>
        /// Returns the outer HTML of the first "html" element of the page currently
        /// loaded in the form's browser.
        /// </summary>
        /// <param name="form">Browser form holding the loaded page.</param>
        /// <returns>The page markup, or an empty string when no document or no "html" element is available.</returns>
        private static string GetRenderedHtml(WebCapture form)
        {
            var document = form.WebBrowser.Document;
            if (document == null)
            {
                return string.Empty;
            }

            var roots = document.GetElementsByTagName("html");
            if (roots.Count == 0)
            {
                return string.Empty;
            }

            return roots[0].OuterHtml ?? string.Empty;
        }

        /// <summary>
        /// Extracts the non-empty "href" attribute values from a set of anchor nodes.
        /// </summary>
        /// <param name="anchors">Nodes returned by an XPath query; may be null when nothing matched.</param>
        /// <returns>The href values in document order; empty when <paramref name="anchors"/> is null.</returns>
        private static List<string> CollectHrefs(HtmlNodeCollection anchors)
        {
            List<string> hrefs = new List<string>();
            if (anchors == null)
            {
                return hrefs;
            }

            foreach (var anchor in anchors)
            {
                var href = anchor.Attributes["href"];
                if (href != null && !string.IsNullOrEmpty(href.Value))
                {
                    hrefs.Add(href.Value);
                }
            }

            return hrefs;
        }
    }
}