【.net】获取网页CDM的下载链接的地址

using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Framework.Core.Crawl;
using HtmlAgilityPack;

namespace WebCaptureSolution
{
    /// <summary>
    /// Logs in to the forum through an embedded browser form, walks the archive
    /// thread list, and writes each thread's title and download links to a text file.
    /// </summary>
    static class Program
    {
        /// <summary>
        /// urlmon.dll session-option setter. Called from <see cref="Main"/> with
        /// <see cref="URLMON_OPTION_USERAGENT"/> to set the User-Agent string before navigating.
        /// </summary>
        [DllImport("urlmon.dll", CharSet = CharSet.Ansi)]
        private static extern int UrlMkSetSessionOption(int dwOption, string pBuffer, int dwBufferLength, int dwReserved);

        const int URLMON_OPTION_USERAGENT = 0x10000001;

        const string SPUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36";

        // Destination file for the collected titles and download links.
        private const string OutputPath = @"D:\Nodejs\myjs\DownLoadUrl.txt";

        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        [STAThread]
        static void Main(string[] args)
        {
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);
            string log_url = "http://www.handsupowo.pl/member.php?action=login";

            string url1 = "http://www.handsupowo.pl/archive/index.php?forum-13.html";
            List<string> aList = new List<string>();
            List<string> lastinfo = new List<string>();

            var form = new WebCapture();

            #region Login
            if (!string.IsNullOrEmpty(SPUserAgent))
            {
                UrlMkSetSessionOption(URLMON_OPTION_USERAGENT, SPUserAgent, SPUserAgent.Length, 0);
            }
            NavigateAndWait(form, log_url);

            // Step 1: fill in the login form and submit it.
            var content = form.WebBrowser.Document.GetElementById("content");
            if (content == null)
            {
                // GetElementById returns null when the element is missing; fail with a clear message
                // instead of a NullReferenceException on the next line.
                throw new InvalidOperationException("Login page does not contain the expected 'content' element.");
            }

            var input = content.GetElementsByTagName("input");
            for (int i = 0; i < input.Count; i++)
            {
                var p = input[i];
                if (p.OuterHtml.Contains("username"))
                {
                    p.SetAttribute("value", "id");
                }
                else if (p.OuterHtml.Contains("pass"))
                {
                    p.SetAttribute("value", "password");
                }
                else if (p.OuterHtml.Contains("submit"))
                {
                    p.InvokeMember("Click");
                    break;
                }
            }

            // Give the login request time to complete before moving on.
            PumpMessages(10);
            #endregion

            // Step 2: collect the thread links from the archive listing.
            NavigateAndWait(form, url1);

            HtmlAgilityPack.HtmlDocument htmldoc = new HtmlAgilityPack.HtmlDocument();
            htmldoc.LoadHtml(GetPageHtml(form));

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var anodes = htmldoc.DocumentNode.SelectNodes("//div[@class='threadlist']//a");
            if (anodes != null)
            {
                foreach (var tn in anodes)
                {
                    var href = tn.Attributes["href"];
                    if (href != null && !string.IsNullOrEmpty(href.Value))
                    {
                        aList.Add(href.Value);
                    }
                }
            }

            // Step 3: visit every thread and extract its title and download links.
            foreach (var cdmurl in aList)
            {
                NavigateAndWait(form, cdmurl);

                // Extra settle time after the document-complete signal.
                PumpMessages(5);

                htmldoc.LoadHtml(GetPageHtml(form));
                var downloadurl = htmldoc.DocumentNode.SelectNodes("//a[@rel='nofollow']");
                var info = htmldoc.DocumentNode.SelectSingleNode("//div[@id='fullversion']//a");
                if (downloadurl == null)
                {
                    downloadurl = htmldoc.DocumentNode.SelectNodes("//a[@target='_blank']");
                }

                List<string> dllist = BuildEntry(info, downloadurl, cdmurl);

                lastinfo.Add(string.Join(Environment.NewLine, dllist.ToArray()));
                lastinfo.Add(Environment.NewLine);
            }

            // Make sure the target directory exists so a long crawl is not lost at the final write.
            string outputDirectory = System.IO.Path.GetDirectoryName(OutputPath);
            if (!string.IsNullOrEmpty(outputDirectory))
            {
                System.IO.Directory.CreateDirectory(outputDirectory);
            }

            System.IO.File.WriteAllLines(OutputPath, lastinfo.ToArray(), Encoding.UTF8);
        }

        /// <summary>
        /// Resets the form's DocumentOK flag, starts navigation, and pumps the message
        /// loop until the form sets DocumentOK again.
        /// </summary>
        /// <param name="form">The browser host form.</param>
        /// <param name="url">The address to navigate to.</param>
        private static void NavigateAndWait(WebCapture form, string url)
        {
            form.DocumentOK = false;
            form.Navigate(url);
            while (!form.DocumentOK)
            {
                Application.DoEvents();
            }
        }

        /// <summary>
        /// Keeps the message loop running for the given number of seconds.
        /// </summary>
        /// <param name="seconds">How long to keep pumping messages.</param>
        private static void PumpMessages(double seconds)
        {
            // Stopwatch is monotonic, so the wait is not affected by wall-clock adjustments.
            var timer = System.Diagnostics.Stopwatch.StartNew();
            while (timer.Elapsed.TotalSeconds <= seconds)
            {
                Application.DoEvents();
            }
        }

        /// <summary>
        /// Returns the outer HTML of the currently loaded page's html element,
        /// or an empty string when no document or html element is available.
        /// </summary>
        /// <param name="form">The browser host form.</param>
        private static string GetPageHtml(WebCapture form)
        {
            var document = form.WebBrowser.Document;
            if (document == null)
            {
                return string.Empty;
            }

            var roots = document.GetElementsByTagName("html");
            if (roots.Count == 0)
            {
                return string.Empty;
            }

            return roots[0].OuterHtml ?? string.Empty;
        }

        /// <summary>
        /// Builds one output entry: the thread title followed by every download link.
        /// </summary>
        /// <param name="titleNode">Anchor holding the thread title; may be null.</param>
        /// <param name="links">Download anchors; may be null when the page has none.</param>
        /// <param name="fallbackTitle">Label used when <paramref name="titleNode"/> is null.</param>
        /// <returns>The title line followed by one line per link href.</returns>
        private static List<string> BuildEntry(HtmlAgilityPack.HtmlNode titleNode, HtmlAgilityPack.HtmlNodeCollection links, string fallbackTitle)
        {
            List<string> entry = new List<string>();

            // Fall back to the thread URL so the entry stays identifiable when no title anchor exists.
            entry.Add(titleNode != null ? titleNode.InnerText : fallbackTitle);

            if (links == null)
            {
                return entry;
            }

            // The title is added once above, so every link is kept. Previously the first loop
            // iteration was spent on the title and the first link was dropped.
            foreach (var link in links)
            {
                var href = link.Attributes["href"];
                if (href != null && !string.IsNullOrEmpty(href.Value))
                {
                    entry.Add(href.Value);
                }
            }

            return entry;
        }
    }
}

  

posted @ 2017-02-15 11:54  公众号python学习开发  阅读(350)  评论(0)  编辑  收藏  举报