下载网页中远程图片的方法

在编辑网站内容时, 经常会把别的网站的内容复制(COPY)后再粘贴(PASTE)到自己的后台编辑器中, 但其中的图片保存的只是远程URL, 图片文件本身并没有下载到本地. 下面这个方法能把复制回来的HTML代码中的远程图片下载到指定的物理路径, 并把HTML中对应的图片URL替换成指定的URL前缀加上本地文件名.

using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;

namespace EFPlatform.Helper
{
    /// <summary>
    /// Helpers for fetching remote resources that are referenced from HTML.
    /// </summary>
    public class WebRequestHelper
    {
        #region RequestFileFromHtml

        /// <summary>
        /// Matches absolute http/https/ftp URLs that end in .gif, .jpg or .png
        /// (case-insensitive). Built once and shared, because constructing a
        /// compiled regex on every call is expensive.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>\S</c> also matches quotes and angle brackets, so two
        /// URLs with no whitespace at all between them can be captured as a single
        /// match. The pattern is intentionally left unchanged here.
        /// </remarks>
        private static readonly Regex RemoteImageRegex = new Regex(
            @"((http|https|ftp):(\/\/|\\\\){1}(([A-Za-z0-9_-])+[.]){1,}(net|com|cn|org|cc|tv|[0-9]{1,3})(\S*\/)((\S)+[.]{1}(gif|jpg|png)))",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Downloads every remote image referenced in <paramref name="html"/> and
        /// rewrites the references so that they point at the downloaded copies.
        /// </summary>
        /// <param name="html">HTML fragment to scan. Null or empty input is returned unchanged.</param>
        /// <param name="fileUrl">URL prefix that is concatenated with the generated file name to form the new image URL.</param>
        /// <param name="filePath">Path prefix that is concatenated with the generated file name to form the download target.</param>
        /// <param name="reservedHost">Host whose images are left untouched (compared case-insensitively). May be null, in which case no host is excluded.</param>
        /// <returns>
        /// The HTML with the URL of every successfully downloaded image replaced by
        /// <paramref name="fileUrl"/> + generated file name. Images whose download
        /// fails keep their original URL.
        /// </returns>
        public static string RequestFileFromHtml(string html, string fileUrl, string filePath, string reservedHost)
        {
            if (string.IsNullOrEmpty(html))
            {
                return html;
            }

            MatchCollection matches = RemoteImageRegex.Matches(html);

            if (matches.Count == 0)
            {
                return html;
            }

            // Unique image URLs in first-seen order. For each one we remember every
            // literal spelling found in the HTML, because Uri normalizes its input
            // (host case, escaping, slashes) and AbsoluteUri alone may therefore not
            // occur verbatim in the markup.
            List<Uri> urlList = new List<Uri>();
            Dictionary<Uri, List<string>> literalsByUrl = new Dictionary<Uri, List<string>>();

            foreach (Match match in matches)
            {
                Uri url;

                // A regex hit is not guaranteed to be a well-formed URI; skip it
                // instead of letting one bad link abort the whole operation.
                if (!Uri.TryCreate(match.Value, UriKind.Absolute, out url))
                {
                    continue;
                }

                if (string.Equals(url.Host, reservedHost, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                List<string> literals;

                if (!literalsByUrl.TryGetValue(url, out literals))
                {
                    literals = new List<string>();
                    literals.Add(url.AbsoluteUri);
                    literalsByUrl.Add(url, literals);
                    urlList.Add(url);
                }

                if (!literals.Contains(match.Value))
                {
                    literals.Add(match.Value);
                }
            }

            if (urlList.Count == 0)
            {
                return html;
            }

            using (WebClient client = new WebClient())
            {
                for (int i = 0; i < urlList.Count; i++)
                {
                    Uri url = urlList[i];
                    string absoluteUri = url.AbsoluteUri;
                    int dotIndex = absoluteUri.LastIndexOf('.');
                    string fileExt = dotIndex >= 0 ? absoluteUri.Substring(dotIndex) : string.Empty;

                    // Timestamp + index keeps names distinct within a single call.
                    // Invariant culture keeps the name independent of the server's
                    // regional calendar settings.
                    string fileName = string.Format(
                        System.Globalization.CultureInfo.InvariantCulture,
                        "{0:yyMMddHHmmssff}{1}{2}",
                        DateTime.Now,
                        i,
                        fileExt);

                    try
                    {
                        client.DownloadFile(url, filePath + fileName);
                    }
                    catch (WebException)
                    {
                        // Best effort: an image that cannot be fetched or saved
                        // keeps its original remote URL.
                        continue;
                    }
                    catch (System.Security.SecurityException)
                    {
                        // Same best-effort policy when access is denied.
                        continue;
                    }

                    string localUrl = fileUrl + fileName;

                    foreach (string literal in literalsByUrl[url])
                    {
                        html = html.Replace(literal, localUrl);
                    }
                }
            }

            return html;
        }

        #endregion
    }

}
posted @ 2007-06-30 20:57  Eric Fine  阅读(844)  评论(4)  编辑  收藏  举报