正则表达式：匹配标签里面的属性值（例如 img 的 src、input 的 value）
Code
/// <summary>
/// Collects the <c>value</c> attribute of every <c>&lt;input&gt;</c> tag located between the
/// first <c>&lt;title&gt;</c> tag and the first following <c>function JasonSearch</c> text.
/// </summary>
/// <param name="htmlText">HTML markup to scan; <c>null</c> or empty yields an empty list.</param>
/// <returns>
/// An <see cref="ArrayList"/> of <see cref="string"/> values in document order. Tags without a
/// <c>value</c> attribute are skipped; when the delimiting markers are absent the list is empty.
/// </returns>
public ArrayList GetInput(string htmlText)
{
    ArrayList values = new ArrayList();
    if (string.IsNullOrEmpty(htmlText))
    {
        return values;
    }

    // Pattern text is kept exactly as before. Note the trailing "()" is an empty regex group,
    // not a literal pair of parentheses, so the match ends right after "function JasonSearch".
    const string sectionPattern = "<title>(?<content>.*?)function JasonSearch()";
    // [^>]* instead of the former [^~]*? so tags whose attributes contain '~' are not skipped.
    const string tagPattern = "<input\\s[^>]*>";
    // Quoted values are captured up to the matching quote (whitespace allowed); unquoted values
    // run to the next whitespace or '>'. The look-behind rejects names such as "data-value".
    const string attributePattern =
        "(?<![\\w-])value\\s*=\\s*(?:\"(?<url>[^\"]*)\"|'(?<url>[^']*)'|(?<url>[^\\s\"'>]+))";

    // The static Regex API caches parsed patterns, so nothing is rebuilt on every call
    // (the previous per-call instance with RegexOptions.Compiled was recompiled each time).
    string section = Regex.Match(
        htmlText,
        sectionPattern,
        RegexOptions.IgnoreCase | RegexOptions.Singleline).Groups["content"].Value;

    foreach (Match tag in Regex.Matches(section, tagPattern, RegexOptions.IgnoreCase))
    {
        Match attribute = Regex.Match(tag.Value, attributePattern, RegexOptions.IgnoreCase);
        if (!attribute.Success)
        {
            // Match.Result throws NotSupportedException on a failed match, so an <input>
            // without a value attribute is skipped instead of aborting the whole scan.
            continue;
        }

        values.Add(attribute.Groups["url"].Value);
    }

    return values;
}
/// <summary>
/// Extracts the <c>src</c> attribute value of every <c>&lt;img&gt;</c> tag in an HTML fragment.
/// </summary>
/// <param name="htmlText">HTML markup to scan; <c>null</c> or empty yields an empty list.</param>
/// <returns>
/// An <see cref="ArrayList"/> of <see cref="string"/> values in document order, one per
/// <c>&lt;img&gt;</c> tag that carries a <c>src</c> attribute. Tags without one are skipped.
/// </returns>
public static ArrayList GetImages(string htmlText)
{
    ArrayList sources = new ArrayList();
    if (string.IsNullOrEmpty(htmlText))
    {
        return sources;
    }

    // [^>]* instead of the former [^~]*? so tags such as <img src="~/a.png"> are not skipped.
    const string tagPattern = "<img\\s[^>]*>";
    // Quoted values are captured up to the matching quote (whitespace allowed); unquoted values
    // run to the next whitespace or '>'. The look-behind rejects names such as "data-src".
    const string attributePattern =
        "(?<![\\w-])src\\s*=\\s*(?:\"(?<url>[^\"]*)\"|'(?<url>[^']*)'|(?<url>[^\\s\"'>]+))";

    foreach (Match tag in Regex.Matches(htmlText, tagPattern, RegexOptions.IgnoreCase))
    {
        Match attribute = Regex.Match(tag.Value, attributePattern, RegexOptions.IgnoreCase);
        if (!attribute.Success)
        {
            // Match.Result throws NotSupportedException on a failed match, so an <img>
            // without a src attribute is skipped instead of aborting the whole scan.
            continue;
        }

        sources.Add(attribute.Groups["url"].Value);
    }

    return sources;
}
/// <summary>
/// Collects the <c>value</c> attribute of every <c>&lt;input&gt;</c> tag located between the
/// first <c>&lt;title&gt;</c> tag and the first following <c>function JasonSearch</c> text.
/// </summary>
/// <param name="htmlText">HTML markup to scan; <c>null</c> or empty yields an empty list.</param>
/// <returns>
/// An <see cref="ArrayList"/> of <see cref="string"/> values in document order. Tags without a
/// <c>value</c> attribute are skipped; when the delimiting markers are absent the list is empty.
/// </returns>
public ArrayList GetInput(string htmlText)
{
    ArrayList values = new ArrayList();
    if (string.IsNullOrEmpty(htmlText))
    {
        return values;
    }

    // Pattern text is kept exactly as before. Note the trailing "()" is an empty regex group,
    // not a literal pair of parentheses, so the match ends right after "function JasonSearch".
    const string sectionPattern = "<title>(?<content>.*?)function JasonSearch()";
    // [^>]* instead of the former [^~]*? so tags whose attributes contain '~' are not skipped.
    const string tagPattern = "<input\\s[^>]*>";
    // Quoted values are captured up to the matching quote (whitespace allowed); unquoted values
    // run to the next whitespace or '>'. The look-behind rejects names such as "data-value".
    const string attributePattern =
        "(?<![\\w-])value\\s*=\\s*(?:\"(?<url>[^\"]*)\"|'(?<url>[^']*)'|(?<url>[^\\s\"'>]+))";

    // The static Regex API caches parsed patterns, so nothing is rebuilt on every call
    // (the previous per-call instance with RegexOptions.Compiled was recompiled each time).
    string section = Regex.Match(
        htmlText,
        sectionPattern,
        RegexOptions.IgnoreCase | RegexOptions.Singleline).Groups["content"].Value;

    foreach (Match tag in Regex.Matches(section, tagPattern, RegexOptions.IgnoreCase))
    {
        Match attribute = Regex.Match(tag.Value, attributePattern, RegexOptions.IgnoreCase);
        if (!attribute.Success)
        {
            // Match.Result throws NotSupportedException on a failed match, so an <input>
            // without a value attribute is skipped instead of aborting the whole scan.
            continue;
        }

        values.Add(attribute.Groups["url"].Value);
    }

    return values;
}
/// <summary>
/// Extracts the <c>src</c> attribute value of every <c>&lt;img&gt;</c> tag in an HTML fragment.
/// </summary>
/// <param name="htmlText">HTML markup to scan; <c>null</c> or empty yields an empty list.</param>
/// <returns>
/// An <see cref="ArrayList"/> of <see cref="string"/> values in document order, one per
/// <c>&lt;img&gt;</c> tag that carries a <c>src</c> attribute. Tags without one are skipped.
/// </returns>
public static ArrayList GetImages(string htmlText)
{
    ArrayList sources = new ArrayList();
    if (string.IsNullOrEmpty(htmlText))
    {
        return sources;
    }

    // [^>]* instead of the former [^~]*? so tags such as <img src="~/a.png"> are not skipped.
    const string tagPattern = "<img\\s[^>]*>";
    // Quoted values are captured up to the matching quote (whitespace allowed); unquoted values
    // run to the next whitespace or '>'. The look-behind rejects names such as "data-src".
    const string attributePattern =
        "(?<![\\w-])src\\s*=\\s*(?:\"(?<url>[^\"]*)\"|'(?<url>[^']*)'|(?<url>[^\\s\"'>]+))";

    foreach (Match tag in Regex.Matches(htmlText, tagPattern, RegexOptions.IgnoreCase))
    {
        Match attribute = Regex.Match(tag.Value, attributePattern, RegexOptions.IgnoreCase);
        if (!attribute.Success)
        {
            // Match.Result throws NotSupportedException on a failed match, so an <img>
            // without a src attribute is skipped instead of aborting the whole scan.
            continue;
        }

        sources.Add(attribute.Groups["url"].Value);
    }

    return sources;
}