10---Net基础加强

复习：

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
using System.Runtime.Serialization.Formatters.Binary;
using System.IO;

namespace _01作业问题
{
    // Homework review: regex validation exercises and a BinaryFormatter round trip.
    // Every sample inside Main is commented out, so running the program as-is does nothing;
    // uncomment one region at a time to try it.
    class Program
    {
        // Entry point. args is not used by any of the samples.
        static void Main(string[] args)
        {
            #region 正则
            // Regex basics: validate that the input is exactly six digits (e.g. a postcode).
            // RegexOptions.ECMAScript restricts \d to the ASCII digits 0-9.
            // The loop never exits; stop the console manually.
            //Regex regex = new Regex(@"^\d{6}$", RegexOptions.ECMAScript);
            //while (true)
            //{
            //    Console.WriteLine("请输入一个字符串");
            //    string postcode = Console.ReadLine();
            //    bool b = regex.IsMatch(postcode);
            //    Console.WriteLine(b);
            //}           
            #endregion

            #region 二进制序列化的步骤
            // Steps of binary serialization: write a MyClass instance to the file "data".
            // NOTE(review): BinaryFormatter is documented as insecure and obsolete in current .NET;
            // keep this sample for study only.
            // NOTE(review): File.OpenWrite does not truncate an existing file, so a longer, older
            // "data" file would keep its trailing bytes.
           
           // //Create a binary serializer
           // BinaryFormatter bf = new BinaryFormatter();
          
           //using (FileStream fsWrite = File.OpenWrite("data"))
           //{
           //    //2. Perform the serialization or deserialization
           //    //Serialize takes two arguments: the stream, and the object to serialize
           //    bf.Serialize(fsWrite,new MyClass());
           //}

           //Console.WriteLine("OK");

            #endregion

            #region 二进制反序列化的步骤
            // Steps of binary deserialization: read the object back from the file "data".
            // NOTE(review): "obj as MyClass" yields null when the stored object is of another type;
            // the sample never checks or uses mc.

            //BinaryFormatter bf = new BinaryFormatter();

            //using (FileStream fsRead = File.OpenRead("data"))
            //{
            //    object obj = bf.Deserialize(fsRead);
            //    MyClass mc = obj as MyClass;
            //}

            //Console.WriteLine("OK");

            #endregion

            #region MyRegion
            // Backslash escaping: the same regex written as a regular and as a verbatim (@) string.
            //while (true)
            //{
            //    Console.WriteLine("请输入一个字符串");
            //    string input = Console.ReadLine();

            //    ////True only when the user enters a single digit character
            //    //bool b = Regex.IsMatch(input,"^\\d$");

            //    //True only when the user enters the literal text \d; any other input gives false
            //    //bool b = Regex.IsMatch(input, "^\\\\d$");
            //    bool b = Regex.IsMatch(input, @"^\\d$");
            //    Console.WriteLine(b);
            //}  
            #endregion

            #region 匹配IP地址，4段用.分割的最多的三位数字。192.168.54.77、333.333.333.333假设都是正确的
            // Match an IP address: four dot-separated groups of at most three digits each.
            // The exercise treats both 192.168.54.77 and 333.333.333.333 as valid (no 0-255 range check).
            //while (true)
            //{
            //    Console.WriteLine("请输入一个字符串");
            //    string ip = Console.ReadLine();
            //    bool b = Regex.IsMatch(ip, @"^(\d{1,3}\.){3}\d{1,3}$",RegexOptions.ECMAScript);
            //    Console.WriteLine(b);
            //}  
            #endregion

            #region 判断是否是合法的日期“2008-08-08”.四位数字-两位数字-two位数字
            // Check for a date shaped like "2008-08-08": four digits - two digits - two digits.
            // The first (disabled) pattern checks the shape only; the second also limits the month to 01-12.
            // The day part is still any two digits.
            //while (true)
            //{
            //    Console.WriteLine("请输入日期");
            //    string date = Console.ReadLine();
            //    //bool b = Regex.IsMatch(date, @"^\d{4}-\d{2}-\d{2}$", RegexOptions.ECMAScript);
            //    bool b = Regex.IsMatch(date, @"^\d{4}-(0[1-9]|1[0-2])-\d{2}$", RegexOptions.ECMAScript);
            //    Console.WriteLine(b);
            //}  
            #endregion

            #region 判断是否是合法的url地址
            // Check whether the input looks like a URL: an alphanumeric scheme, "://", then at least one character.
            //while (true)
            //{
            //    Console.WriteLine("请输入");
            //    string url = Console.ReadLine();
            //    bool b = Regex.IsMatch(url, @"^[a-zA-Z0-9]+://.+$");
            //    Console.WriteLine(b);
            //}  
            #endregion
        }
    }

    // Empty payload type used by the (commented-out) serialization samples in Program.Main.
    // It is marked [Serializable], which BinaryFormatter requires of the types it serializes.
    [Serializable]
    class MyClass
    { 
    
    }
}

字符串提取与正则表达式提取-提取组

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
using System.IO;

namespace _02正则表达式提取
{
    // Regex extraction exercises: Match(), Matches() and capture groups.
    // Every sample inside Main is commented out, so running the program as-is does nothing;
    // uncomment one region (or one sample within a region) at a time to try it.
    class Program
    {
        // Entry point. args is not used by any of the samples.
        static void Main(string[] args)
        {
            #region 提取出字符串中的所有数字
            // Extract all numbers from a string.

            //string msg = "大家好呀，hello,2015年 3月30日是个好日子，恩恩 9494，吼吼886";

            ////String extraction: Match() and Matches()
            ////Extract the first matching substring; only one is extracted.
            ////Match match = Regex.Match(msg, @"\d", RegexOptions.ECMAScript);//2
            //Match match = Regex.Match(msg, @"\d+", RegexOptions.ECMAScript);//2015
            //Console.WriteLine(match.Value);
            //Console.ReadLine();

            ////Extract one at a time
            ////Match match1 = Regex.Match(msg, @"\d+", RegexOptions.ECMAScript);//2015
            ////match1.Index=
            ////Console.WriteLine(match1.Value);
            // Each following call resumes the search right after the end of the previous match.
            //Regex regex = new Regex(@"\d+", RegexOptions.ECMAScript);
            //Match match = regex.Match(msg);
            //Console.WriteLine(match.Value);

            //match = regex.Match(msg,match.Index+match.Value.Length);
            //Console.WriteLine(match.Value);

            //match = regex.Match(msg, match.Index + match.Value.Length);
            //Console.WriteLine(match.Value);
            //Console.ReadLine();


            ////Extract one at a time
            ////IsMatch() is normally used to test for a full match, so the pattern gets ^ and $.
            ////Match() and Matches() extract matching parts out of a larger string, so do not add ^ and $ there;
            ////if you do, the entire string has to match the regular expression.
            // NOTE(review): match.Success is the direct way to test for a match; the length test below
            // works only because \d+ can never match an empty string.
            //Regex regex = new Regex(@"\d+", RegexOptions.ECMAScript);
            //Match match = regex.Match(msg);
            //while (match.Value.Length!=0)
            //{
            //     Console.WriteLine(match.Value);
            //     match = regex.Match(msg, match.Index + match.Value.Length);
            //}
            //Console.ReadLine();


            ////Extract all matching substrings
            // MatchCollection matches = Regex.Matches(msg, @"\d+", RegexOptions.ECMAScript);
            // for (int i = 0; i < matches.Count; i++)
            // {
            //     Console.WriteLine(matches[i].Value); 
            // }
            // Console.ReadLine();

            //Check whether the input matches
            //Regex.IsMatch()

            #endregion

            #region 从文件中提取Email地址
            // Extract email addresses from a file.

            ////1. Read the file contents into a string
            //string html = File.ReadAllText("提取Email.htm");

            ////2. Build the regular expression
            //MatchCollection matches = Regex.Matches(html, @"[-a-zA-Z0-9._]+@[-a-zA-Z0-9]+(\.[a-zA-Z0-9]+){1,}", RegexOptions.ECMAScript);

            ////3. Extract
            //for (int i = 0; i < matches.Count; i++)
            //{

            //    //4. Output
            //    Console.WriteLine(matches[i].Value);
            //}
            //Console.ReadLine();
          
            #endregion

            #region 统计叶长种个数
            // Count how many times the name "叶长种" occurs, printing the index of each occurrence.

            //string msg = "大家好哦，我叫叶长种，叶长种是个好孩子，哈哈哈哈哈哈·····你有认识叫叶长种的吗";

            //MatchCollection matches = Regex.Matches(msg, "叶长种");

            //foreach (Match item in matches)
            //{
            //    Console.WriteLine(item.Index);
            //}
            //Console.WriteLine("一共出现了{0}次",matches.Count);
            //Console.ReadLine();

            #endregion

            #region 字符串提取
            // String extraction with capture groups.

            ////1. Extract a part of a larger string

            ////2. From the extracted string, extract a sub-part of it
            ////Capture groups

            ////1. Read the file contents into a string
            //string html = File.ReadAllText("提取Email.htm");

            ////2. Build the regular expression
            //MatchCollection matches = Regex.Matches(html, @"[-a-zA-Z0-9._]+@[-a-zA-Z0-9]+(\.[a-zA-Z0-9]+){1,}", RegexOptions.ECMAScript);

            ////()()(()()) Groups are numbered according to the parentheses
            // NOTE(review): group 1 sits under the {1,} quantifier, so Groups[1].Value holds only its
            // last capture (for x@a.b.c that is ".c"), not the whole domain suffix.
            //foreach (Match item in matches)
            //{
            //    Console.WriteLine(item.Value+"================"+item.Groups[1].Value);
            //}
            //Console.WriteLine(matches.Count);
            //Console.ReadLine();

            #endregion

            #region 提取组2
            // Capture groups, part 2: split "key=value" into its two sides.

            //Match match = Regex.Match("age=30",@"^(.+)=(.+)$");
            //Console.WriteLine(match.Groups[1].Value+"============="+match.Groups[2].Value);
            //Console.ReadKey();
            
            #endregion

            #region 从文件路径中提取文件名
            // Extract the file name from a file path.

            ////Plain extraction with Match()/Matches(): find every substring of the whole string that matches the regex.

            ////Capture-group approach: first write a regex that matches the whole string, then wrap the
            ////parts you want to extract in parentheses; those groups can then be read out.

            // The greedy .+ runs up to the last backslash, so group 1 is what follows it.
            //string path = @"c:\windows\testb.txt";
            //Match match=Regex.Match(path, @".+\\(.+)");
            //Console.WriteLine(match.Groups[1].Value);
            //Console.ReadKey();

            #endregion

            #region 提取年月日
            // Extract month, day and year from a loosely spaced date string.

            //string msg = "June          26    ,   1951   ";
            //Match match = Regex.Match(msg,@"^([a-zA-Z]+)\s*(\d{1,2})\s*,\s*(\d{4})\s*$");
            //Console.WriteLine(match.Groups[1].Value);
            //Console.WriteLine(match.Groups[2].Value);
            //Console.WriteLine(match.Groups[3].Value);
            //Console.ReadKey();

            #endregion

            #region 从Email中提取出用户名和域名，比如从test@163.com中提取出test和163.com。
            // Extract the user name and the domain from an email address,
            // e.g. "test" and "163.com" from test@163.com.

            //string email = "test@163.com";
            //Match match = Regex.Match(email, @"(^\w+)@(\w+\.\w+)$");
            //Console.WriteLine(match.Groups[1].Value + "         " + match.Groups[2].Value);
            //Console.ReadKey();

            #endregion


            #region “192.168.10.5[port=21,type=ftp]”，这个字符串表示IP地址为192.168.10.5的服务器的21端口提供的是ftp服务，其中如果“,type=ftp”部分被省略，则默认为http服务。请用程序解析此字符串，然后打印出“IP地址为***的服务器的***端口提供的服务为***”
            // Exercise: "192.168.10.5[port=21,type=ftp]" means that port 21 of the server at 192.168.10.5
            // provides the ftp service; when the ",type=ftp" part is omitted the service defaults to http.
            // Parse the string and print the IP address, the port and the service.
            // Groups: 1 = whole IP, 2 = repeated "ddd." part, 3 = port, 4 = optional ",type=...", 5 = service name.
            // NOTE(review): \d{1,2} accepts only one- or two-digit ports; real port numbers go up to five digits.
            // NOTE(review): the port line passes match.Groups[3] without .Value; it relies on Group.ToString().

            //string s1 = "192.168.10.5[port=21,type=ftp]";
            ////string s1 = "192.168.10.5[port=21]";

            //Match match = Regex.Match(s1, @"^((\d{1,3}\.){3}\d{1,3})\[port=(\d{1,2})(,type=([a-zA-Z0-9]+))?\]$", RegexOptions.ECMAScript);
            //Console.WriteLine("ip:{0}", match.Groups[1].Value);
            //Console.WriteLine("port:{0}", match.Groups[3]);
            //Console.WriteLine("service:{0}", match.Groups[5].Value.Length == 0 ? "http" : match.Groups[5].Value);
            //Console.ReadKey();

            #endregion


        }
    }
}

正则表达式-贪婪模式

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;

namespace _03贪婪模式
{
    // Greedy versus lazy quantifier exercises.
    // Every sample inside Main is commented out, so running the program as-is does nothing;
    // uncomment one region at a time to try it.
    class Program
    {
        // Entry point. args is not used by any of the samples.
        static void Main(string[] args)
        {
            #region 贪婪模式介绍
            // Introduction to greedy mode.

            //string msg = "1111。11。111。111111。";

            ////A regular expression matches as much as it can; this is greedy mode.
            ////Match match = Regex.Match(msg, ".+");

            ////Ending greedy mode: ? can switch greediness off.
            ////When ? follows another quantifier, it makes that quantifier lazy.
            ////Lazy means matching as little as possible, so here only one character is matched.
            //Match match = Regex.Match(msg, ".+?");  
            // NOTE(review): ".+*?" below is a nested quantifier, which .NET rejects at parse time;
            // presumably ".*?" was intended - confirm.
            ////Match match = Regex.Match(msg, ".+*?");  //match as few as possible: 0 characters
            //Console.WriteLine(match.Value);
            //Console.ReadKey();

            #endregion

            #region 案例1
            // Case 1: greedy "(.+)(。)" runs to the last "。"; lazy ".+?。" stops at the first one.

            ////string msg = "1111。11。111。111111。";
            ////Match match = Regex.Match(msg, "(.+)(。)");
            ////Console.WriteLine(match.Value);
            ////Console.WriteLine(match.Groups[1].Value + "       " + match.Groups[2].Value);
            ////Console.ReadKey();

            //string msg = "1111。11。111。111111。";
            ////Result after switching greedy mode off: 1111。
            //Match match = Regex.Match(msg, ".+?。");
            //Console.WriteLine(match.Value);
            //Console.ReadKey();

            #endregion

            #region 案例2
            // Case 2: pull out every name that follows "我是" up to the next "。".

            //string msg = "大家好。我们是S.H.E。我是S。我是H。我是E。我是叶长种。我是刘德华。我是范冰冰。我是小王。我是N.L.L。我是★小叶★。呜呜。fffff";

            ////Expected result: S    H   E    叶长种   刘德华   范冰冰   小王  N.L.L   ★小叶★

            ////When you expect several matches but get a single very large one, greedy mode is usually the cause; try a lazy quantifier.
            //MatchCollection matches = Regex.Matches(msg, "我是(.+?)。");
            //foreach (Match item in matches)
            //{
            //    Console.WriteLine(item.Groups[1].Value);
            //}
            //Console.ReadKey();

            #endregion


        }
    }
}

正则表达式替换

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;

namespace _04正则表达式替换
{
    // Regex.Replace exercises, including group references ($1, $2, ...) in the replacement string.
    // Every sample inside Main is commented out, so running the program as-is does nothing;
    // uncomment one sample at a time to try it.
    class Program
    {
        // Entry point. args is not used by any of the samples.
        static void Main(string[] args)
        {
            // string.Replace swaps every single "a" for "A"; the disabled Regex.Replace variant
            // collapses each run of "a" characters into one "A".
            //string msg = "你aaa好aa哈哈a你";
            //msg = msg.Replace("a", "A");
            ////msg = Regex.Replace(msg, "a+", "A");
            //Console.WriteLine(msg);
            //Console.ReadKey();

            #region 练习1：将一段文本中的MM/DD/YYYY格式的日期转换为YYYY-MM-DD格式 ，比如“我的生日是05/21/2010耶”转换为“我的生日是2010-05-21耶”。
            // Exercise 1: convert every MM/DD/YYYY date in a text to YYYY-MM-DD,
            // e.g. "05/21/2010" becomes "2010-05-21".

            //string msg = "我的生日是05/21/2010耶我的生日是03/11/2000耶我的生日是05/21/2010耶我的生日是05/21/2010耶";
            ////Capture groups can be used in the replacement; refer to them as $1, $2, ...
            //msg = Regex.Replace(msg, @"(\d{2})/(\d{2})/(\d{4})", "$3-$1-$2");
            //Console.WriteLine(msg);
            //Console.ReadKey();

            #endregion

            #region  将hello ‘welcome’ to ‘China’   替换成 hello 【welcome】 to 【China】
            // Replace each single-quoted word with the same word wrapped in 【 】.

            ////hello 【welcome】 to 【China】
            //string s = " hello 'welcome' to  be'aut'iful 'China' fdsfds jfklds'jfdsk'lfjskl ";
            ////To emit a literal "$1" in the replacement, write $$1
            //s = Regex.Replace(s, "'(.+?)'", "【$1】");
            //Console.WriteLine(s);
            //Console.ReadKey();

            #endregion

            #region 替换手机号的掩码
            // Mask phone numbers: keep the first three and last four digits, replace the middle four with ****.

            //string msg = "我的手机号码是13888888888 苏坤的手机号是18999165365。长15210998254的觉得是浪费";
            //msg = Regex.Replace(msg, @"(\d{3})\d{4}(\d{4})", "$1****$2");
            //Console.WriteLine(msg);
            //Console.ReadKey();

            // NOTE(review): the two lines below look like an unfinished follow-up exercise (input plus
            // expected output with the email user names masked); no code for it is present here.
            ////string msg = "嘎哈发的睡觉了zxh@itcast.cn范德萨abcdef@yahoo.com范德萨nihaomahaha@sina.com.cn范德萨";
            //////嘎哈发的睡觉了***@itcast.cn范德萨******@yahoo.com范德萨************@sina.com.cn范德萨
            #endregion

              #region 练习2：给一段文本中匹配到的url添加超链接，比如把http://www.test.com替换为<a href="http://www.test.com"> http://www.test.com</a>。参考代码见备注。因为这个是整体做为一个组，比较特殊，难以理解，先把日期转换的理解了就好理解了。
            // Exercise 2: wrap every URL found in a text in a hyperlink, e.g. http://www.test.com becomes
            // <a href="http://www.test.com">http://www.test.com</a>. The whole match is used as one group,
            // which is easier to follow once the date-conversion exercise is understood.
            // The two Replace calls are equivalent; they differ only in how the quotes are escaped
            // (regular string with \" versus verbatim string with "").

            //string msg = "新浪的网址是：http://www.sina.com.cn搜狐的网址是：http://www.sohu.com 还有网易的网址：http://www.163.com";
            ////msg = Regex.Replace(msg, "([a-zA-Z0-9]+://[0-9a-zA-Z.&=\\?%]+)", "<a href=\"$1\">$1</a>");
            //msg = Regex.Replace(msg, "([a-zA-Z0-9]+://[0-9a-zA-Z.&=\\?%]+)", @"<a href=""$1"">$1</a>");
            //Console.WriteLine(msg);
            //Console.ReadKey();

            // Expected output:
            ////新浪的网址是：<a href="http://www.sina.com.cn">http://www.sina.com.cn</a>搜狐的网址是：<a href="http://www.sohu.com">http://www.sohu.com</a> 还有网易的网址：<a href="http://www.163.com">http://www.163.com</a>

            #endregion

        }
    }
}

正则提取职位信息

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;
using System.IO;
using System.Net;

namespace _05通过WebClient类来发起请求并下载html
{
    // Web-scraping exercises: download HTML with WebClient and extract data from it with regexes.
    // Every sample inside Main is commented out, so running the program as-is does nothing;
    // uncomment one region at a time to try it.
    class Program
    {
        // Entry point. args is not used by any of the samples.
        static void Main(string[] args)
        {
            #region 抓取网页email
            // Scrape email addresses from a web page and write them to email.txt.
            // NOTE(review): the url local is never used; DownloadString repeats the literal instead.
            //string url = "http://192.168.1.100:8080/提取Email.htm";
            ////1. Download the HTML string for the URL
            //WebClient wc = new WebClient();
            //wc.Encoding = Encoding.UTF8;
            //string html = wc.DownloadString("http://192.168.1.100:8080/提取Email.htm");
            ////2. Extract the emails from the downloaded string and write them to a text file
            //MatchCollection matches = Regex.Matches(html, @"[-a-zA-Z0-9_.]+@[-a-zA-Z0-9]+(\.[a-zA-Z0-9]+){1,}");

            //using (StreamWriter writer = new StreamWriter("email.txt"))
            //{
            //    //Iterate over the extracted emails
            //    foreach (Match item in matches)
            //    {
            //        //Console.WriteLine(item.Value);
            //        writer.WriteLine(item.Value);
            //    }
            //}
            //Console.ReadKey();
            #endregion

            #region 抓取网页图片
            // Scrape the images of a web page and download each one.
            // NOTE(review): "(.+)" is greedy, so two <img> tags on one line would be captured as a
            // single match; "(.+?)" is presumably what is wanted - confirm against the page.
            // NOTE(review): imgPath appends the src value to the full page URL including its query
            // string; verify that this produces a valid image URL.
            // NOTE(review): there is no path separator after "MV", so files are saved on the Desktop
            // as "MV<name>" rather than inside an MV folder - confirm the intent.

            //WebClient wc = new WebClient();
            ////1. Download the page source
            //string html = wc.DownloadString("http://image.haosou.com/i?src=360pic_strong&q=美女");
            ////2. Extract the images on the page, i.e. the <img> tags
            ////<img alt="" src="hotgirls/00_00.jpg" />
            //MatchCollection matches = Regex.Matches(html, @"<img\s+alt="""" src=""(.+)""\s*/>");
            //foreach (Match item in matches)
            //{
            //    string imgPath = "http://image.haosou.com/i?src=360pic_strong&q=美女" + item.Groups[1].Value;
            //    //Download the image
            //    wc.DownloadFile(imgPath, @"C:\Users\Administrator\Desktop\MV" + Path.GetFileName(imgPath));
            //}
            //Console.WriteLine("ok");
            //Console.ReadKey();

            #endregion


            #region 抓取职位信息
            // Scrape job titles from a saved 51job listing page; group 1 is the link text.
            // NOTE(review): the dots in "search.51job.com" and ",c.html" are unescaped, so they match
            // any character, and ".+>" is greedy within a line.

            //WebClient webClient = new WebClient();
            //string html = webClient.DownloadString("http://192.168.1.100:8080/【上海,IT-管理,计算机软件招聘，求职】-前程无忧.htm");

            // Sample of the anchor tag being matched:
            ////<a href="http://search.51job.com/job/46621778,c.html" onclick="zzSearch.acStatRecJob( 1 );" class="jobname" target="_blank">ERP项目经理</a>
            //MatchCollection matches = Regex.Matches(html, @"<a\s+href=""http://search.51job.com/job/[0-9]{8},c.html"".+>(.+)</a>");
            //foreach (Match item in matches)
            //{
            //    Console.WriteLine(item.Groups[1].Value);
            //}
            //Console.WriteLine("共{0}个职位信息。", matches.Count);
            //Console.ReadKey();

            #endregion


        }
    }
}

posted @ 2014-12-11 14:16 代码沉思者阅读(217) 评论(0) 收藏举报

刷新页面返回顶部

代码沉思者

左手代码右手诗，亦是生活亦是痴。有技术的地方就有江湖......

10---Net基础加强

公告