自己写的爬虫

抓500彩票网上的数据

抓取图中每场比赛的信息。

直接上代码

@Test
    public void testUrl() throws IOException
    {
        //根据URL获取某天的比赛。
        Document doc = Jsoup.connect("http://live.500.com/?e=2017-09-13").get();
        
        //获取所有场次的集合。
        Elements games = doc.select("#table_match tbody tr");
        
        //遍历每场比赛,获取每场比赛的信息
        for (Element game : games)
        {
            
            Elements cells = game.getElementsByTag("td");
            
            //场次、赛事、轮次、比赛时间、状态
            String pid = cells.get(0).text();
            String tournament = cells.get(1).text();
            String round = cells.get(2).text();
            String date = cells.get(3).text();
            String status = cells.get(4).text();
//            System.out.println(pid+ ", " + tournament + ", " + round + ", " + date + ", " + status);
            
            
            //主队名称、黄牌数、红牌数、让球/受让球数
            String homeName = cells.get(5).getElementsByTag("a").get(0).text();
            Elements homeSpans = cells.get(5).getElementsByTag("span");
            String homeYellowCard = "0";
            String homeRedCard = "0";
            String homeRQ = "0";
            for (Element span : homeSpans)
            {
                if ("yellowcard".equals(span.attr("class")))
                {
                    //主队黄牌
                    homeYellowCard = span.text();
                }else if("redcard".equals(span.attr("class")))
                {
                    //主队红牌
                    homeRedCard = span.text();
                }else if("sp_rq".equals(span.attr("class")) || "sp_sr".equals(span.attr("class")))
                {
                    //主队让球数
                    homeRQ = span.text();
                }
                
            }
//            System.out.println(homeName + ", " + homeSpans + ", " + homeYellowCard + ", " + homeRedCard + ", " + homeRQ);
//            System.out.println();
            
            //主队进球数、本场比赛数据统计页面的链接、客队进球数
            String homeFullTimeGoals = cells.get(6).getElementsByClass("clt1").get(0).text();
            String href = cells.get(6).getElementsByClass("fhuise").get(0).attr("abs:href");
            String awayFullTimeGoals = cells.get(6).getElementsByClass("clt3").get(0).text();
            
            //客队名称、客队黄牌、客队红牌
            String awayName = cells.get(7).getElementsByTag("a").get(0).text();
            String awayYellowCard = "0";
            String awayRedCard = "0";
            Elements awaySpans = cells.get(7).getElementsByTag("span");
            for (Element span : awaySpans)
            {
                if ("yellowcard".equals(span.attr("class")))
                {
                    awayYellowCard = span.text();
                }else if ("redcard".equals(span.attr("class")))
                {
                    awayRedCard = span.text();
                }
            }
            
            //半场比分
            String halfScores = cells.get(8).text();
            String homeHalfTimeGoals = halfScores.substring(0, (halfScores.indexOf("-") - 1));
            String awayHalfTimeGoals = halfScores.substring(halfScores.indexOf("-") + 2);
            
            
            
            System.out.println("场次:" + pid + " 赛事:" + tournament + " 轮次:" + round + " 比赛时间:" + date + " 状态:" 
            + status + " 主队名称:" + homeName + " 主队黄牌:" + homeYellowCard + " 主队红牌:" 
            + homeRedCard + " 让球:" + homeRQ + " 主队全场进球:" + homeFullTimeGoals + " 链接:" 
            + href + " 客队全场进球:" + awayFullTimeGoals + " 客队名称:" + awayName + " 客队黄牌:" + awayYellowCard + " 客队红牌:" 
            + awayRedCard + " 主队半场进球:" + homeHalfTimeGoals + " 客队半场进球:" 
            + awayHalfTimeGoals);
            
            System.out.println();
        }

---------------------------------------------------------------------------------------

/**
 * Downloads the statistics page of one fixed match and prints its per-team figures.
 *
 * The first output line holds the two team names found in the page header. Each
 * following line holds cells 1, 2 and 3 of one statistics row, separated by single
 * spaces, in the order home value, statistic label, away value.
 *
 * Header cells or statistics rows that are missing from the page are skipped instead
 * of raising an exception.
 *
 * @throws IOException if the page cannot be fetched
 */
@Test
    public void test4() throws IOException
    {
        Document doc = Jsoup.connect("http://live.500.com/detail.php?fid=699415&r=1").get();

        // Team names: first title is the home side, second the away side.
        Elements teamTitles = doc.select(".wrap .t2>table .team_title");
        if (teamTitles.size() >= 2)
        {
            String homeName = teamTitles.get(0).text();
            String awayName = teamTitles.get(1).text();
            System.out.println(homeName + " " + awayName);
        }

        // One table row per statistic (shots, fouls, ...).
        Elements datas = doc.select(".wrap .t2>div>table>tbody> tr");

        // Row positions, in print order: shots, shots on target, fouls, corners,
        // offsides, red cards, yellow cards, attacks, dangerous attacks, free kicks,
        // possession. Row 10 is intentionally not read, exactly as before.
        // NOTE(review): the reason row 10 is skipped is not visible here - confirm
        // against the page markup.
        int[] statRows = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11};
        for (int rowIndex : statRows)
        {
            printStatRow(datas, rowIndex);
        }
    }

    /**
     * Prints cells 1-3 of row {@code rowIndex} as "home label away"; prints nothing
     * when the row or any of those cells is absent.
     */
    private static void printStatRow(Elements rows, int rowIndex)
    {
        if (rowIndex >= rows.size())
        {
            return;
        }
        Elements cells = rows.get(rowIndex).getElementsByTag("td");
        if (cells.size() < 4)
        {
            return;
        }
        System.out.println(cells.get(1).text() + " " + cells.get(2).text() + " " + cells.get(3).text());
    }

 

posted @ 2017-09-15 16:04  钓鱼翁  阅读(330)  评论(0)  编辑  收藏  举报