Googler

两情相悦,又岂在朝朝暮暮。

zlhome.com Deal

using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer for zlhome.com pages. Depending on the depth of the landed page it
    /// either enqueues housing-estate detail pages, saves one housing estate and
    /// enqueues its deal page, or saves the deal rows listed on a deal page.
    /// </summary>
    internal class Zlhome : AnalyzerBase
    {
        /// <summary>
        /// Minimum number of <c>td</c> cells a deal row must have; the highest cell
        /// index read while building a listing is 8.
        /// </summary>
        private const int DealColumnCount = 9;

        /// <summary>
        /// Processes one landed page according to <c>current.Depth</c>.
        /// </summary>
        /// <param name="current">The landed page; its Url, Depth and State are read.</param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            var lander = Crawler.Lander;
            var pHandler = CreateContentHandler(current);
            switch (current.Depth)
            {
                case 0:
                    {
                        // Listing page: hand the pager to DoPerPaging, then enqueue
                        // every housing-estate link found in the list.
                        var dom = lander.GetDocument(pHandler);
                        DoPerPaging(current, dom.DocumentNode, ".page:first-child a:last-child");

                        foreach (var node in QueryNodes(dom.DocumentNode, ".xqlistBox .l_img a"))
                        {
                            var url = GetHref(node, current.Url);
                            Crawler.PushUrl(url, DataDepth.Houses);
                        }
                    }
                    break;
                case DataDepth.Houses:
                    {
                        // Housing-estate detail page: collect attributes, save the
                        // entity, then enqueue its deal page.
                        var dom = lander.GetDocument(pHandler);
                        var attrs = new AttributeFiller();

                        // The link text also contains the "关注" label, which is stripped.
                        attrs.Append("小区名称:{0}", QueryTexts(dom.DocumentNode, ".sc a").First().Replace("关注", string.Empty));

                        attrs.Append(QueryTexts(dom.DocumentNode, ".c:last-child li"));

                        Guid hashKey = GenHashKey(current.Url.OriginalString);
                        var bo = Crawler.Repository.LoadHouses(hashKey);
                        bo.SiteID = "Zlhome.com";
                        bo.PageUrl = current.Url.OriginalString;
                        bo.CityName = Crawler.Config.CityName;
                        // Presumably maps entity field names to the labels used on this
                        // site - confirm against AttributeFiller.FillEntity.
                        // NOTE(review): "骏工日期" may be a typo of "竣工日期"; it is left
                        // unchanged because it must match whatever FillEntity expects.
                        attrs.FillEntity(bo, new Dictionary<string, string>()
                        {
                            {"地址", "小区地址"},
                            {"所属片区", "所属区域"},
                            {"物业类型", "物业类别"},
                            {"骏工日期", "竣工时间"},
                        });
                        MapMark(bo);
                        Repository.Save(bo);
                        Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);

                        // The deal link is taken from the second ".xqinfo" section. The
                        // entity is already saved at this point, so a page without that
                        // section (or without a link in it) is reported and skipped
                        // instead of throwing.
                        var pNode = QueryNodes(dom.DocumentNode, ".xqinfo").Skip(1).FirstOrDefault();
                        var dealNode = pNode == null ? null : QueryNode(pNode, "a");
                        if (dealNode == null)
                        {
                            Crawler.OutWrite("楼盘 {0} 未找到成交记录链接", bo.小区名称);
                            break;
                        }
                        var url = GetHref(dealNode, current.Url);
                        // The entity's RowID travels as the state of the deal page.
                        Crawler.PushUrl(url, DataDepth.Deal, bo.RowID);
                    }
                    break;
                case DataDepth.Deal:
                    {
                        // Deal page: State carries the RowID pushed by the Houses branch.
                        Guid housesID = (Guid)current.State;
                        var dom = lander.GetDocument(pHandler);

                        // Rows of the first ".cjxxtable" are saved as sales; rows of
                        // every following table are saved as rentals.
                        bool isRent = false;
                        foreach (var table in QueryNodes(dom.DocumentNode, ".cjxxtable"))
                        {
                            foreach (var node in QueryNodes(table, "tr"))
                            {
                                var cells = QueryTexts(node, "td").ToArray();
                                if (cells.Length < DealColumnCount)
                                {
                                    // Rows without a full set of td cells (presumably
                                    // header or placeholder rows) carry no record;
                                    // indexing them would throw IndexOutOfRangeException.
                                    continue;
                                }

                                // An unparsable date is stored as null rather than failing the row.
                                DateTime? transactionDate = null;
                                DateTime parsed;
                                if (DateTime.TryParse(cells[0], out parsed))
                                {
                                    transactionDate = parsed;
                                }
                                // Cell 2 is not stored.
                                Repository.SaveHouselisting(new HouselistingEntity()
                                {
                                    HousesID = housesID,
                                    TransactionDate = transactionDate,
                                    Area = cells[1],
                                    Apartment = cells[3],
                                    Orientation = cells[4],
                                    Floor = cells[5],
                                    UnitPriceOrLease = cells[6],
                                    SoldPriceOrRent = cells[7],
                                    ServiceBroker = cells[8],
                                    IsRent = isRent
                                });
                                Crawler.OutWrite("保存小区{1}记录 {0}", housesID, isRent ? "出租" : "出售");
                            }
                            isRent = true;
                        }
                    }
                    break;
            }
        }
    }
}

 

posted on 2015-02-04 16:18  RockyLOMO  阅读(220)  评论(0)  编辑  收藏  举报

导航

Apple/苹果笔记本 Mac Air MC968CH/A 行货在保 I5 11寸 超级本