都市行杭州公交数据获取脚本

require 'watir'
require 'jcode'
$KCODE = 'e'

# Output file name: c:\bus_<HHMM>.txt, where HHMM is the hour and minute at
# which the script started. Only the time of day is used, so two runs started
# in the same minute (on any day) share one file; tofile appends to it.
serial_number = Time.now.strftime("%H%M")
$filename   =  "c:\\bus_#{serial_number}.txt"
 
# Single Internet Explorer session, driven through Watir and shared by every
# getbus call via this global.
$ie = Watir::IE.new
def tofile(string)
  file = File.open( $filename, "a+" )
  line_row = IO.readlines($filename ) 
  string = string.tr(':',':')
 
  if (line_row  == [])
    file.puts string
  else
    last_line =  line_row[line_row.length-1]
    if (last_line.strip != string)
      file.puts string
    end
   
  end

  file.close
end

# Builds the display label for one stop row: the stop name (cell 1) followed
# by a parenthesised note taken from cell 9 or, when cell 9 is an empty
# string, from the first whitespace-separated word of cell 10. When neither
# yields a note the bare stop name is returned.
#
# $subname is still used as scratch state and is left as '()' afterwards,
# as before.
def bus_stop_label(cells)
  if cells[9] != ''
    $subname = '(' + cells[9].to_s + ')'
  elsif cells[10].to_s.strip != ''
    # to_s guards rows that have no cell 10 at all (10-cell rows).
    $subname = '(' + cells[10].to_s.split(' ')[0] + ')'
  end

  label = $subname != '()' ? cells[1].to_s + $subname : cells[1].to_s
  $subname = '()'
  label
end

# Turns one 13-cell route header row into title entries of the form
#   [name, first terminus, second terminus, fare text, marker, route number, time]
# One entry is returned for a loop line, two (up and down) otherwise.
#
# Going by the variable names, cell 0 is the route number, cell 2 the termini,
# cell 4 the times, cell 6 the non-air-conditioned fare and cell 8 the
# air-conditioned fare - TODO confirm against the live page.
def bus_route_titles(cells)
  route_no = cells[0].to_s
  names = route_no.split('/')
  no_notk = names[0]
  no_k = names.length == 1 ? names[0] : names[1]

  # NOTE(review): String#delete removes every listed character wherever it
  # occurs, not the substring, so e.g. any '线' in a route name is dropped.
  # Left unchanged because the intended literal is uncertain.
  no_notk = no_notk.delete("(区间)").delete("夜线")
  no_k = no_k.delete("(区间)").delete("夜线")

  time = cells[4].tr(' ', '').tr('.', ':')
  time_row = time.scan(/\S+/)

  # Fares, with the currency unit stripped; '--' marks a fare that is absent.
  no_air_price = cells[6].to_s.delete("元")
  air_price = cells[8].to_s.delete("元")

  if no_air_price == '--' && air_price != '--'
    price = no_k + ':' + air_price + '元 = '
  elsif no_air_price != '--' && air_price == '--'
    price = no_notk + ':' + no_air_price + '元 = '
  else
    price = no_k + ':' + air_price + '元 ' + no_notk + ':' + no_air_price + '元 = '
  end

  stops_row = cells[2].to_s.tr('--', ' ').scan(/\S+/)

  same_ends = stops_row[0] == stops_row[1] && stops_row[2] == stops_row[3]
  same_legs = stops_row[0] == stops_row[2] && stops_row[1] == stops_row[3]
  if same_ends || same_legs
    [[no_notk, stops_row[0], stops_row[1], price, '(环行线)|', route_no, time_row[0]]]
  else
    [
      [no_notk, stops_row[0], stops_row[1], price, '↑|', route_no, time_row[0]],
      [no_k, stops_row[2], stops_row[3], price, '↓|', route_no, time_row[1]]
    ]
  end
end

# Writes one finished direction through tofile as
#   <route number><marker><time>|<fare text><comma-separated stop labels>
# (the marker already ends in '|').
def write_bus_direction(title, stops)
  # to_s: a title entry has no time when the time cell held fewer tokens than
  # directions; write an empty field instead of raising on String + nil.
  tofile(title[5] + title[4] + title[6].to_s + '|' + title[3] + stops.join(','))
end

# Looks up route +no+ on the hzbus.com.cn query page using the shared $ie
# browser session and records each direction of the route via tofile.
#
# no - route name typed into the search box (String).
#
# Nothing is written when the page reports that the route does not exist.
# Working state is kept in the globals $title_row, $id, $up_row, $up_temp and
# $subname, which are reset on every call that reaches the result table.
def getbus(no)
  index_url = 'http://www.hzbus.com.cn/index.jsp'
  $ie.goto(index_url)
  $ie.text_field(:name, "line_name").set(no)
  $ie.button(:name, "imageField3").click

  return if $ie.contains_text('您要查询的公交路线不存在')

  # The result grid is read from table 11 of the page as rows of cell text.
  row = $ie.tables[11].to_a

  $title_row = []
  $id = 0
  $up_row = []
  $subname = ''
  $up_temp = []

  # Pass 1: header rows (13 cells, route number containing the query).
  route_pattern = /#{Regexp.escape(no.to_s)}/
  row.each do |cells|
    next unless cells[0] =~ route_pattern && cells.length == 13
    $title_row.concat(bus_route_titles(cells))
  end

  # Pass 2: stop rows (first cell starts with digits, 5 to 11 cells).
  row.each do |cells|
    next unless cells[0] =~ /\A\d+/ && cells.length > 4 && cells.length < 12
    next unless $id < $title_row.length

    title = $title_row[$id]
    reached_end = $up_temp.last == title[2] && $up_temp[0] == title[1]

    if reached_end && $up_row.length > 5
      # The stops collected so far run from the first to the second terminus:
      # write them out and start the next direction with this row.
      write_bus_direction(title, $up_row)
      $up_row.clear
      $up_temp.clear
      $up_temp << cells[1]
      $up_row << bus_stop_label(cells)
      $id += 1
    elsif cells[1].index('已撤销').nil?
      # Stops marked as withdrawn are skipped.
      $up_temp << cells[1]
      $up_row << bus_stop_label(cells)
    end
  end

  # Write the direction still being collected. The even-count condition is
  # kept from the original; NOTE(review): it means a trailing direction is not
  # written when the number of title entries is odd - confirm this is wanted.
  # The bounds check avoids indexing past $title_row (e.g. when no header row
  # matched), which used to raise NoMethodError on nil.
  if $title_row.length % 2 == 0 && $id < $title_row.length
    write_bus_direction($title_row[$id], $up_row)
  end
end

# Query every purely numeric route name from "1" through "1000".
(1..1000).each do |number|
  getbus(number.to_s)
end

# Then the routes whose names are not plain numbers.
busline1 = [
  '临时线', 'UT接送车', '临浦临时线',
  'X1', 'X2',
  'Y1', 'Y2', 'Y3', 'Y4', 'Y5', 'Y6', 'Y7', 'Y8', 'Y9', 'Y13',
  'J1', 'J5', 'J9', 'J12', 'J14', 'J17', 'J18',
  '615D',
  'B1', 'B支1', 'B支2', 'B支3',
  '观光6线'
]
busline1.each { |name| getbus(name) }

#puts string
#ie.tables.each { |t| t.flash}

posted @ 2008-11-20 15:35  赵献良  阅读(608)  评论(0)  编辑  收藏  举报