# 都市行杭州公交数据获取脚本 (Dushixing: script that fetches Hangzhou bus route data)
# Watir drives an Internet Explorer window; getbus reads the rendered pages through it.
require 'watir'
# jcode (Ruby 1.8 standard library) makes String methods such as tr and delete
# operate on multibyte characters according to $KCODE.
require 'jcode'
# 'e' selects EUC as the interpreter-wide character encoding (Ruby 1.8 only).
$KCODE = 'e'
# Hour and minute at start-up; used only to build the output file name below.
serial_number = Time.now.strftime("%H%M")
# Path of the output text file that tofile appends to.
$filename = "c:\\bus_#{serial_number}.txt"
# Single browser instance shared by every getbus call.
$ie = Watir::IE.new
# Appends +string+ as one line to the file named by $filename, skipping the
# write when the file's current last line already holds the same text.
#
# The comparison ignores leading/trailing whitespace on both sides, so a
# string that carries a trailing newline or blank is still recognised as a
# repeat of the previous line.
#
# string - the line to record (String).
#
# Returns nil. The file is created if it does not exist and is always closed,
# even when reading or writing raises.
def tofile(string)
  # NOTE(review): as written this maps ':' to itself, i.e. it is a no-op;
  # presumably the first argument was meant to be a full-width colon - confirm.
  string = string.tr(':',':')
  File.open($filename, "a+") do |file|
    # "a+" may position the read pointer at the end of the file; rewind so
    # the existing content can be inspected. Writes still go to the end.
    file.rewind
    last_line = file.readlines.last
    file.puts string if last_line.nil? || last_line.strip != string.strip
  end
  nil
end
# Builds the text for one stop row: the stop name (column 1) followed by a
# parenthesised note. The note comes from column 9 when that cell is not the
# empty string, otherwise from the first whitespace-separated word of
# column 10. A missing column 10 is treated as empty.
#
# Uses and resets the global $subname exactly like the inline code it
# replaces, so the global holds '()' after every call.
def bus_stop_label(cells)
  if cells[9] != ''
    $subname = '(' + cells[9].to_s + ')'
  elsif cells[10].to_s.strip != ''
    $subname = '(' + cells[10].to_s.split(' ')[0] + ')'
  end
  label = cells[1].to_s
  label += $subname if $subname != '()'
  $subname = '()'
  label
end

# Formats one output line from a $title_row entry and the collected stop
# labels: route number, direction marker, time, '|', price text, then the
# stops joined with commas. A nil field (e.g. a missing time) renders as empty.
def bus_line_record(title, stops)
  "#{title[5]}#{title[4]}#{title[6]}|#{title[3]}#{stops.join(',')}"
end

# Queries the hzbus.com.cn search form for route +no+ through the shared
# browser $ie, reads the 12th table of the result page and writes one line
# per route direction via tofile.
#
# no - the route name typed into the "line_name" field (String). It is also
#      interpolated unescaped into a regular expression that selects the
#      summary rows.
#
# Side effects: navigates $ie, resets and fills the globals $title_row, $id,
# $up_row, $up_temp and $subname, and appends to the output file via tofile.
# Nothing is written (and the globals are left alone) when the page reports
# that the route does not exist.
#
# Returns nil.
def getbus(no)
  index_url = 'http://www.hzbus.com.cn/index.jsp'
  $ie.goto(index_url)
  $ie.text_field(:name, "line_name").set(no)
  $ie.button(:name, "imageField3").click
  return if $ie.contains_text('您要查询的公交路线不存在')

  row = $ie.tables[11].to_a
  $title_row = []
  $id = 0
  $up_row = []
  $subname = ''
  $up_temp = []

  # Pass 1: summary rows (exactly 13 cells, first cell matching the route).
  # Each yields one $title_row entry for a loop line, or two (up and down).
  route_pattern = /#{no}/
  row.each do |cells|
    next unless (cells[0] =~ route_pattern) && cells.length == 13

    route_no = cells[0].to_s
    route_parts = route_no.split('/')
    no_notk = route_parts[0]
    no_k = route_parts.length == 1 ? route_parts[0] : route_parts[1]
    # String#delete removes every occurrence of the listed characters
    # individually, not the substring as a whole.
    no_notk = no_notk.delete("(区间)").delete("夜线")
    no_k = no_k.delete("(区间)").delete("夜线")

    time_row = cells[4].tr(' ', '').tr('.', ':').scan(/\S+/)

    # Prices: column 6 and column 8; judging by the variable names these are
    # the non-air-conditioned and air-conditioned fares - TODO confirm.
    no_air_price = cells[6].to_s.delete("元")
    air_price = cells[8].to_s.delete("元")
    if no_air_price == '--' && air_price != '--'
      price = "#{no_k}:#{air_price}元 = "
    elsif no_air_price != '--' && air_price == '--'
      price = "#{no_notk}:#{no_air_price}元 = "
    else
      price = "#{no_k}:#{air_price}元 #{no_notk}:#{no_air_price}元 = "
    end

    stops_row = cells[2].to_s.tr('--', ' ').scan(/\S+/)
    loop_line = (stops_row[0] == stops_row[1] && stops_row[2] == stops_row[3]) ||
                (stops_row[0] == stops_row[2] && stops_row[1] == stops_row[3])
    if loop_line
      $title_row << [no_notk, stops_row[0], stops_row[1], price, '(环行线)|', route_no, time_row[0]]
    else
      $title_row << [no_notk, stops_row[0], stops_row[1], price, '↑|', route_no, time_row[0]]
      $title_row << [no_k, stops_row[2], stops_row[3], price, '↓|', route_no, time_row[1]]
    end
  end

  # Pass 2: stop rows (first cell starts with digits, 5 to 11 cells).
  row.each do |cells|
    next unless (cells[0] =~ /\A\d+/) && cells.length > 4 && cells.length < 12
    next unless $id < $title_row.length

    title = $title_row[$id]
    if $up_temp.last == title[2] && $up_temp.first == title[1] && $up_row.length > 5
      # The collected stops run from this direction's first terminal to its
      # last one: write them out and start the next direction with this row.
      tofile(bus_line_record(title, $up_row))
      $up_row.clear
      $up_temp.clear
      $up_temp << cells[1]
      $up_row << bus_stop_label(cells)
      $id = $id + 1
    elsif cells[1].index('已撤销').nil?
      # Stops whose name contains '已撤销' are left out.
      $up_temp << cells[1]
      $up_row << bus_stop_label(cells)
    end
  end

  # Write the stops still pending after the last row. The bounds check keeps
  # a page with no matching summary row (or with every direction already
  # written) from raising on a nil $title_row entry.
  if $title_row.length % 2 == 0 && $id < $title_row.length
    tofile(bus_line_record($title_row[$id], $up_row))
  end
  nil
end
# Scrape every plain numeric route name, 1 through 1000, in ascending order.
(1..1000).each { |line| getbus(line.to_s) }

# Then scrape the routes whose names are not plain numbers, in the order listed.
busline1 = [
  '临时线', 'UT接送车', '临浦临时线',
  'X1', 'X2',
  'Y1', 'Y2', 'Y3', 'Y4', 'Y5', 'Y6', 'Y7', 'Y8', 'Y9', 'Y13',
  'J1', 'J5', 'J9', 'J12', 'J14', 'J17', 'J18',
  '615D',
  'B1', 'B支1', 'B支2', 'B支3',
  '观光6线'
]
busline1.each { |line| getbus(line) }
#puts string
#ie.tables.each { |t| t.flash}