python基础(xml,html,bs4)

http://python-data.dr-chuck.net/comments_42.xml

1 <commentinfo><note>This file contains the sample data for testing</note><comments><comment><name>Leven</name><count>100</count></comment><comment><name>Mahdiya</name><count>97</count></comment><comment><name>Ajayraj</name><count>87</count></comment><comment><name>Lillian</name><count>86</count></comment><comment><name>Aon</name><count>86</count></comment><comment><name>Ruaraidh</name><count>78</count></comment><comment><name>Gursees</name><count>75</count></comment><comment><name>Emmanuel</name><count>74</count></comment><comment><name>Christy</name><count>72</count></comment><comment><name>Annoushka</name><count>72</count></comment><comment><name>Inara</name><count>72</count></comment><comment><name>Caite</name><count>70</count></comment><comment><name>Rosangel</name><count>70</count></comment><comment><name>Iana</name><count>66</count></comment><comment><name>Anise</name><count>66</count></comment><comment><name>Jaosha</name><count>65</count></comment><comment><name>Cadyn</name><count>65</count></comment><comment><name>Edward</name><count>63</count></comment><comment><name>Charlotte</name><count>61</count></comment><comment><name>Sammy</name><count>60</count></comment><comment><name>Zarran</name><count>60</count></comment><comment><name>Rowen</name><count>59</count></comment><comment><name>Stanislaw</name><count>59</count></comment><comment><name>Maighdlin</name><count>57</count></comment><comment><name>Connan</name><count>56</count></comment><comment><name>Warrick</name><count>54</count></comment><comment><name>Diya</name><count>52</count></comment><comment><name>Lawson</name><count>52</count></comment><comment><name>Wu</name><count>51</count></comment><comment><name>Irmak</name><count>47</count></comment><comment><name>Emilija</name><count>47</count></comment><comment><name>Kayda</name><count>41</count></comment><comment><name>Ellenor</name><count>41</count></comment><comment><name>Kyra</name><count>41</count></comment><comment><name>Nikita</name><count>38</count></comment><comment><name>Kaelah</name><count>35</count></comment><comment><name>Meko</name><count>32</count></comment><comment><name>Marissa</name><count>31</count></comment><comment><name>Ayat</name><count>24</count></comment><comment><name>Sali</name><count>19</count></comment><comment><name>Hashem</name><count>19</count></comment><comment><name>Tygan</name><count>18</count></comment><comment><name>Rioden</name><count>17</count></comment><comment><name>Cruiz</name><count>16</count></comment><comment><name>Caoilfinn</name><count>13</count></comment><comment><name>Ewen</name><count>8</count></comment><comment><name>Baighley</name><count>7</count></comment><comment><name>Ramone</name><count>1</count></comment><comment><name>Kyran</name><count>1</count></comment><comment><name>Noelani</name><count>1</count></comment></comments></commentinfo>
 1 import xml.etree.ElementTree as ET
 2 import urllib2
 3 input = urllib2.urlopen('http://python-data.dr-chuck.net/comments_42.xml').read()
 4 
 5 commentinfo = ET.fromstring(input)
 6 lst = commentinfo.findall('comments/comment')
 7 sum = 0
 8 for item in lst:
 9    sum += int(item.find('count').text)
10 print sum #2553

 

 

  1 from xml.dom import minidom
  2 import os
  3 import glob
  4 import cv2
  5 
  6 
  7 """
  8 <annotation>
  9 """
 10 def InitVoc(vocfile):
 11     doc = minidom.Document()
 12     annotation = doc.createElement("annotation")
 13     doc.appendChild(annotation)
 14     addObject(doc, annotation, vocfile)
 15     f = file(vocfile.split('.')[0] + ".xml","w")
 16     doc.writexml(f)
 17     f.close()
 18 
 19 def addObject(doc, annotation, vocfile):
 20     #<folder>VOC2007</folder>
 21     folder = doc.createElement("folder")
 22     folder.appendChild(doc.createTextNode("VOC2007"))
 23     annotation.appendChild(folder)
 24 
 25     #<filename>XXXXXX</filename>
 26     filename = doc.createElement("filename")
 27     filename.appendChild(doc.createTextNode(vocfile))
 28     annotation.appendChild(filename)
 29 
 30     """
 31     <source>
 32         <database>The VOC2007 Database</database>
 33         <annotation>PASCAL VOC2007</annotation>
 34         <image>flickr</image>
 35         <flickrid>341012865</flickrid>
 36     </source>
 37     """
 38     source = doc.createElement("source")
 39     annotation.appendChild(source)
 40 
 41     database = doc.createElement("database")
 42     database.appendChild(doc.createTextNode("The VOC2007 Database"))
 43     annotation2 = doc.createElement("annotation")
 44     annotation2.appendChild(doc.createTextNode("PASCAL VOC2007"))
 45     image = doc.createElement("image")
 46     image.appendChild(doc.createTextNode("flickr"))
 47     flickrid = doc.createElement("flickrid")
 48     flickrid.appendChild(doc.createTextNode("wang"))
 49     source.appendChild(database)
 50     source.appendChild(annotation2)
 51     source.appendChild(image)
 52     source.appendChild(flickrid)
 53 
 54     """
 55      <owner>
 56         <flickrid>Fried Camels</flickrid>
 57         <name>Jinky the Fruit Bat</name>
 58     </owner>
 59     """
 60     owner = doc.createElement("owner")
 61     annotation.appendChild(owner)
 62 
 63     flickrid2 = doc.createElement("flickrid")
 64     flickrid2.appendChild(doc.createTextNode("wang"))
 65     name = doc.createElement("image")
 66     name.appendChild(doc.createTextNode("wang"))
 67     owner.appendChild(flickrid2)
 68     owner.appendChild(name)
 69 
 70     """
 71     <size>
 72         <width>353</width>
 73         <height>500</height>
 74         <depth>3</depth>
 75     </size>
 76     """
 77     img = cv2.imread(bmpfile)
 78     size = doc.createElement("size")
 79     annotation.appendChild(size)
 80 
 81     width = doc.createElement("width")
 82     width.appendChild(doc.createTextNode(str(len(img[0]))))
 83     height = doc.createElement("height")
 84     height.appendChild(doc.createTextNode(str(len(img))))
 85     depth = doc.createElement("depth")
 86     depth.appendChild(doc.createTextNode(str(img[0][0].size)))
 87     size.appendChild(width)
 88     size.appendChild(height)
 89     size.appendChild(depth)
 90 
 91     #<segmented>0</segmented>
 92     segmented = doc.createElement("segmented")
 93     segmented.appendChild(doc.createTextNode("0"))
 94     annotation.appendChild(segmented)
 95     """
 96     <object>
 97         <name>dog</name>
 98         <pose>Left</pose>
 99         <truncated>1</truncated>
100         <difficult>0</difficult>
101         <bndbox>
102             <xmin>48</xmin>
103             <ymin>240</ymin>
104             <xmax>195</xmax>
105             <ymax>371</ymax>
106         </bndbox>
107     </object>
108     """
109     f = open(vocfile.split('.')[0] + '.txs')
110     strs = ""
111     for i in f.read():
112         if '\\x' not in repr(i):
113             strs += i
114     for line in strs.split(' '):
115         elem = line.split('{')
116         if len(elem) > 1:
117             if len(elem[1].split(',')) == 4:
118                 nums = elem[1].split(',')
119                 if len(elem[0]) == 1 and len(img) > int(nums[0]) and len(img) >= int(nums[2]) and len(img[0]) >= int(nums[3]) and len(img[0]) > int(nums[1]):
120                     object = doc.createElement("object")
121                     annotation.appendChild(object)
122                     name2 = doc.createElement("name")
123                     name2.appendChild(doc.createTextNode(elem[0]))
124                     pose = doc.createElement("pose")
125                     pose.appendChild(doc.createTextNode("Left"))
126                     truncated = doc.createElement("truncated")
127                     truncated.appendChild(doc.createTextNode("1"))
128                     difficult = doc.createElement("difficult")
129                     difficult.appendChild(doc.createTextNode("0"))
130                     bndbox = doc.createElement("bndbox")
131                     xmin = doc.createElement("xmin")
132                     xmin.appendChild(doc.createTextNode(nums[0]))
133                     ymin = doc.createElement("ymin")
134                     ymin.appendChild(doc.createTextNode(nums[1]))
135                     xmax = doc.createElement("xmax")
136                     xmax.appendChild(doc.createTextNode(nums[2]))
137                     ymax = doc.createElement("ymax")
138                     ymax.appendChild(doc.createTextNode(nums[3]))
139                     bndbox.appendChild(xmin)
140                     bndbox.appendChild(ymin)
141                     bndbox.appendChild(xmax)
142                     bndbox.appendChild(ymax)
143                     object.appendChild(name2)
144                     object.appendChild(pose)
145                     object.appendChild(truncated)
146                     object.appendChild(difficult)
147                     object.appendChild(bndbox)
148 
# Batch driver: switch to the source directory and write one annotation
# file per JPEG image found there.
os.chdir("E:\\shared\\Format_Trans_20160328\\src_txs")
bmpfiles = glob.glob("*.jpg")  # despite the name, these are *.jpg files
for bmpfile in bmpfiles:
    InitVoc(bmpfile)
 1 from xml.dom.minidom import *
 2 import struct
 3 
 4 class PltHeader(object):
 5     def __init__(self, uSize, uCharNum, uTest, uStrokeNum, reserve):
 6         self.uSize = uSize
 7         self.uCharNum = uCharNum
 8         self.uText = uTest
 9         self.uStrokeNum = uStrokeNum
10         self.reserve = reserve
11 
# Header layout, as packed with 'HH258H' (260 unsigned shorts = 520 bytes):
#   uSize, uCharNum, 256 label words, uStrokeNum, reserve
HEADER_BYTES = 520
MAX_LABEL_CHARS = 256
PEN_UP = (65535, 0)           # written after every stroke
END_OF_INK = (65535, 65535)   # written once after the last stroke


def label_code_points(label):
    """Return the Unicode code points of *label*, skipping space characters.

    Uses ord() on the characters themselves instead of parsing repr(), so
    ASCII and Latin-1 characters are handled as well as \\uXXXX ones.
    """
    return [ord(ch) for ch in label if ch != ' ']


def header_words(code_points, stroke_num, reserve=0):
    """Return the 258 words that follow uSize and uCharNum in the header.

    The words are the label code points zero-padded to 256 entries, followed
    by the stroke count and the reserve word.  Raises ValueError when the
    label has more than 256 code points.
    """
    if len(code_points) > MAX_LABEL_CHARS:
        raise ValueError("label has %d characters, at most %d fit in the header"
                         % (len(code_points), MAX_LABEL_CHARS))
    padding = [0] * (MAX_LABEL_CHARS - len(code_points))
    return list(code_points) + padding + [stroke_num, reserve]


def flatten_traces(trace_texts):
    """Flatten InkML trace strings into a word list.

    Each trace string is a comma separated list of "x y" pairs.  Returns
    (words, point_count): the x/y values of every point, a (65535, 0) marker
    after each trace and a final (65535, 65535) marker, plus the number of
    points seen.
    """
    words = []
    point_count = 0
    for text in trace_texts:
        for coordinate in text.split(','):
            fields = coordinate.split()
            words.append(int(fields[0]))
            words.append(int(fields[1]))
            point_count += 1
        words.extend(PEN_UP)
    words.extend(END_OF_INK)
    return words, point_count


def main():
    """Convert one InkML/UPX sample pair into C:/Users/samsung/Desktop/1.plt."""
    my_plt = PltHeader(0, 0, [], 0, 0)
    dom1 = parse("E:/ADAB_set/ADAB_set/set_2/inkml/1233225548643.inkml")
    dom2 = parse("E:/ADAB_set/ADAB_set/set_2/upx/1233225548643.upx")
    itemlist1 = dom1.documentElement.getElementsByTagName('trace')
    itemText = dom2.documentElement.getElementsByTagName("alternate")

    codes = label_code_points(itemText[0].getAttribute("value"))
    my_plt.uStrokeNum = len(itemlist1)
    my_plt.uCharNum = len(codes)
    # The stroke count must sit between the padded label and the reserve
    # word; without it only 259 of the 260 values needed by 'HH258H' exist.
    my_plt.uText = header_words(codes, my_plt.uStrokeNum, my_plt.reserve)

    nums, count = flatten_traces(item.firstChild.data for item in itemlist1)

    # 4 bytes per point, per stroke marker and for the final marker.
    my_plt.uSize = HEADER_BYTES + (my_plt.uStrokeNum + 1 + count) * 2 * 2

    with open("C:/Users/samsung/Desktop/1.plt", "w+b") as binfile:
        binfile.write(struct.pack('HH258H', my_plt.uSize, my_plt.uCharNum,
                                  *my_plt.uText))
        binfile.write(struct.pack('%dH' % len(nums), *nums))
    print("%s %s %s %s" % (my_plt.uStrokeNum, my_plt.uSize, my_plt.uText,
                           my_plt.uCharNum))
    print(len(nums))


if __name__ == "__main__":
    main()

 

 

 1 from xml.dom.minidom import *
 2 import struct
 3 import os
 4 
 5 os.chdir("E:/ADAB_set/ADAB_set")
 6 reserve = 0
 7 
 8 for root, dirs, files in os.walk(os.getcwd()):
 9     for dir in dirs:
10         binfile = open("C:/Users/samsung/Desktop/%s.plt"%dir,"ab")
11         inkml_files = []
12         upx_files = []
13         os.chdir(".\\%s"%dir)
14 
15         for inkml_root, inkml_dirs, inkml_files in os.walk(".\\inkml"):
16             break
17         for upx_root, upx_dirs, upx_files in os.walk(".\\upx"):
18             break
19         for i in range(len(inkml_files)):
20             uText = []
21             uStrokeNum = 0
22             uCharNum = 0
23             uSize = 0
24             domInkml = parse(".\\inkml\\%s"%inkml_files[i])
25             domUpx = parse(".\\upx\\%s"%upx_files[i])
26             rootInkml = domInkml.documentElement
27             rootUpx = domUpx.documentElement
28             itemListInkml = rootInkml.getElementsByTagName('trace')
29             itemValue = rootUpx.getElementsByTagName("alternate")
30             strTest = repr(itemValue[0].getAttribute("value")).split('\\u')
31             print strTest
32             print i
33             for i in strTest:
34                 if i != strTest[0]:
35                     if len(i.split(" ")) > 1:
36                         uText.append(int(i.split(" ")[0], 16))
37                     else:
38                         uText.append(int(i.strip(' ').strip("'"), 16))
39             print uText
40             uStrokeNum = len(itemListInkml)
41             uCharNum = len(uText)
42             for j in range(256 - uCharNum):
43                 uText.append(0)
44 
45             uText.append(uStrokeNum)
46             uText.append(reserve)
47 
48             nums = []
49             count = 0
50             for item in itemListInkml:
51                 for coordinate in str(item.firstChild.data).split(','):
52                     count += 1
53                     nums.append(int(coordinate.split(" ")[0]))
54                     nums.append(int(coordinate.split(" ")[1]))
55                 nums.append(65535)
56                 nums.append(0)
57             nums.append(65535)
58             nums.append(65535)
59             uSize = 520 + (uStrokeNum + 1 + count) * 2 * 2
60             ss = struct.pack('HH258H', uSize, uCharNum, *uText)
61             binfile.write(ss)
62             ss = struct.pack('%dH'%(len(nums)),*nums)
63             binfile.write(ss)
64         binfile.close()
65         os.chdir("..\\")
66     break

 

 

  1 <html>
  2 <head>
  3 <title>People that Avah knows</title>
  4 <style>
  5 .overlay{
  6     opacity:0.99;
  7     background-color:#eee;
  8     position:fixed;
  9     width:100%;
 10     height:100%;
 11     top:0px;
 12     left:0px;
 13     z-index:1000;
 14 }
 15 </style>
 16 </head>
 17 <body>
 18 <h1>People that Avah knows</h1>
 19 <div class="overlay" id="overlay" style="display:none" >
 20 <center>
 21 <h2>
 22 This screen randomly changes the height between list items and vanishes 
 23 after a while to make sure that you retrieve and process the data
 24 in a Python program rather than simply counting down pressing links, and 
 25 and doing the assignment without writing a Python program :).
 26 The names are in the same order in the HTML even though they 
 27 shift around on the screen visually.
 28 Your Python program can look at the page as long as it likes.
 29 </h2>
 30 </center>
 31 </div>
 32 <ul>
 33 <li style="margin-top: 7px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Daniyal.html">Daniyal</a></li>
 34 <li style="margin-top: 4px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Fares.html">Fares</a></li>
 35 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kynan.html">Kynan</a></li>
 36 <li style="margin-top: 10px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Erika.html">Erika</a></li>
 37 <li style="margin-top: 29px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Milly.html">Milly</a></li>
 38 <li style="margin-top: 12px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ceara.html">Ceara</a></li>
 39 <li style="margin-top: 1px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rennie.html">Rennie</a></li>
 40 <li style="margin-top: 31px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Franco.html">Franco</a></li>
 41 <li style="margin-top: 19px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Roxie.html">Roxie</a></li>
 42 <li style="margin-top: 4px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Giyia.html">Giyia</a></li>
 43 <li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zuzanna.html">Zuzanna</a></li>
 44 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Calean.html">Calean</a></li>
 45 <li style="margin-top: 26px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Robyn.html">Robyn</a></li>
 46 <li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Sainabou.html">Sainabou</a></li>
 47 <li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Maximilian.html">Maximilian</a></li>
 48 <li style="margin-top: 36px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Katso.html">Katso</a></li>
 49 <li style="margin-top: 9px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Armaan.html">Armaan</a></li>
 50 <li style="margin-top: 23px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Keiva.html">Keiva</a></li>
 51 <li style="margin-top: 8px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Eiko.html">Eiko</a></li>
 52 <li style="margin-top: 28px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Salahudin.html">Salahudin</a></li>
 53 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marwa.html">Marwa</a></li>
 54 <li style="margin-top: 13px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Nodoka.html">Nodoka</a></li>
 55 <li style="margin-top: 46px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Dhyia.html">Dhyia</a></li>
 56 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lianne.html">Lianne</a></li>
 57 <li style="margin-top: 5px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Tyree.html">Tyree</a></li>
 58 <li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Diona.html">Diona</a></li>
 59 <li style="margin-top: 35px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lliam.html">Lliam</a></li>
 60 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Crystyn.html">Crystyn</a></li>
 61 <li style="margin-top: 34px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Maca.html">Maca</a></li>
 62 <li style="margin-top: 1px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marina.html">Marina</a></li>
 63 <li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Leah.html">Leah</a></li>
 64 <li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Avril.html">Avril</a></li>
 65 <li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Riagan.html">Riagan</a></li>
 66 <li style="margin-top: 13px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Jaying.html">Jaying</a></li>
 67 <li style="margin-top: 59px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Emaly.html">Emaly</a></li>
 68 <li style="margin-top: 26px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rheanne.html">Rheanne</a></li>
 69 <li style="margin-top: 46px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Owais.html">Owais</a></li>
 70 <li style="margin-top: 31px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aria.html">Aria</a></li>
 71 <li style="margin-top: 8px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kyie.html">Kyie</a></li>
 72 <li style="margin-top: 48px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Keryis.html">Keryis</a></li>
 73 <li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marcous.html">Marcous</a></li>
 74 <li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Catrin.html">Catrin</a></li>
 75 <li style="margin-top: 54px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marcelina.html">Marcelina</a></li>
 76 <li style="margin-top: 52px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Madeline.html">Madeline</a></li>
 77 <li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Patrikas.html">Patrikas</a></li>
 78 <li style="margin-top: 66px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lacey.html">Lacey</a></li>
 79 <li style="margin-top: 57px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Eason.html">Eason</a></li>
 80 <li style="margin-top: 9px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kyrillos.html">Kyrillos</a></li>
 81 <li style="margin-top: 16px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Benjamin.html">Benjamin</a></li>
 82 <li style="margin-top: 34px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ege.html">Ege</a></li>
 83 <li style="margin-top: 39px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marwan.html">Marwan</a></li>
 84 <li style="margin-top: 6px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Celik.html">Celik</a></li>
 85 <li style="margin-top: 42px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kendal.html">Kendal</a></li>
 86 <li style="margin-top: 18px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kasja.html">Kasja</a></li>
 87 <li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Asena.html">Asena</a></li>
 88 <li style="margin-top: 12px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Harris.html">Harris</a></li>
 89 <li style="margin-top: 79px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Si.html">Si</a></li>
 90 <li style="margin-top: 56px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lumi.html">Lumi</a></li>
 91 <li style="margin-top: 33px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Chevy.html">Chevy</a></li>
 92 <li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Famara.html">Famara</a></li>
 93 <li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Cara.html">Cara</a></li>
 94 <li style="margin-top: 80px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Elisa.html">Elisa</a></li>
 95 <li style="margin-top: 23px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Nihaal.html">Nihaal</a></li>
 96 <li style="margin-top: 76px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Vivian.html">Vivian</a></li>
 97 <li style="margin-top: 73px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Farrah.html">Farrah</a></li>
 98 <li style="margin-top: 69px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Shonagh.html">Shonagh</a></li>
 99 <li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Monty.html">Monty</a></li>
100 <li style="margin-top: 80px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Turner.html">Turner</a></li>
101 <li style="margin-top: 6px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Oliver.html">Oliver</a></li>
102 <li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aayan.html">Aayan</a></li>
103 <li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Atom.html">Atom</a></li>
104 <li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Abby.html">Abby</a></li>
105 <li style="margin-top: 68px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Makala.html">Makala</a></li>
106 <li style="margin-top: 79px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rupert.html">Rupert</a></li>
107 <li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aine.html">Aine</a></li>
108 <li style="margin-top: 50px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Feden.html">Feden</a></li>
109 <li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Annick.html">Annick</a></li>
110 <li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Avah.html">Avah</a></li>
111 <li style="margin-top: 72px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Emilie.html">Emilie</a></li>
112 <li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Autumn.html">Autumn</a></li>
113 <li style="margin-top: 25px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Edyn.html">Edyn</a></li>
114 <li style="margin-top: 43px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Efe.html">Efe</a></li>
115 <li style="margin-top: 29px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kie.html">Kie</a></li>
116 <li style="margin-top: 105px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Dougal.html">Dougal</a></li>
117 <li style="margin-top: 58px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Carolyn.html">Carolyn</a></li>
118 <li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lana.html">Lana</a></li>
119 <li style="margin-top: 54px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Miryn.html">Miryn</a></li>
120 <li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Pearce.html">Pearce</a></li>
121 <li style="margin-top: 73px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ash.html">Ash</a></li>
122 <li style="margin-top: 49px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zahide.html">Zahide</a></li>
123 <li style="margin-top: 76px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Mathias.html">Mathias</a></li>
124 <li style="margin-top: 53px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Vaila.html">Vaila</a></li>
125 <li style="margin-top: 104px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Abbey.html">Abbey</a></li>
126 <li style="margin-top: 64px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ayooluwa.html">Ayooluwa</a></li>
127 <li style="margin-top: 117px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Miriam.html">Miriam</a></li>
128 <li style="margin-top: 83px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Levon.html">Levon</a></li>
129 <li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Colin.html">Colin</a></li>
130 <li style="margin-top: 65px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Noah.html">Noah</a></li>
131 <li style="margin-top: 70px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Taegan.html">Taegan</a></li>
132 <li style="margin-top: 122px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zennon.html">Zennon</a></li>
133 </ul>
134 <script>
135 // http://stackoverflow.com/questions/20423322/simple-setting-off-display-none-block-with-javascript
136 function showHide(id) {
137     var el = document.getElementById(id);
138     if( el && el.style.display == 'none')    
139         el.style.display = 'block';
140     else 
141         el.style.display = 'none';
142 }
143 setTimeout('showHide("overlay");', 2500);
144 
145 </script>
146 </body>
147 </html>

 

 1 import urllib2
 2 from bs4 import BeautifulSoup
 3 
 4 url = raw_input()
 5 
 6 for i in range(7):
 7     html = urllib2.urlopen(url).read()
 8     soup = BeautifulSoup(html)
 9     tags = soup('a')
10     print tags[17].string
11     url = tags[17].get('href', None)

 

 

 

 

 

 

 1 <html>
 2 <head>
 3 <title>Welcome to the comments assignment from www.pythonlearn.com</title>
 4 </head>
 5 <body>
 6 <h1>This file contains the sample data for testing</h1>
 7 
 8 <table border="2">
 9 <tr>
10 <td>Name</td><td>Comments</td>
11 </tr>
12 <tr><td>Leven</td><td><span class="comments">100</span></td></tr>
13 <tr><td>Mahdiya</td><td><span class="comments">97</span></td></tr>
14 <tr><td>Ajayraj</td><td><span class="comments">87</span></td></tr>
15 <tr><td>Lillian</td><td><span class="comments">86</span></td></tr>
16 <tr><td>Aon</td><td><span class="comments">86</span></td></tr>
17 <tr><td>Ruaraidh</td><td><span class="comments">78</span></td></tr>
18 <tr><td>Gursees</td><td><span class="comments">75</span></td></tr>
19 <tr><td>Emmanuel</td><td><span class="comments">74</span></td></tr>
20 <tr><td>Christy</td><td><span class="comments">72</span></td></tr>
21 <tr><td>Annoushka</td><td><span class="comments">72</span></td></tr>
22 <tr><td>Inara</td><td><span class="comments">72</span></td></tr>
23 <tr><td>Caite</td><td><span class="comments">70</span></td></tr>
24 <tr><td>Rosangel</td><td><span class="comments">70</span></td></tr>
25 <tr><td>Iana</td><td><span class="comments">66</span></td></tr>
26 <tr><td>Anise</td><td><span class="comments">66</span></td></tr>
27 <tr><td>Jaosha</td><td><span class="comments">65</span></td></tr>
28 <tr><td>Cadyn</td><td><span class="comments">65</span></td></tr>
29 <tr><td>Edward</td><td><span class="comments">63</span></td></tr>
30 <tr><td>Charlotte</td><td><span class="comments">61</span></td></tr>
31 <tr><td>Sammy</td><td><span class="comments">60</span></td></tr>
32 <tr><td>Zarran</td><td><span class="comments">60</span></td></tr>
33 <tr><td>Rowen</td><td><span class="comments">59</span></td></tr>
34 <tr><td>Stanislaw</td><td><span class="comments">59</span></td></tr>
35 <tr><td>Maighdlin</td><td><span class="comments">57</span></td></tr>
36 <tr><td>Connan</td><td><span class="comments">56</span></td></tr>
37 <tr><td>Warrick</td><td><span class="comments">54</span></td></tr>
38 <tr><td>Diya</td><td><span class="comments">52</span></td></tr>
39 <tr><td>Lawson</td><td><span class="comments">52</span></td></tr>
40 <tr><td>Wu</td><td><span class="comments">51</span></td></tr>
41 <tr><td>Irmak</td><td><span class="comments">47</span></td></tr>
42 <tr><td>Emilija</td><td><span class="comments">47</span></td></tr>
43 <tr><td>Kayda</td><td><span class="comments">41</span></td></tr>
44 <tr><td>Ellenor</td><td><span class="comments">41</span></td></tr>
45 <tr><td>Kyra</td><td><span class="comments">41</span></td></tr>
46 <tr><td>Nikita</td><td><span class="comments">38</span></td></tr>
47 <tr><td>Kaelah</td><td><span class="comments">35</span></td></tr>
48 <tr><td>Meko</td><td><span class="comments">32</span></td></tr>
49 <tr><td>Marissa</td><td><span class="comments">31</span></td></tr>
50 <tr><td>Ayat</td><td><span class="comments">24</span></td></tr>
51 <tr><td>Sali</td><td><span class="comments">19</span></td></tr>
52 <tr><td>Hashem</td><td><span class="comments">19</span></td></tr>
53 <tr><td>Tygan</td><td><span class="comments">18</span></td></tr>
54 <tr><td>Rioden</td><td><span class="comments">17</span></td></tr>
55 <tr><td>Cruiz</td><td><span class="comments">16</span></td></tr>
56 <tr><td>Caoilfinn</td><td><span class="comments">13</span></td></tr>
57 <tr><td>Ewen</td><td><span class="comments">8</span></td></tr>
58 <tr><td>Baighley</td><td><span class="comments">7</span></td></tr>
59 <tr><td>Ramone</td><td><span class="comments">1</span></td></tr>
60 <tr><td>Kyran</td><td><span class="comments">1</span></td></tr>
61 <tr><td>Noelani</td><td><span class="comments">1</span></td></tr>
62 </table>
63 </body>
64 </html>

 

 1 import urllib2
 2 from bs4 import BeautifulSoup
 3 
 4 url = raw_input()
 5 html = urllib2.urlopen(url).read()
 6 soup = BeautifulSoup(html)
 7 tags = soup('span')
 8 count, sum = 0, 0
 9 for tag in tags:
10     count += 1
11     sum += int(tag.string)
12 print count, sum

 

posted @ 2015-12-01 16:40  早杰  阅读(616)  评论(0)  编辑  收藏  举报