python基础(xml,html,bs4)
http://python-data.dr-chuck.net/comments_42.xml
1 <commentinfo><note>This file contains the sample data for testing</note><comments><comment><name>Leven</name><count>100</count></comment><comment><name>Mahdiya</name><count>97</count></comment><comment><name>Ajayraj</name><count>87</count></comment><comment><name>Lillian</name><count>86</count></comment><comment><name>Aon</name><count>86</count></comment><comment><name>Ruaraidh</name><count>78</count></comment><comment><name>Gursees</name><count>75</count></comment><comment><name>Emmanuel</name><count>74</count></comment><comment><name>Christy</name><count>72</count></comment><comment><name>Annoushka</name><count>72</count></comment><comment><name>Inara</name><count>72</count></comment><comment><name>Caite</name><count>70</count></comment><comment><name>Rosangel</name><count>70</count></comment><comment><name>Iana</name><count>66</count></comment><comment><name>Anise</name><count>66</count></comment><comment><name>Jaosha</name><count>65</count></comment><comment><name>Cadyn</name><count>65</count></comment><comment><name>Edward</name><count>63</count></comment><comment><name>Charlotte</name><count>61</count></comment><comment><name>Sammy</name><count>60</count></comment><comment><name>Zarran</name><count>60</count></comment><comment><name>Rowen</name><count>59</count></comment><comment><name>Stanislaw</name><count>59</count></comment><comment><name>Maighdlin</name><count>57</count></comment><comment><name>Connan</name><count>56</count></comment><comment><name>Warrick</name><count>54</count></comment><comment><name>Diya</name><count>52</count></comment><comment><name>Lawson</name><count>52</count></comment><comment><name>Wu</name><count>51</count></comment><comment><name>Irmak</name><count>47</count></comment><comment><name>Emilija</name><count>47</count></comment><comment><name>Kayda</name><count>41</count></comment><comment><name>Ellenor</name><count>41</count></comment><comment><name>Kyra</name><count>41</count></comment><comment><name>Nikita</name><count>38</co
unt></comment><comment><name>Kaelah</name><count>35</count></comment><comment><name>Meko</name><count>32</count></comment><comment><name>Marissa</name><count>31</count></comment><comment><name>Ayat</name><count>24</count></comment><comment><name>Sali</name><count>19</count></comment><comment><name>Hashem</name><count>19</count></comment><comment><name>Tygan</name><count>18</count></comment><comment><name>Rioden</name><count>17</count></comment><comment><name>Cruiz</name><count>16</count></comment><comment><name>Caoilfinn</name><count>13</count></comment><comment><name>Ewen</name><count>8</count></comment><comment><name>Baighley</name><count>7</count></comment><comment><name>Ramone</name><count>1</count></comment><comment><name>Kyran</name><count>1</count></comment><comment><name>Noelani</name><count>1</count></comment></comments></commentinfo>
import xml.etree.ElementTree as ET

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

COMMENTS_URL = 'http://python-data.dr-chuck.net/comments_42.xml'


def total_comment_count(xml_text):
    """Return the sum of every <count> under comments/comment.

    xml_text is the document source (str or bytes) whose root element
    contains a <comments> element holding <comment> children.  Each
    <comment> must carry a <count> child with an integer text value;
    a missing or non-numeric count raises, as the data is then malformed.
    """
    commentinfo = ET.fromstring(xml_text)
    return sum(
        int(comment.find('count').text)
        for comment in commentinfo.findall('comments/comment')
    )


if __name__ == "__main__":
    # The original post reported 2553 here; the sample listing reproduced
    # in this document adds up to 2482, so the live file may differ.
    print(total_comment_count(urlopen(COMMENTS_URL).read()))
from xml.dom import minidom
import os
import glob
import cv2

# Directory holding the source images and their .txs label files.
SOURCE_DIR = "E:\\shared\\Format_Trans_20160328\\src_txs"


def _add_text_element(doc, parent, tag, text):
    """Append <tag>text</tag> to parent and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def _read_label_text(path):
    """Return the content of a .txs file with unprintable bytes dropped.

    Only printable ASCII plus tab, newline and carriage return are kept,
    which is what the original repr()-based filter kept on Python 2.
    """
    with open(path, "rb") as handle:
        # Mirror Windows text-mode reading, which the original relied on.
        raw = bytearray(handle.read().replace(b"\r\n", b"\n"))
    return "".join(
        chr(byte) for byte in raw
        if 0x20 <= byte <= 0x7e or byte in (0x09, 0x0a, 0x0d)
    )


def InitVoc(vocfile):
    """Write a PASCAL VOC style annotation XML next to the image vocfile.

    The output file has the image's name with the extension replaced by
    ".xml".  Labels are read from the sibling ".txs" file by addObject.
    """
    doc = minidom.Document()
    # <annotation> is the document root.
    annotation = doc.createElement("annotation")
    doc.appendChild(annotation)
    addObject(doc, annotation, vocfile)
    with open(os.path.splitext(vocfile)[0] + ".xml", "w") as out:
        doc.writexml(out)


def addObject(doc, annotation, vocfile):
    """Fill annotation with the VOC metadata and one <object> per label.

    doc is the minidom Document, annotation its root element and vocfile
    the image file name.  Raises IOError when the image cannot be read.
    """
    # <folder>VOC2007</folder> and <filename>XXXXXX</filename>
    _add_text_element(doc, annotation, "folder", "VOC2007")
    _add_text_element(doc, annotation, "filename", vocfile)

    # <source> with database / annotation / image / flickrid children.
    source = doc.createElement("source")
    annotation.appendChild(source)
    _add_text_element(doc, source, "database", "The VOC2007 Database")
    _add_text_element(doc, source, "annotation", "PASCAL VOC2007")
    _add_text_element(doc, source, "image", "flickr")
    _add_text_element(doc, source, "flickrid", "wang")

    # <owner> with flickrid / name children.  The original created the
    # second child with the tag "image"; the VOC layout calls it "name".
    owner = doc.createElement("owner")
    annotation.appendChild(owner)
    _add_text_element(doc, owner, "flickrid", "wang")
    _add_text_element(doc, owner, "name", "wang")

    # <size>: read from the image passed in, not from a global variable.
    img = cv2.imread(vocfile)
    if img is None:
        raise IOError("cannot read image: %s" % vocfile)
    height = len(img)
    width = len(img[0])
    size = doc.createElement("size")
    annotation.appendChild(size)
    _add_text_element(doc, size, "width", str(width))
    _add_text_element(doc, size, "height", str(height))
    _add_text_element(doc, size, "depth", str(img[0][0].size))

    # <segmented>0</segmented>
    _add_text_element(doc, annotation, "segmented", "0")

    # One <object> per "<label>{a,b,c,d" token in the .txs file.
    label_text = _read_label_text(os.path.splitext(vocfile)[0] + '.txs')
    for token in label_text.split(' '):
        parts = token.split('{')
        if len(parts) < 2:
            continue
        nums = parts[1].split(',')
        if len(nums) != 4 or len(parts[0]) != 1:
            continue  # only single-character labels with four numbers
        # NOTE(review): nums[0]/nums[2] are written as xmin/xmax but are
        # checked against the image height, and nums[1]/nums[3] against
        # the width.  Kept as in the original; confirm the .txs axis order.
        inside = (height > int(nums[0]) and height >= int(nums[2])
                  and width >= int(nums[3]) and width > int(nums[1]))
        if not inside:
            continue
        obj = doc.createElement("object")
        annotation.appendChild(obj)
        _add_text_element(doc, obj, "name", parts[0])
        _add_text_element(doc, obj, "pose", "Left")
        _add_text_element(doc, obj, "truncated", "1")
        _add_text_element(doc, obj, "difficult", "0")
        bndbox = doc.createElement("bndbox")
        obj.appendChild(bndbox)
        _add_text_element(doc, bndbox, "xmin", nums[0])
        _add_text_element(doc, bndbox, "ymin", nums[1])
        _add_text_element(doc, bndbox, "xmax", nums[2])
        _add_text_element(doc, bndbox, "ymax", nums[3])


if __name__ == "__main__":
    os.chdir(SOURCE_DIR)
    for image_file in glob.glob("*.jpg"):
        InitVoc(image_file)
from xml.dom import minidom
import struct

INKML_PATH = "E:/ADAB_set/ADAB_set/set_2/inkml/1233225548643.inkml"
UPX_PATH = "E:/ADAB_set/ADAB_set/set_2/upx/1233225548643.upx"
OUTPUT_PATH = "C:/Users/samsung/Desktop/1.plt"

# Header layout, all unsigned 16-bit: uSize, uCharNum, 256 text slots,
# uStrokeNum, reserve.
HEADER_FORMAT = 'HH258H'
MAX_CHARS = 256
PEN_UP = 65535  # marker value used to end a stroke and the whole record


class PltHeader(object):
    """Header fields of one PLT record.

    uText is the list packed after uSize and uCharNum: the code points,
    zero padding up to 256 slots, then the stroke count and reserve.
    """

    def __init__(self, uSize, uCharNum, uTest, uStrokeNum, reserve):
        self.uSize = uSize
        self.uCharNum = uCharNum
        self.uText = uTest
        self.uStrokeNum = uStrokeNum
        self.reserve = reserve


def text_code_points(value):
    """Return the code point of every non-whitespace character in value.

    Replaces the original repr()/split('\\u') parsing, which only works
    on Python 2 and mis-reads any character not printed as \\uXXXX.
    """
    return [ord(ch) for ch in value if not ch.isspace()]


def trace_points(trace_text):
    """Parse "x y,x y,..." into a list of (x, y) integer pairs.

    Any whitespace may separate the two numbers or surround a pair, and
    empty pairs (for example after a trailing comma) are ignored.  Fields
    after the first two of a pair are ignored, as in the original.
    """
    points = []
    for chunk in trace_text.split(','):
        fields = chunk.split()
        if not fields:
            continue
        if len(fields) < 2:
            raise ValueError("incomplete coordinate: %r" % chunk)
        points.append((int(fields[0]), int(fields[1])))
    return points


def encode_strokes(strokes):
    """Flatten strokes into the 16-bit value sequence of the record body.

    Every stroke is followed by (65535, 0) and the whole sequence ends
    with (65535, 65535).
    """
    values = []
    for stroke in strokes:
        for x, y in stroke:
            values.append(x)
            values.append(y)
        values.extend((PEN_UP, 0))
    values.extend((PEN_UP, PEN_UP))
    return values


def build_header(code_points, strokes, reserve=0):
    """Return the PltHeader for the given text and strokes.

    Raises ValueError when the text has more than 256 code points, since
    the header has exactly 256 text slots.
    """
    if len(code_points) > MAX_CHARS:
        raise ValueError("text longer than %d characters" % MAX_CHARS)
    slots = list(code_points) + [0] * (MAX_CHARS - len(code_points))
    # The original appended only `reserve`, leaving 257 values for a
    # format that needs 258; the stroke count belongs before it.
    slots.append(len(strokes))
    slots.append(reserve)
    point_count = sum(len(stroke) for stroke in strokes)
    size = (struct.calcsize(HEADER_FORMAT)
            + (len(strokes) + 1 + point_count) * 2 * 2)
    return PltHeader(size, len(code_points), slots, len(strokes), reserve)


def pack_plt(header, body):
    """Return header followed by body as packed unsigned 16-bit values.

    struct raises struct.error when a value does not fit in 16 bits
    (this includes uSize for very large records).
    """
    packed = struct.pack(HEADER_FORMAT, header.uSize, header.uCharNum,
                         *header.uText)
    return packed + struct.pack('%dH' % len(body), *body)


def _trace_text(trace):
    """Return the text of a <trace> element, or "" when it is empty."""
    node = trace.firstChild
    return node.data if node is not None else ""


def main():
    """Convert the configured inkml/upx pair into one PLT file."""
    inkml_root = minidom.parse(INKML_PATH).documentElement
    upx_root = minidom.parse(UPX_PATH).documentElement
    traces = inkml_root.getElementsByTagName('trace')
    alternates = upx_root.getElementsByTagName("alternate")

    code_points = text_code_points(alternates[0].getAttribute("value"))
    strokes = [trace_points(_trace_text(trace)) for trace in traces]
    my_plt = build_header(code_points, strokes)
    nums = encode_strokes(strokes)

    with open(OUTPUT_PATH, "w+b") as binfile:
        binfile.write(pack_plt(my_plt, nums))
    print("%s %s %s %s" % (my_plt.uStrokeNum, my_plt.uSize,
                           my_plt.uText, my_plt.uCharNum))
    print(len(nums))


if __name__ == "__main__":
    main()
from xml.dom.minidom import parse
import struct
import os

DATA_ROOT = "E:/ADAB_set/ADAB_set"
OUTPUT_PATTERN = "C:/Users/samsung/Desktop/%s.plt"

# Header layout, all unsigned 16-bit: uSize, uCharNum, 256 text slots,
# uStrokeNum, reserve.
HEADER_FORMAT = 'HH258H'
MAX_CHARS = 256
PEN_UP = 65535  # marker value used to end a stroke and the whole record
reserve = 0


def text_code_points(value):
    """Return the code point of every non-whitespace character in value.

    Replaces the original repr()/split('\\u') parsing, which only works
    on Python 2 and mis-reads any character not printed as \\uXXXX.
    """
    return [ord(ch) for ch in value if not ch.isspace()]


def trace_points(trace_text):
    """Parse "x y,x y,..." into a list of (x, y) integer pairs.

    Any whitespace may separate the two numbers or surround a pair, and
    empty pairs are ignored.  Fields after the first two are ignored.
    """
    points = []
    for chunk in trace_text.split(','):
        fields = chunk.split()
        if not fields:
            continue
        if len(fields) < 2:
            raise ValueError("incomplete coordinate: %r" % chunk)
        points.append((int(fields[0]), int(fields[1])))
    return points


def pack_record(code_points, strokes, reserve=0):
    """Return one binary PLT record for the given text and strokes.

    The 520-byte header is followed by the x, y values of every point;
    each stroke ends with (65535, 0) and the record with (65535, 65535).
    Raises ValueError for more than 256 code points; struct.error is
    raised by struct when a value does not fit in 16 bits.
    """
    if len(code_points) > MAX_CHARS:
        raise ValueError("text longer than %d characters" % MAX_CHARS)
    header = list(code_points) + [0] * (MAX_CHARS - len(code_points))
    header.append(len(strokes))
    header.append(reserve)

    body = []
    for stroke in strokes:
        for x, y in stroke:
            body.append(x)
            body.append(y)
        body.extend((PEN_UP, 0))
    body.extend((PEN_UP, PEN_UP))

    # Same value as 520 + (stroke count + 1 + point count) * 2 * 2.
    size = struct.calcsize(HEADER_FORMAT) + 2 * len(body)
    packed = struct.pack(HEADER_FORMAT, size, len(code_points), *header)
    return packed + struct.pack('%dH' % len(body), *body)


def _list_entries(folder):
    """Return (sub-directory names, file names) directly inside folder.

    Both lists are sorted; a missing folder yields two empty lists.
    """
    _, dirs, files = next(os.walk(folder), (folder, [], []))
    return sorted(dirs), sorted(files)


def _trace_text(trace):
    """Return the text of a <trace> element, or "" when it is empty."""
    node = trace.firstChild
    return node.data if node is not None else ""


def convert_set(set_dir, out_path):
    """Append one PLT record per inkml/upx pair in set_dir to out_path.

    Files are paired by name (foo.inkml with foo.upx) rather than by
    their position in two separate directory listings; this assumes the
    two folders use matching file stems.
    """
    inkml_dir = os.path.join(set_dir, "inkml")
    upx_dir = os.path.join(set_dir, "upx")
    upx_names = set(_list_entries(upx_dir)[1])

    with open(out_path, "ab") as binfile:
        for index, inkml_name in enumerate(_list_entries(inkml_dir)[1]):
            upx_name = os.path.splitext(inkml_name)[0] + ".upx"
            if upx_name not in upx_names:
                print("skipping %s: no matching .upx file" % inkml_name)
                continue
            traces = parse(os.path.join(inkml_dir, inkml_name)) \
                .documentElement.getElementsByTagName('trace')
            alternates = parse(os.path.join(upx_dir, upx_name)) \
                .documentElement.getElementsByTagName("alternate")

            code_points = text_code_points(
                alternates[0].getAttribute("value"))
            strokes = [trace_points(_trace_text(trace)) for trace in traces]
            print(index)
            print(code_points)
            binfile.write(pack_record(code_points, strokes, reserve))


def main():
    """Convert every top-level set directory under DATA_ROOT."""
    for set_name in _list_entries(DATA_ROOT)[0]:
        convert_set(os.path.join(DATA_ROOT, set_name),
                    OUTPUT_PATTERN % set_name)


if __name__ == "__main__":
    main()
1 <html> 2 <head> 3 <title>People that Avah knows</title> 4 <style> 5 .overlay{ 6 opacity:0.99; 7 background-color:#eee; 8 position:fixed; 9 width:100%; 10 height:100%; 11 top:0px; 12 left:0px; 13 z-index:1000; 14 } 15 </style> 16 </head> 17 <body> 18 <h1>People that Avah knows</h1> 19 <div class="overlay" id="overlay" style="display:none" > 20 <center> 21 <h2> 22 This screen randomly changes the height between list items and vanishes 23 after a while to make sure that you retrieve and process the data 24 in a Python program rather than simply counting down pressing links, and 25 and doing the assignment without writing a Python program :). 26 The names are in the same order in the HTML even though they 27 shift around on the screen visually. 28 Your Python program can look at the page as long as it likes. 29 </h2> 30 </center> 31 </div> 32 <ul> 33 <li style="margin-top: 7px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Daniyal.html">Daniyal</a></li> 34 <li style="margin-top: 4px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Fares.html">Fares</a></li> 35 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kynan.html">Kynan</a></li> 36 <li style="margin-top: 10px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Erika.html">Erika</a></li> 37 <li style="margin-top: 29px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Milly.html">Milly</a></li> 38 <li style="margin-top: 12px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ceara.html">Ceara</a></li> 39 <li style="margin-top: 1px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rennie.html">Rennie</a></li> 40 <li style="margin-top: 31px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Franco.html">Franco</a></li> 41 <li style="margin-top: 19px;"><a 
href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Roxie.html">Roxie</a></li> 42 <li style="margin-top: 4px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Giyia.html">Giyia</a></li> 43 <li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zuzanna.html">Zuzanna</a></li> 44 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Calean.html">Calean</a></li> 45 <li style="margin-top: 26px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Robyn.html">Robyn</a></li> 46 <li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Sainabou.html">Sainabou</a></li> 47 <li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Maximilian.html">Maximilian</a></li> 48 <li style="margin-top: 36px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Katso.html">Katso</a></li> 49 <li style="margin-top: 9px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Armaan.html">Armaan</a></li> 50 <li style="margin-top: 23px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Keiva.html">Keiva</a></li> 51 <li style="margin-top: 8px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Eiko.html">Eiko</a></li> 52 <li style="margin-top: 28px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Salahudin.html">Salahudin</a></li> 53 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marwa.html">Marwa</a></li> 54 <li style="margin-top: 13px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Nodoka.html">Nodoka</a></li> 55 <li style="margin-top: 46px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Dhyia.html">Dhyia</a></li> 56 <li style="margin-top: 15px;"><a 
href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lianne.html">Lianne</a></li> 57 <li style="margin-top: 5px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Tyree.html">Tyree</a></li> 58 <li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Diona.html">Diona</a></li> 59 <li style="margin-top: 35px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lliam.html">Lliam</a></li> 60 <li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Crystyn.html">Crystyn</a></li> 61 <li style="margin-top: 34px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Maca.html">Maca</a></li> 62 <li style="margin-top: 1px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marina.html">Marina</a></li> 63 <li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Leah.html">Leah</a></li> 64 <li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Avril.html">Avril</a></li> 65 <li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Riagan.html">Riagan</a></li> 66 <li style="margin-top: 13px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Jaying.html">Jaying</a></li> 67 <li style="margin-top: 59px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Emaly.html">Emaly</a></li> 68 <li style="margin-top: 26px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rheanne.html">Rheanne</a></li> 69 <li style="margin-top: 46px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Owais.html">Owais</a></li> 70 <li style="margin-top: 31px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aria.html">Aria</a></li> 71 <li style="margin-top: 8px;"><a 
href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kyie.html">Kyie</a></li> 72 <li style="margin-top: 48px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Keryis.html">Keryis</a></li> 73 <li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marcous.html">Marcous</a></li> 74 <li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Catrin.html">Catrin</a></li> 75 <li style="margin-top: 54px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marcelina.html">Marcelina</a></li> 76 <li style="margin-top: 52px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Madeline.html">Madeline</a></li> 77 <li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Patrikas.html">Patrikas</a></li> 78 <li style="margin-top: 66px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lacey.html">Lacey</a></li> 79 <li style="margin-top: 57px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Eason.html">Eason</a></li> 80 <li style="margin-top: 9px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kyrillos.html">Kyrillos</a></li> 81 <li style="margin-top: 16px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Benjamin.html">Benjamin</a></li> 82 <li style="margin-top: 34px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ege.html">Ege</a></li> 83 <li style="margin-top: 39px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marwan.html">Marwan</a></li> 84 <li style="margin-top: 6px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Celik.html">Celik</a></li> 85 <li style="margin-top: 42px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kendal.html">Kendal</a></li> 86 <li style="margin-top: 
18px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kasja.html">Kasja</a></li> 87 <li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Asena.html">Asena</a></li> 88 <li style="margin-top: 12px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Harris.html">Harris</a></li> 89 <li style="margin-top: 79px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Si.html">Si</a></li> 90 <li style="margin-top: 56px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lumi.html">Lumi</a></li> 91 <li style="margin-top: 33px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Chevy.html">Chevy</a></li> 92 <li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Famara.html">Famara</a></li> 93 <li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Cara.html">Cara</a></li> 94 <li style="margin-top: 80px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Elisa.html">Elisa</a></li> 95 <li style="margin-top: 23px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Nihaal.html">Nihaal</a></li> 96 <li style="margin-top: 76px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Vivian.html">Vivian</a></li> 97 <li style="margin-top: 73px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Farrah.html">Farrah</a></li> 98 <li style="margin-top: 69px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Shonagh.html">Shonagh</a></li> 99 <li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Monty.html">Monty</a></li> 100 <li style="margin-top: 80px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Turner.html">Turner</a></li> 101 <li style="margin-top: 6px;"><a 
href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Oliver.html">Oliver</a></li> 102 <li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aayan.html">Aayan</a></li> 103 <li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Atom.html">Atom</a></li> 104 <li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Abby.html">Abby</a></li> 105 <li style="margin-top: 68px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Makala.html">Makala</a></li> 106 <li style="margin-top: 79px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rupert.html">Rupert</a></li> 107 <li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aine.html">Aine</a></li> 108 <li style="margin-top: 50px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Feden.html">Feden</a></li> 109 <li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Annick.html">Annick</a></li> 110 <li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Avah.html">Avah</a></li> 111 <li style="margin-top: 72px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Emilie.html">Emilie</a></li> 112 <li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Autumn.html">Autumn</a></li> 113 <li style="margin-top: 25px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Edyn.html">Edyn</a></li> 114 <li style="margin-top: 43px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Efe.html">Efe</a></li> 115 <li style="margin-top: 29px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kie.html">Kie</a></li> 116 <li style="margin-top: 105px;"><a 
href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Dougal.html">Dougal</a></li> 117 <li style="margin-top: 58px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Carolyn.html">Carolyn</a></li> 118 <li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lana.html">Lana</a></li> 119 <li style="margin-top: 54px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Miryn.html">Miryn</a></li> 120 <li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Pearce.html">Pearce</a></li> 121 <li style="margin-top: 73px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ash.html">Ash</a></li> 122 <li style="margin-top: 49px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zahide.html">Zahide</a></li> 123 <li style="margin-top: 76px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Mathias.html">Mathias</a></li> 124 <li style="margin-top: 53px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Vaila.html">Vaila</a></li> 125 <li style="margin-top: 104px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Abbey.html">Abbey</a></li> 126 <li style="margin-top: 64px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ayooluwa.html">Ayooluwa</a></li> 127 <li style="margin-top: 117px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Miriam.html">Miriam</a></li> 128 <li style="margin-top: 83px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Levon.html">Levon</a></li> 129 <li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Colin.html">Colin</a></li> 130 <li style="margin-top: 65px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Noah.html">Noah</a></li> 131 <li style="margin-top: 70px;"><a 
href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Taegan.html">Taegan</a></li> 132 <li style="margin-top: 122px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zennon.html">Zennon</a></li> 133 </ul> 134 <script> 135 // http://stackoverflow.com/questions/20423322/simple-setting-off-display-none-block-with-javascript 136 function showHide(id) { 137 var el = document.getElementById(id); 138 if( el && el.style.display == 'none') 139 el.style.display = 'block'; 140 else 141 el.style.display = 'none'; 142 } 143 setTimeout('showHide("overlay");', 2500); 144 145 </script> 146 </body> 147 </html>
try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

from bs4 import BeautifulSoup

try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input  # Python 3

HOPS = 7          # number of pages to visit
LINK_INDEX = 17   # zero-based position of the link to follow on each page


def follow_links(url, hops=HOPS, link_index=LINK_INDEX):
    """Follow the link at link_index on each page, hops times.

    Starting from url, fetch the page, print the text of the chosen <a>
    tag and continue from its href.  Returns the last href reached.
    Raises ValueError when a page has too few links or the chosen link
    has no href.
    """
    for _ in range(hops):
        html = urlopen(url).read()
        # Name the parser explicitly so the result does not depend on
        # which optional parsers happen to be installed.
        soup = BeautifulSoup(html, 'html.parser')
        anchors = soup('a')
        if len(anchors) <= link_index:
            raise ValueError("%s has only %d links" % (url, len(anchors)))
        anchor = anchors[link_index]
        print(anchor.string)
        url = anchor.get('href', None)
        if url is None:
            raise ValueError("link %d has no href" % link_index)
    return url


if __name__ == "__main__":
    follow_links(read_line())
1 <html> 2 <head> 3 <title>Welcome to the comments assignment from www.pythonlearn.com</title> 4 </head> 5 <body> 6 <h1>This file contains the sample data for testing</h1> 7 8 <table border="2"> 9 <tr> 10 <td>Name</td><td>Comments</td> 11 </tr> 12 <tr><td>Leven</td><td><span class="comments">100</span></td></tr> 13 <tr><td>Mahdiya</td><td><span class="comments">97</span></td></tr> 14 <tr><td>Ajayraj</td><td><span class="comments">87</span></td></tr> 15 <tr><td>Lillian</td><td><span class="comments">86</span></td></tr> 16 <tr><td>Aon</td><td><span class="comments">86</span></td></tr> 17 <tr><td>Ruaraidh</td><td><span class="comments">78</span></td></tr> 18 <tr><td>Gursees</td><td><span class="comments">75</span></td></tr> 19 <tr><td>Emmanuel</td><td><span class="comments">74</span></td></tr> 20 <tr><td>Christy</td><td><span class="comments">72</span></td></tr> 21 <tr><td>Annoushka</td><td><span class="comments">72</span></td></tr> 22 <tr><td>Inara</td><td><span class="comments">72</span></td></tr> 23 <tr><td>Caite</td><td><span class="comments">70</span></td></tr> 24 <tr><td>Rosangel</td><td><span class="comments">70</span></td></tr> 25 <tr><td>Iana</td><td><span class="comments">66</span></td></tr> 26 <tr><td>Anise</td><td><span class="comments">66</span></td></tr> 27 <tr><td>Jaosha</td><td><span class="comments">65</span></td></tr> 28 <tr><td>Cadyn</td><td><span class="comments">65</span></td></tr> 29 <tr><td>Edward</td><td><span class="comments">63</span></td></tr> 30 <tr><td>Charlotte</td><td><span class="comments">61</span></td></tr> 31 <tr><td>Sammy</td><td><span class="comments">60</span></td></tr> 32 <tr><td>Zarran</td><td><span class="comments">60</span></td></tr> 33 <tr><td>Rowen</td><td><span class="comments">59</span></td></tr> 34 <tr><td>Stanislaw</td><td><span class="comments">59</span></td></tr> 35 <tr><td>Maighdlin</td><td><span class="comments">57</span></td></tr> 36 <tr><td>Connan</td><td><span class="comments">56</span></td></tr> 37 
<tr><td>Warrick</td><td><span class="comments">54</span></td></tr> 38 <tr><td>Diya</td><td><span class="comments">52</span></td></tr> 39 <tr><td>Lawson</td><td><span class="comments">52</span></td></tr> 40 <tr><td>Wu</td><td><span class="comments">51</span></td></tr> 41 <tr><td>Irmak</td><td><span class="comments">47</span></td></tr> 42 <tr><td>Emilija</td><td><span class="comments">47</span></td></tr> 43 <tr><td>Kayda</td><td><span class="comments">41</span></td></tr> 44 <tr><td>Ellenor</td><td><span class="comments">41</span></td></tr> 45 <tr><td>Kyra</td><td><span class="comments">41</span></td></tr> 46 <tr><td>Nikita</td><td><span class="comments">38</span></td></tr> 47 <tr><td>Kaelah</td><td><span class="comments">35</span></td></tr> 48 <tr><td>Meko</td><td><span class="comments">32</span></td></tr> 49 <tr><td>Marissa</td><td><span class="comments">31</span></td></tr> 50 <tr><td>Ayat</td><td><span class="comments">24</span></td></tr> 51 <tr><td>Sali</td><td><span class="comments">19</span></td></tr> 52 <tr><td>Hashem</td><td><span class="comments">19</span></td></tr> 53 <tr><td>Tygan</td><td><span class="comments">18</span></td></tr> 54 <tr><td>Rioden</td><td><span class="comments">17</span></td></tr> 55 <tr><td>Cruiz</td><td><span class="comments">16</span></td></tr> 56 <tr><td>Caoilfinn</td><td><span class="comments">13</span></td></tr> 57 <tr><td>Ewen</td><td><span class="comments">8</span></td></tr> 58 <tr><td>Baighley</td><td><span class="comments">7</span></td></tr> 59 <tr><td>Ramone</td><td><span class="comments">1</span></td></tr> 60 <tr><td>Kyran</td><td><span class="comments">1</span></td></tr> 61 <tr><td>Noelani</td><td><span class="comments">1</span></td></tr> 62 </table> 63 </body> 64 </html>
try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

from bs4 import BeautifulSoup

try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input  # Python 3


def count_and_sum(html):
    """Return (number of <span> tags, sum of their integer contents).

    Every <span> in html must contain a single integer string; anything
    else makes int() raise, which signals unexpected page content.
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')
    values = [int(tag.string) for tag in soup('span')]
    return len(values), sum(values)


if __name__ == "__main__":
    count, total = count_and_sum(urlopen(read_line()).read())
    print("%d %d" % (count, total))