在GAE中用Python编写webapp进行POST数据采集
#!/usr/bin/env python
# -*- coding: cp936 -*-
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Demo webapp2 handler: POST a form with urlfetch and scrape page ids.

The handler sends a form-encoded POST request through App Engine's
``urlfetch`` service, then extracts the numeric arguments of
``opennewpage('<digits>')`` calls from a sample HTML snippet and writes
them to the HTTP response.
"""
import re

import webapp2
#from lxml import etree
from google.appengine.api import urlfetch

# Matches opennewpage('<digits>') and captures the digits.
# Compiled once at import time instead of on every request.
_OPEN_PAGE_RE = re.compile(r"opennewpage\('(\d+)'\)")

# Sample markup scanned by the handler in place of a real response body.
_SAMPLE_HTML = "<a href='javascript:void(0)' onclick='opennewpage('54999')'>ddd</a><a onclickk='opennewpage('123456')'>aaa</a>"


def _extract_page_ids(html):
    """Return the distinct ``opennewpage`` ids found in *html*.

    Ids are returned as strings, without duplicates, in the order in
    which they first appear, so the rendered output is deterministic.
    """
    seen = set()
    ordered = []
    for page_id in _OPEN_PAGE_RE.findall(html):
        if page_id not in seen:
            seen.add(page_id)
            ordered.append(page_id)
    return ordered


class MainHandler(webapp2.RequestHandler):
    """Handles ``GET /`` by issuing a POST fetch and listing scraped ids."""

    def get(self):
        """Send the form POST, then write the ids found in the sample HTML."""
        # Plain GET variant:
        #self.response.write(urlfetch.fetch('https://www.cnblogs.com/').content)
        form_data = 'ssss=aaa&bb=ccc'
        # The fetch response gets its own name so it is no longer
        # overwritten by the output string built below.
        # NOTE(review): the response body is not parsed yet; the sample
        # HTML stands in for it. The binding is kept so the commented-out
        # variants at the end of this method can be enabled.
        fetched = urlfetch.fetch(
            url='https://www.cnblogs.com',
            payload=form_data,
            method=urlfetch.POST,
            follow_redirects=False,
            headers={'Content-Type': 'application/x-www-form-urlencoded'})
        #self.response.headers['Content-Type'] = 'application/json'

        page_ids = _extract_page_ids(_SAMPLE_HTML)
        output = 'Result:<br />' + ''.join(
            page_id + '<br />' for page_id in page_ids)
        self.response.write(output)

        # lxml/XPath variant (requires the lxml import above):
        #ids = fetched.content.xpath("//a[starts-with(@onclick,'opennewpage')]")
        #for i in ids:
        #    print(i.text)
        #self.response.write(fetched.content)


# NOTE(review): debug=True is kept from the original; confirm it is
# intended before deploying outside development.
app = webapp2.WSGIApplication([
    ('/', MainHandler)
], debug=True)