2.1 Python 使用 MongoDB 示例代码

import pymongo


# Connect to the MongoDB server listening on localhost, port 27017.
client = pymongo.MongoClient(host='localhost', port=27017)
# Handle to the database named 'walden'.
walden = client.get_database('walden')
# Handle to the 'sheet_tab' collection inside that database.
sheet_tab = walden.get_collection('sheet_tab')

# Demo 1: read walden.txt and insert one document per line (index, text, word count)
# path = '/Users/qiongyanzhu/Documents/Plan-for-combating-master/week2/2_1/2_1code_of_video/walden.txt'
# with open(path, 'r') as f:
#     lines = f.readlines()
#     for index, line in enumerate(lines):
#         data = {
#             'index': index,
#             'line': line,
#             'words': len(line.split())
#         }
#         print(data)
#         sheet_tab.insert_one(data)

# Demo 2: print the documents whose 'words' value is 0
# for item in sheet_tab.find({'words': 0}):
#     print(item)

# Demo 3: comparison operators ($lt / $lte / $gt / $gte / $ne).
# Print every document whose 'words' value is less than 5.
few_words_filter = {'words': {'$lt': 5}}
for item in sheet_tab.find(few_words_filter):
    print(item)

# Demo 4: print the 'line' field of every document in the collection.
all_documents = sheet_tab.find()
for item in all_documents:
    print(item['line'])

  

from bs4 import BeautifulSoup
import requests
import pymongo


# Connect to the MongoDB server listening on localhost, port 27017.
client = pymongo.MongoClient(host='localhost', port=27017)
# Handle to the database named 'xiaozhu'.
xiaozhu = client.get_database('xiaozhu')
# Handle to the 'sheet_tab' collection that receives the scraped listings.
sheet_tab = xiaozhu.get_collection('sheet_tab')

# Search-result page URLs for pages 1 through 3 (range end is exclusive).
url_as = [
    'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(page)
    for page in range(1, 4)
]


def _parse_price(text):
    """Return the integer price embedded in *text*, or None if it has no digits.

    The first run of digits (thousands separators allowed) is used, so the
    surrounding currency symbol and unit suffix may change without breaking
    the parse.
    """
    import re  # function-scope import keeps this block self-contained

    match = re.search(r'\d[\d,]*', text)
    if match is None:
        return None
    return int(match.group().replace(',', ''))


def insert_house_info(url_s, timeout=10):
    """Scrape listing pages and store one document per listing in ``sheet_tab``.

    Args:
        url_s: Iterable of search-result page URLs to fetch.
        timeout: Seconds to wait for each HTTP response before ``requests``
            gives up and raises a timeout error.

    Each stored document has the keys ``price`` (int), ``title`` (the text of
    the title element) and ``url`` (the ``detailurl`` attribute of the listing
    element). Listings whose price text contains no digits are skipped.
    """
    for url_a in url_s:
        # Fetch the page; without a timeout a stalled server would block
        # the whole crawl indefinitely.
        wb_data = requests.get(url_a, timeout=timeout)
        # Parse the HTML with the lxml parser.
        soup = BeautifulSoup(wb_data.text, 'lxml')
        prices = soup.select('span.result_price')
        titles = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > div > a > span')
        urls = soup.select('#page_list > ul > li > div.result_btm_con.lodgeunitname')

        # The three selections are paired by position; zip stops at the
        # shortest of them.
        for price, title, url in zip(prices, titles, urls):
            amount = _parse_price(price.get_text())
            if amount is None:
                # No usable price: skip this listing rather than abort the
                # crawl on a single malformed entry.
                continue
            info = {
                'price': amount,
                'title': title.get_text(),
                'url': url.get('detailurl')
            }
            sheet_tab.insert_one(info)


def find_house(min_price=500):
    """Print every document in ``sheet_tab`` whose ``price`` exceeds *min_price*.

    Args:
        min_price: Exclusive lower bound for the ``price`` field.
            Defaults to 500.
    """
    # $gt is a strict "greater than" comparison, so documents priced exactly
    # at min_price are not printed.
    for info in sheet_tab.find({'price': {'$gt': min_price}}):
        print(info)


# Scrape the pages listed in url_as into MongoDB, then print the stored
# listings whose price is above 500.
insert_house_info(url_as)
find_house()

  

posted @ 2016-07-01 20:56  莫西西杯  阅读(268)  评论(0)  编辑  收藏  举报