第1章 1.6 从结构化字符串中提取数据
一、例子:
>>> import delorean
>>> from decimal import Decimal
>>>
>>> log = '[2018-05-05T11:07:12.267897] - SALE - PRODUCT: 1345 - PRICE: $09.99'
>>> devide_it = log.split(' - ')
>>> print(devide_it)
['[2018-05-05T11:07:12.267897]', 'SALE', 'PRODUCT: 1345', 'PRICE: $09.99']
>>> timestamp_string, _, product_string, price_string = devide_it
>>> timestamp = delorean.parse(timestamp_string.strip('[]'))
#string.strip('[]')可以将字符串前后的'['或者']'去掉,如下:
>>> print(timestamp_string.strip('[]'))
2018-05-05T11:07:12.267897
>>> product_id = int(product_string.split(': ')[-1])
>>> price = Decimal(price_string.split('$')[-1])
>>> timestamp, product_id, price
(Delorean(datetime=datetime.datetime(2018, 5, 5, 11, 7, 12, 267897), timezone='UTC'), 1345, Decimal('9.99'))
>>> print(timestamp)
Delorean(datetime=datetime.datetime(2018, 5, 5, 11, 7, 12, 267897), timezone='UTC')
>>> timestamp.shift('Asia/Shanghai')
Delorean(datetime=datetime.datetime(2018, 5, 5, 19, 7, 12, 267897), timezone='Asia/Shanghai')
#通过shift('Asia/Shanghai')转换成上海时区
>>> timestamp.datetime
datetime.datetime(2018, 5, 5, 19, 7, 12, 267897, tzinfo=<DstTzInfo 'Asia/Shanghai' CST+8:00:00 STD>)
>>> timestamp.date
datetime.date(2018, 5, 5)
>>> print(timestamp.date)
2018-05-05
#通过date属性提取日期;时间部分可用timestamp.datetime.time()提取
>>> date_string = timestamp.date
>>> date_string
datetime.date(2018, 5, 5)
>>> print(date_string)
2018-05-05
>>> str(date_string)
'2018-05-05'
>>> print(timestamp.datetime)
2018-05-05 19:07:12.267897+08:00
#可以通过将datetime转成字符串之后,通过截取的方式取时间要素
>>> log = '[2018-05-05 11:07:12.267897]'
>>> timestamp = delorean.parse(log.strip('[]'))
>>> print(timestamp)
Delorean(datetime=datetime.datetime(2018, 5, 5, 11, 7, 12, 267897), timezone='UTC')
>>> import datetime
>>> timestamp += datetime.timedelta(hours=2)
#增加2个小时
>>> print(timestamp)
Delorean(datetime=datetime.datetime(2018, 5, 5, 13, 7, 12, 267897), timezone='UTC')
>>> timestamp += datetime.timedelta(days=-2)
#减去2天(days=-2表示往前推2天)
>>> print(timestamp)
Delorean(datetime=datetime.datetime(2018, 5, 3, 13, 7, 12, 267897), timezone='UTC')
>>> delorean.parse('2018-05-06')
Delorean(datetime=datetime.datetime(2018, 6, 5, 0, 0), timezone='UTC')
#delorean.parse默认dayfirst=True(日在月之前),所以'2018-05-06'被解析成了6月5日,得到的日期不正确
>>> delorean.parse('2018-05-06', dayfirst=False)
Delorean(datetime=datetime.datetime(2018, 5, 6, 0, 0), timezone='UTC')
#通过dayfirst=False参数后可以得到正确的日期
>>> d = delorean.Delorean()
>>> print(d)
Delorean(datetime=datetime.datetime(2022, 4, 8, 12, 29, 9, 273825), timezone='UTC')
>>> d = d.shift('Asia/Shanghai')
>>> print(d)
Delorean(datetime=datetime.datetime(2022, 4, 8, 20, 29, 9, 273825), timezone='Asia/Shanghai')