# 借助 LTP 语义分析提取特征，之后进行文本生成。(Extract features with LTP semantic analysis, then generate text.)
"""
地点-哪里有做-业务-的(正规|靠谱)-公司?
地点-做-业务-的(正规|靠谱)-公司(有哪些?|的联系方式是什么?|哪家口碑好值得信赖?)
地点-做-业务-(怎么能省钱?|费用是多少?|需要注意什么?|有哪些流程?)
地点-业务-(靠谱吗?|到底怎样?)
地点-(最好的|靠谱的)业务-是哪家?
"""
# Demo inputs: one location string and the business keywords to ask about.
p, b = '深圳市', ['广告设计', '网络推广']

# Question templates. "地点" is the location placeholder, "业务" the business
# placeholder, "-" only separates segments, and "(x|y)" lists alternatives.
ltp_model = [
    '地点-哪里有做-业务-的(正规|靠谱)-公司?',
    '地点-做-业务-的(正规|靠谱)-公司(有哪些|的联系方式是什么|哪家口碑好值得信赖)?',
    '地点-做-业务-(怎么能省钱|费用是多少|需要注意什么|有哪些流程|靠谱吗|到底怎样)?',
    '地点-(最好的|靠谱的)业务-是哪家?',
]

# Seed list: for every template (location filled in, separators dropped) one
# entry per business keyword. The "(x|y)" groups are still unexpanded here.
r_l = [
    located.replace('业务', business)
    for located in (template.replace('地点', p).replace('-', '') for template in ltp_model)
    for business in b
]
def deal_first_splittag_str(i):
    """Expand the first ``(a|b|...)`` group of a template string.

    The text between the first ``(`` and the next ``)`` after it is split on
    ``|``; one string is produced per alternative, with the parentheses
    removed. Any later groups are left untouched.

    Args:
        i: Template string, e.g. ``'x(a|b)y'``.

    Returns:
        A list with one string per alternative, in written order. When ``i``
        contains no complete ``(...)`` group the result is ``[i]``.
    """
    open_at = i.find('(')
    # Search for the closing parenthesis only after the opening one, so a stray
    # ")" earlier in the text is never paired with it.
    close_at = i.find(')', open_at + 1) if open_at != -1 else -1
    if close_at == -1:
        # No complete group: slicing with -1 would produce garbage, so hand the
        # string back unchanged instead.
        return [i]

    head, tail = i[:open_at], i[close_at + 1:]
    return [head + option + tail for option in i[open_at + 1:close_at].split('|')]
def deal_first_splittag(s_l_0):
    """Expand the first ``(a|b|...)`` group of every string in a list.

    Strings without ``(`` are copied through unchanged. For the others, the
    text between the first ``(`` and the first ``)`` is split on ``|`` and one
    output string is emitted per alternative, in written order.

    Args:
        s_l_0: Iterable of template strings.

    Returns:
        A new list holding the pass-through strings and the expansions, in
        input order.
    """
    expanded = []
    for text in s_l_0:
        open_at = text.find('(')
        close_at = text.find(')')
        if open_at < 0:
            expanded.append(text)
            continue
        prefix = text[:open_at]
        suffix = text[close_at + 1:]
        choices = text[open_at + 1:close_at].split('|')
        expanded.extend(prefix + choice + suffix for choice in choices)
    return expanded
# Expand one "(x|y)" group per pass until no string contains "(" any more.
# Every pass builds a fresh list through deal_first_splittag() instead of
# deleting from and appending to r_l while it is being iterated, which skipped
# elements and forced repeated rescans.
# NOTE: termination relies on every "(" having a matching ")"; an unmatched
# "(" survives each pass and would keep this loop running.
while any('(' in candidate for candidate in r_l):
    r_l = deal_first_splittag(r_l)

d = 9  # not referenced anywhere in the visible part of this file
# Question templates used by gen_r(). "地点" is the location placeholder, "业务"
# the business placeholder, "-" only separates segments, and "(x|y)" lists
# alternatives of which exactly one ends up in each generated question.
_QUESTION_TEMPLATES = (
    '地点-哪里有做-业务-的(正规|靠谱)-公司?',
    '地点-做-业务-的(正规|靠谱)-公司(有哪些|的联系方式是什么|哪家口碑好值得信赖)?',
    '地点-做-业务-(怎么能省钱|费用是多少|需要注意什么|有哪些流程|靠谱吗|到底怎样)?',
    '地点-(最好的|靠谱的)业务-是哪家?',
)


def _expand_alternations(text):
    """Return every string obtained by picking one option per ``(x|y)`` group.

    Groups are resolved left to right and the results are ordered accordingly.
    A ``(`` without a later ``)`` is kept literally, so the expansion always
    terminates.
    """
    finished = []
    pending = [text]
    while pending:
        current = pending.pop()
        open_at = current.find('(')
        close_at = current.find(')', open_at + 1) if open_at != -1 else -1
        if close_at == -1:
            # No complete group left in this string.
            finished.append(current)
            continue
        head, tail = current[:open_at], current[close_at + 1:]
        options = current[open_at + 1:close_at].split('|')
        # Push in reverse so the options are popped in their written order.
        pending.extend(head + option + tail for option in reversed(options))
    return finished


def gen_r(uid):
    """Generate question strings for the user stored under ``uid``.

    Looks up the documents with the given ``uid`` in the ``Analyse_user``
    collection of the ``personas_test`` database. The last document returned
    supplies the location (``province`` + ``city``) and the business keywords
    (``core_keyword`` + ``extract_keyword`` + ``biz``, de-duplicated in
    first-seen order). Every template is then filled in for every keyword and
    its ``(x|y)`` groups are expanded.

    Args:
        uid: User id; anything accepted by ``int()``.

    Returns:
        A list of generated question strings ordered by template, then
        keyword, then alternative, or ``['uid not in db']`` when no document
        matches.

    Raises:
        ValueError, TypeError: If ``uid`` cannot be converted by ``int()``.
        KeyError: If the matched document lacks one of the fields read here.
        Errors raised by pymongo (e.g. on connection failure) propagate
        unchanged.
    """
    from pymongo import MongoClient

    uid = int(uid)

    # NOTE(review): the server address and the admin/admin credentials are
    # hard-coded; consider moving them to configuration.
    client = MongoClient("mongodb://192.168.2.50:27017/", username='admin', password='admin')
    try:
        cursor = client["personas_test"]['Analyse_user'].find(
            {'uid': uid},
            {'city': 1, 'province': 1, 'core_keyword': 1, 'extract_keyword': 1, 'biz': 1},
        )
        try:
            docs = list(cursor)
        finally:
            cursor.close()
    finally:
        # Release the connection pool; a client is created on every call.
        client.close()

    if not docs:
        return ['uid not in db']

    # When several documents share the uid, the last one returned wins.
    d = docs[-1]
    p = '%s%s' % (d['province'], d['city'])
    # presumably the three fields are lists of keyword strings -- TODO confirm
    # dict.fromkeys() de-duplicates while keeping a stable, first-seen order.
    b = list(dict.fromkeys(d['core_keyword'] + d['extract_keyword'] + d['biz']))

    r_l = []
    for template in _QUESTION_TEMPLATES:
        located = template.replace('地点', p).replace('-', '')
        for business in b:
            # NOTE: parentheses inside the location or keyword text are
            # expanded like template groups as well.
            r_l.extend(_expand_alternations(located.replace('业务', business)))
    return r_l
from wsgiref.util import setup_testing_defaults, request_uri
from wsgiref.simple_server import make_server
def simple_app(environ, start_response):
    """WSGI entry point that dispatches on the text of the request URI.

    The response is always started as ``200 OK`` with a UTF-8 plain-text
    content type. The request URI (query string included) then selects the
    body:

    * contains ``test_dump``: one ``key: value`` line per ``environ`` entry;
    * contains ``uid``: the text after the last ``/`` is parsed as an integer;
      a positive value is passed to ``gen_r`` and each result is returned as
      an ``index: text`` line, a non-positive value yields a ``BAD-REQUEST``
      line, and any exception yields its message plus a retry hint;
    * anything else: a ``dev-ING-`` placeholder line.

    Args:
        environ: WSGI environment dict; missing keys are filled in by
            ``setup_testing_defaults``.
        start_response: WSGI ``start_response`` callable.

    Returns:
        A list of UTF-8 encoded byte strings forming the response body.
    """
    setup_testing_defaults(environ)
    start_response('200 OK', [('Content-type', 'text/plain; charset=utf-8')])

    r_uri = request_uri(environ, include_query=True)

    if 'test_dump' in r_uri:
        return [("%s: %s\n" % (name, val)).encode('utf-8') for name, val in environ.items()]

    if 'uid' not in r_uri:
        return [('%s%s' % ('dev-ING- ', r_uri)).encode('utf-8')]

    try:
        uid = int(r_uri.split('/')[-1])
        if uid <= 0:
            return [('%s%s' % ('BAD-REQUEST ', r_uri)).encode('utf-8')]
        r_l = gen_r(uid)
        return [("%s: %s\n" % (pos, text)).encode('utf-8') for pos, text in enumerate(r_l)]
    except Exception as e:
        # Every failure, including a non-numeric id, is reported in the body.
        return [('%s%s%s' % (e, ' retry...,please.... ', r_uri)).encode('utf-8')]
# Start the wsgiref server for simple_app on port 8000 with an empty host
# string. This runs at module level, so it executes whenever the file is run
# or imported; serve_forever() is expected to block until the process is
# interrupted.
with make_server('', 8000, simple_app) as httpd:
    print("Serving on port 8000...")
    httpd.serve_forever()