# Filter entity records of a pretty-printed, JSON-like text file by a field value.
class ShaiXuanLeiXing:
    """Split the lines of a text file into "kept" and "discarded" entities.

    The input is processed line by line (it is never parsed as JSON).  It is
    expected to contain entities laid out one token per line, like this:

        {
          "data": {
            "tagList": [
              "测试"
            ],
            "roomLiveTitle": "直播间标题-大会直播",
            "roomLiveId": 4003879,
            "status": 4
          },
          "dataIndex": 1,
          "dataType": 4,
          "dataSource": 3
        },

    An entity whose filter field (``dataType`` by default) holds the wanted
    value stays in ``mubiao_list``; every other entity is moved, from the line
    before its ``"data": {`` line down to the first following line containing
    ``}``, into ``sheqi_list``.

    Attributes:
        file_name: Path of the input file.
        mubiao_list: Lines that are kept.
        sheqi_list: Lines that are discarded.
        read_list: All lines of the input file, newlines included.
        read_list_len: Number of lines in ``read_list``.
        end_num: Index in ``read_list`` of the next line to process.
        ziduan: Field name used when ``handleOneShiTi`` gets no argument.
        ziduanzhi: Wanted field value used when ``handleOneShiTi`` gets no
            argument.
    """

    # Stripped text of the line that opens an entity's "data" object.  The
    # backwards search in getDeleteDataList stops at this line.
    DATA_MARKER = '"data": {'

    def __init__(self, file_name):
        """Read ``file_name`` and initialise empty result lists."""
        self.file_name = file_name
        self.mubiao_list = []
        self.sheqi_list = []
        self.read_list = self.readText(self.file_name)
        # Set here (not only in handleFile) so that xiangxia and
        # handleOneShiTi can be used on a freshly built instance.
        self.read_list_len = len(self.read_list)
        self.end_num = 0
        # Filter settings are kept on the instance instead of being read from
        # module-level globals, which only exist when run as a script.
        self.ziduan = "dataType"
        self.ziduanzhi = "4"

    def readText(self, file_name):
        """Return every line of the UTF-8 file ``file_name`` as a list."""
        with open(file=file_name, mode='r', encoding="utf-8") as f:
            return f.readlines()

    def getDataType(self, datatype_hang):
        """Extract the value from a ``"name": value,`` line.

        Returns the text between the first ``:`` and the next ``,`` (or the
        next ``:`` / end of line), stripped of surrounding whitespace.  The
        result is always a string, e.g. ``'4'``.

        Raises:
            IndexError: If ``datatype_hang`` contains no ``:``.
        """
        datatype_list = datatype_hang.split(":")
        print("datatype_list:")
        print(datatype_list)
        ziduan_zhi_list = datatype_list[1].split(",")
        print("ziduan_zhi_list:")
        print(ziduan_zhi_list)
        ziduan_zhi = ziduan_zhi_list[0].strip()
        print("ziduan_zhi:")
        print(ziduan_zhi)
        print(type(ziduan_zhi))
        return ziduan_zhi

    def getDeleteDataList(self):
        """Remove the trailing, unfinished entity from ``mubiao_list``.

        Lines are popped from the end of ``mubiao_list`` up to and including
        the nearest ``"data": {`` line, plus the one line before it (the
        entity's opening brace).

        Returns:
            The removed lines in reverse file order (last line first).

        Raises:
            IndexError: If ``mubiao_list`` has no ``"data": {`` line, or that
                line is the very first entry.  ``mubiao_list`` is left
                untouched in that case.
        """
        print("处理前self.mubiao_list个数")
        print(len(self.mubiao_list))
        print(self.mubiao_list)

        # Validate before mutating so a malformed input fails with a clear
        # message instead of half-emptying the kept list first.
        marker_index = next(
            (
                index
                for index in range(len(self.mubiao_list) - 1, -1, -1)
                if self.mubiao_list[index].strip() == self.DATA_MARKER
            ),
            None,
        )
        if marker_index is None:
            raise IndexError(
                "no %r line found in the kept lines; cannot locate the start "
                "of the entity to discard" % self.DATA_MARKER
            )
        if marker_index == 0:
            raise IndexError(
                "the %r line has no preceding line; cannot locate the opening "
                "line of the entity to discard" % self.DATA_MARKER
            )

        zhongjian_list = []
        num = 0
        while True:
            num = num + 1
            print("删除目标,保存数据到中间列表中,循环处理第%s次" % str(num))
            zuihouyixiang = self.mubiao_list.pop()
            print("zuihouyixiang:")
            print(zuihouyixiang)
            zhongjian_list.append(zuihouyixiang)
            if zuihouyixiang.strip() == self.DATA_MARKER:
                print("zuihouyixiangdai{")
                print(zuihouyixiang)
                break
        # One more line: the entity's opening line sits right above the marker.
        zhongjian_list.append(self.mubiao_list.pop())

        print("处理后self.mubiao_list个数")
        print(len(self.mubiao_list))
        print("zhongjian_list个数")
        print(len(zhongjian_list))
        return zhongjian_list

    def getSheQiList(self, zhongjian_list):
        """Append ``zhongjian_list`` to ``sheqi_list`` in reverse order.

        ``zhongjian_list`` is emptied in the process.
        """
        print("zhongjian_list:")
        print(zhongjian_list)
        self.sheqi_list.extend(reversed(zhongjian_list))
        zhongjian_list.clear()
        print("self.sheqi_list:")
        print(self.sheqi_list)

    def xiangxia(self, max_lines=10):
        """Discard lines from ``end_num`` down to the first one containing ``}``.

        At most ``max_lines`` lines are examined; each examined line is
        appended to ``sheqi_list``.  ``end_num`` itself is not changed.

        Returns:
            The number of loop steps taken: the 1-based offset of the line
            containing ``}``, or ``max_lines`` when no such line was found
            (including when the end of the input was reached first).
        """
        k = 0
        for j in range(0, max_lines):
            k = k + 1
            if self.end_num + j < self.read_list_len:
                print("处理到self.read_list中第%s下表的的内容" % str(self.end_num + j))
                one_hang = self.read_list[self.end_num + j]
                self.sheqi_list.append(one_hang)
                if "}" in one_hang:
                    print("one_hang},")
                    print(one_hang)
                    break
        print("k:")
        print(k)
        return k

    def writeListToTxt(self, file_name, list_data):
        """Write the strings of ``list_data`` to ``file_name`` (UTF-8), unchanged."""
        with open(file_name, "w", encoding="utf-8") as f:
            f.writelines(list_data)

    def _scan_keep(self, ziduan, ziduanzhi, first_pass):
        """Keep lines from ``end_num`` on until a non-matching field line.

        Every line is appended to ``mubiao_list`` and ``end_num`` advanced,
        until a line containing ``ziduan`` (case-insensitive) carries a value
        different from ``ziduanzhi``.  That line is neither kept nor consumed.

        Returns:
            True if such a line stopped the scan, False at end of input.
        """
        if first_pass:
            message = "遇到第一个%s值不是%s的%s行的内容"
        else:
            message = "遇到一个%s值不是%s的%s行的内容"
        needle = ziduan.lower()
        for i in range(self.end_num, self.read_list_len):
            print("处理到第%s行的内容" % str(i))
            one_hang = self.read_list[i]
            if needle in one_hang.lower():
                print(message % (ziduan, ziduanzhi, i))
                print(one_hang)
                data_type_zhi = self.getDataType(datatype_hang=one_hang)
                print(data_type_zhi)
                if data_type_zhi != ziduanzhi:
                    return True
            self.mubiao_list.append(one_hang)
            self.end_num = self.end_num + 1
            print("处理到self.mubiao_list第%s个下标" % str(self.end_num))
        return False

    def handleOneShiTi(self, ziduan=None, ziduanzhi=None):
        """Partition ``read_list`` into ``mubiao_list`` and ``sheqi_list``.

        Starts from a clean state (both lists emptied, ``end_num`` reset), so
        it can be called repeatedly on the same instance.

        Args:
            ziduan: Name of the field to filter on; defaults to ``self.ziduan``.
            ziduanzhi: Wanted value; defaults to ``self.ziduanzhi``.  It is
                compared as text, so ``4`` and ``"4"`` are equivalent.

        Raises:
            IndexError: If a field line has no ``:`` or a non-matching entity
                has no ``"data": {`` line above it (see ``getDataType`` and
                ``getDeleteDataList``).
        """
        if ziduan is None:
            ziduan = self.ziduan
        if ziduanzhi is None:
            ziduanzhi = self.ziduanzhi
        # getDataType returns text, so compare against text.
        ziduanzhi = str(ziduanzhi)

        self.mubiao_list = []
        self.sheqi_list = []
        self.end_num = 0

        first_pass = True
        while self.end_num < self.read_list_len:
            self._scan_keep(ziduan, ziduanzhi, first_pass)
            first_pass = False
            print("从断点下标%s开始处理" % str(self.end_num))
            # The scan ran off the end of the input: nothing left to discard.
            # Without this check the last kept entity would be thrown away.
            if self.end_num >= self.read_list_len:
                print("从断点下标%s超过超过数列%s长度,终止循环" % (str(self.end_num), str(self.read_list_len)))
                break

            # The scan stopped on a non-matching field line.  The lines of
            # that entity seen so far were already kept; take them back out.
            zhongjian_list = self.getDeleteDataList()
            print("self.mubiao_list_hou:")
            print(self.mubiao_list)
            print(len(self.mubiao_list))
            # Store them, restored to file order, in the discard list.
            self.getSheQiList(zhongjian_list)
            # Discard the rest of the entity, down to its closing "}" line.
            k = self.xiangxia()
            self.end_num = self.end_num + k
            print("接着从断点 %s行开始处理" % str(self.end_num))

    def handleFile(self, ziduan="dataType", ziduanzhi="4"):
        """Filter the input and write both result files to the working directory.

        Kept lines go to ``mubiao_<ziduan>_<ziduanzhi>.txt`` and discarded
        lines to ``sheqi_not_<ziduan>_<ziduanzhi>.txt``.

        Args:
            ziduan: Name of the field to filter on.
            ziduanzhi: Field value an entity must have to be kept.
        """
        print("self.read_list:")
        print(self.read_list)
        self.read_list_len = len(self.read_list)
        print("self.read_list_len:")
        print(self.read_list_len)

        # Hand the filter settings over through the instance; handleOneShiTi
        # is called without arguments so overriding subclasses keep working.
        self.ziduan = ziduan
        self.ziduanzhi = ziduanzhi
        self.handleOneShiTi()

        print(self.read_list_len)
        print(len(self.mubiao_list))
        print(len(self.sheqi_list))
        self.writeListToTxt(file_name="mubiao_%s_%s.txt" % (ziduan, ziduanzhi), list_data=self.mubiao_list)
        self.writeListToTxt(file_name="sheqi_not_%s_%s.txt" % (ziduan, ziduanzhi), list_data=self.sheqi_list)


if __name__ == '__main__':
    # Keep only the entities with the given dataType.
    file_name = "new 1.txt"
    ziduan = "dataType"
    ziduanzhi = "4"
    sx = ShaiXuanLeiXing(file_name)
    sx.handleFile(ziduan=ziduan, ziduanzhi=ziduanzhi)