# Constant subtracted from the measured header-to-footer span before the span
# is compared with len(tableText); with 0 the span must match exactly.
tempValue = 0


def replaceTextByTable(pageText: str, headerText: str, footerText: str,
                       tableText: str, preFooterPosition: int = 0):
    """Replace the header-to-footer span whose length matches ``tableText``.

    Scans ``pageText`` for occurrences of ``headerText`` starting at
    ``preFooterPosition``. For each occurrence, it checks whether
    ``footerText`` sits at exactly the offset that makes the span
    (header start through footer end, minus ``tempValue``) as long as
    ``tableText``. The first such span is replaced by ``tableText``.

    Args:
        pageText: The text to search and modify.
        headerText: Text marking the start of the span to replace.
        footerText: Text marking the end of the span to replace.
        tableText: Replacement text; its length selects the span.
        preFooterPosition: Index at which the search starts. Pass the second
            element of a previous call's result to continue after the span
            replaced by that call.

    Returns:
        A ``(newText, endPosition)`` tuple. On success ``newText`` has the
        span replaced and ``endPosition`` is the index just past the matched
        footer in ``pageText``. When no span matches, ``pageText`` and
        ``preFooterPosition`` are returned unchanged.
    """
    # A matching span satisfies
    #     (footer_pos + len(footerText)) - header_pos - tempValue == len(tableText)
    # so for a given header the footer can only be at one position.
    footer_offset = len(tableText) + tempValue - len(footerText)
    if footer_offset <= 0:
        # The footer has to start strictly after the header start.
        return pageText, preFooterPosition

    header_pos = pageText.find(headerText, preFooterPosition)
    while header_pos != -1:
        footer_pos = header_pos + footer_offset
        if pageText.startswith(footerText, footer_pos):
            span_end = footer_pos + len(footerText)
            old_span = pageText[header_pos:span_end]
            # Splice by index so the span that was located is the one that is
            # replaced, even if identical text occurs earlier in the page.
            new_text = pageText[:header_pos] + tableText + pageText[span_end:]
            print("-------------------")
            print("ori str:" + pageText)
            print("old str:" + old_span)
            print("new str:" + new_text)
            print("-------------------")
            return new_text, span_end
        header_pos = pageText.find(headerText, header_pos + 1)

    # No header/footer pair encloses a span of the required length.
    return pageText, preFooterPosition


# --- Disabled earlier draft (closest-length pairing), kept for reference ---
# # Indices at which the text preceding the table occurs in the document
# headerPositions = getPositions(pageText, headerText)
# # Indices at which the text ending the table occurs in the document
# footerPositions = getPositions(pageText, footerText)
#
# # Length of the table string
# tableLength = len(tableText)
# # gapLength is the difference between the length of the text between the two
# # indices and the length of tableText; the smaller the difference, the more
# # likely that span is the string to replace
# minLength = tableLength
# # Record the header and footer positions
# gapHeaderPosition = -1
# gapFooterPosition = -1
#
# # Compute the distance for every header/footer pair and compare it with the
# # length of tableText
# for pHeader in headerPositions:
#     for pFooter in footerPositions:
#         indexDistance = abs(int(pHeader) - int(pFooter))
#         # Update the recorded header/footer positions here; the condition is
#         # that the two indices ... (the original note breaks off here)
#         gap = abs(indexDistance - tableLength)
#         if gap < minLength:
#             minLength = gap
#             gapHeaderPosition = pHeader
#             gapFooterPosition = pFooter
#
# # If a suitable header/footer pair was found, perform the replacement
# if gapHeaderPosition != -1 and gapFooterPosition != -1:
#     oldStr = pageText[gapHeaderPosition: gapFooterPosition + len(footerText)]
#     newStr = pageText.replace(oldStr, tableText)
#     print("oldStr:" + oldStr)
#     print("originStr:" + pageText)
#     print(" newStr:" + newStr)
#     return newStr
# else:
#     return pageText
# # print(headerPositions)
# # print(footerPositions)
#
#
# def getPositions(pageText, subText):
#     positions = []
#     start = 0
#     while True:
#         # Find the index of the next occurrence of the substring
#         position = pageText.find(subText, start)
#         # A return value of -1 means nothing was found, so the loop ends
#         if position == -1:
#             break
#         # Otherwise store the index in the list
#         positions.append(position)
#         # Move the search start to one past the index just found; without
#         # the +1 the search would keep returning the same index
#         start = position + 1
#     return positions

# Demo: replace three successive "word ... you" spans, feeding each call's
# returned end position into the next call as its search start.
originStr = "hello word + you. Do you want word + you have word + you?"
newStr1, preFooterPosition1 = replaceTextByTable(originStr, "word", "you", "XXXXXXXXXX")
newStr2, preFooterPosition2 = replaceTextByTable(newStr1, "word", "you", "XXXXXXXXXX", preFooterPosition1)
newStr3, preFooterPosition3 = replaceTextByTable(newStr2, "word", "you", "XXXXXXXXXX", preFooterPosition2)

# Further disabled examples; the trailing "a:b" tags appear to name which
# header/footer occurrence should be paired.
# replaceTextByTable("hello word, i have a word give you. Do you want have?", "word", "you",
#                    "XXXXXXXXXXXXXXXXXXXXX")  # 2:4
# replaceTextByTable("hello word, i have a word give you. Do you want have?", "word", "you",
#                    "XXXXXXXXXXXXXXXXXXXXXXXXXXXX")  # 1:3
# replaceTextByTable("hello word, i have a word give you. Do you want have?", "word", "you",
#                    "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")  # 1:4