python学习笔记(一) python3: urlopen()使用出现TypeError: can't convert 'bytes' object to str implicitly
最近写程序时采用urllib.request中的urlopen来读取网页文本文件,结果出现了TypeError,现在将问题和解决办法记录如下,希望能为遇到相似问题的朋友提供帮助:
使用的python版本为3.5.2
# -*- coding: UTF-8 -*-
#!/usr/bin/python
# Filename: urllib.py  (NOTE(review): this name would shadow the stdlib urllib package -- confirm)
import random
from urllib.request import urlopen
import sys
# Remote word list, fetched once at start-up; WORDS is filled from it below.
WORD_URL = "http://learncodethehardway.org/words.txt"
WORDS = []
# Snippet -> description; %%% = class name, *** = other name, @@@ = parameter list.
PHRASES = {
    " class %%%(%%%):":
    "Make a class named %%% that is-a %%%.",
    "class %%%(object):\n\tdef__init__(self,***)":
    "class %%% has-a __init__ that takes self and *** parameters.",
    "*** = %%%()":
    "Set *** to an instance of class %%%.",
    "***.***(@@@)":
    "From *** get the *** function, and call it with parameters self,@@@.",
    "***.*** = '***'":
    "From *** get the *** attribute and set it to '***'."
}
# Passing "english" as the only argument swaps the roles: the description is shown first.
PHRASES_FIRST = False
if len(sys.argv) == 2 and sys.argv[1] == "english":
    PHRASES_FIRST = True
# The lines read from the response are bytes, so every entry stored here is bytes.
for word in urlopen(WORD_URL).readlines():
    WORDS.append(word.strip())  # bytes, not str: the cause of the TypeError discussed here
# Fill the placeholders of snippet and phrase with random words; returns [snippet, phrase].
def convert(snippet, phrase):
    class_names = [w.capitalize() for w in
                   random.sample(WORDS, snippet.count("%%%"))]
    other_names = random.sample(WORDS, snippet.count("***"))
    results = []
    param_names = []
    # One comma-separated group of 1-3 random words per "@@@" placeholder.
    for i in range(0, snippet.count("@@@")):
        param_count = random.randint(1,3)
        param_names.append(', '.join(random.sample(WORDS, param_count)))
        #param_names.append(', '.join('%s' %id for id in random.sample(WORDS, param_count)))
    # Apply the same substitutions, in the same order, to both texts.
    for sentence in snippet, phrase:
        result = sentence[:]
        print(result)  # prints the unfilled template (looks like leftover debug output)
        # count=1 so that each placeholder occurrence receives its own word.
        for word in class_names:
            result = result.replace("%%%", word, 1)

        for word in other_names:
            result = result.replace("***",word, 1)

        for word in param_names:
            result = result.replace("@@@", word, 1)

        results.append(result)

    return results
# Quiz loop: keeps asking until input() raises EOFError (end of input).
try:
    while True:
        snippets = list(PHRASES.keys())
        random.shuffle(snippets)

        for snippet in snippets:
            phrase = PHRASES[snippet]
            question, answer = convert(snippet, phrase)
            if PHRASES_FIRST:
                question, answer = answer,question

            print (question)
            # Wait for the user to press Enter; the typed text is discarded.
            input ("> ")
            print ("ANSWER: %s\n\n" % answer)
except EOFError:
    print ("\nBye!")
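运行后出现的问题为:TypeError: can't convert 'bytes' object to str implicitly。错误并不是在读取单词的第29行抛出的,而是在之后把这些单词当作字符串使用时才出现,例如在convert()中执行result.replace("%%%", word, 1)时,因为WORDS中保存的元素是bytes而不是str。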
查阅资料之后发现,urlopen()返回的是一个响应对象,通过它的read()或readlines()读取到的内容是bytes对象,如果需要对它进行字符串操作,就必须显式地将其解码(decode)为字符串,否则会出现上述问题。解决办法如下:
我们在读取网页内容的时候,直接按'utf-8'编码将其解码为字符串,之后就可以正常使用了。将上述代码中的第29行改为:
WORDS.append(word.strip().decode('utf-8'))
问题解决。