爬虫第五篇:图片验证码识别

在阿里云的云市场上购买了一个图形验证码识别接口,下面用 Python 调用该接口来识别图形验证码:

https://market.aliyun.com/products/57124001/cmapi031324.html?spm=5176.730005.productlist.d_cmapi031324.77823524enObx1&innerSource=search_%E5%9B%BE%E7%89%87%E9%AA%8C%E8%AF%81%E7%A0%81%E8%AF%86%E5%88%AB#sku=yuncode2532400001

第一种方法:把图片的 base64 编码字符串直接写在请求数据里

import json
import urllib
import urllib.request

# Endpoint of the captcha-recognition API: host and path are kept separate
# and joined into the full request URL.
host = "http://tupian.market.alicloudapi.com"
path = "/clouds/ocr/webImage"
url = host + path

# HTTP verb and (empty) query string for the call.
method = "POST"
querys = ""

# AppCode sent in the Authorization header; placeholder, substitute your own.
appcode = "XXXXXXXX"


data = {"image": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAoAPoDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwB+tiHxDqSaRaMBasA106DdtHUIAO5x+VX4tFsfBd7LfophhW3UvBK+1doJO4dgenJ/MVh29tb6PdRxw6u0d7cyHEowTNJ7DoP0GMVqeJNauLbRopLnR5dUuhG8Uix8rIDjaChB4AHOM1qyTpfENsdQ8K3tpDDZXlxdxoyJPLJ5KqxBO4xkH7ucHPUCo7Swntr25T7eXsWRPItioxAAuMBurZ681m6Zr+lXOm2txZTeRIIwFTZgjH8LKcZHH+GKluPF2lWUtvBNEJLu5nCkRsXIOOBxwF780rAafhaCH7Abi+tGspVlYG2mOXAyedw4we1XLy8tbi1uLFJo/MjYzwbnK7cclcgckjjHevPfGfj2z0XWW02O2mvr0oqy28bFUUnkKSOSea5tvH1vNqOnvqmiarpn2d8H7NNlZAcAh0kA3cA85BGTg0roZ3Hm31lq5jnuxdacEYyW1tJ+7cOvDb+oKnuPTBqlpujS3Gn29rqzMyKoMU0sod5cHPzMBkECupudIh1S+m086ncQRXdkbdbePC+VISGWUHBKtxg47e9cZP4gtdC0CPw/deKNIl1O3l5vmSWTaBwF+RTn/OTTvqBo3kem+ErqO41CExWd6hRLgA8MCNuT6EZGfbmjTdJ0bRtalPhyGSeVn3tIWZYsMvfdyOew9OAar66tnq1lYa0ZINStbOJIneKRjCXwwLsmMKzZHUDoMAVPY+I59QtLe5WK737BGsUy4VUU8LxxkDv1piNiXSoLGb+3dYureeMjKkthIWGc/QYxz39KmupYhLHJ58SCQDaSw+cYzhfXj864oeK9N8QWdyiw+Y8RZJLeUBI2yGGCwPIPY8dOgqxYaLaeKZNN1Bre9hkskjRYFulMCFCcYHUkdM55HagDvLG30+yjvYI54pr8TLJJLFn7jLlAfes3SvCdjpVmDaahExnle5VXlVShc/dwcHg1OtrpFpr11BYTwi+miSeeIja7AfKGPqAeM15t4z8M6nq/xMs72wtFlscwKZElQD5MeZ1PXqT9alsZ3+m6o2oXuoQDCT2MvkzDaQQ2M/iPen6zeXt7pUttaXNvb3qMpR5k3J1Gcj3GfxxWjrWraJo14Lq7aSyiuZCr3Pkgq0mCQTt524B5NUNQFlBpa3kNut/Jex+XZj
n5ncZVhjrxk89hTQjA8SAeJdbi0m2WO2toAr3MiHAOM4UD1PBP0rfgnlsrhm1KVrmJQqRygDdgKAAVHXv/AJ6ZMkGo+HfFVidB0CRbOeF5ryO9RpFMncI33scZ7+nSuis72fTtbt9Q1Cy+xC6hklWJuU2qDu5XOBhsgEUXAEu205b3UdW8o6QiK9v9iJed1x8+5cHABxjHbOavNbW2oaaXS7UKyDEAjO45Pr2456VwngbX45J9TXTfENzqkFuqvtms/J+8xB+YHOSWH8OMCrmt/EXT9L8Z2WgLocs0z+Sjy/aggy4B3bdhycH1FK4zd0mymtNc8ia6tEsTB5UaycStNuznceCNvaqms3kKDFsvmSIW3yDhQOh+tZWsPYSR3+pa+Slpps3mIqA5D/dXGerGuTX4py3LO9l4Oa5tB8u8tIxx+AxTvYDd02W6t7ttUEpuMKEWGdt0BAPynYOuemRitD/hE9Ki1q+uL2OC6kvSJMpGfLZT/DsYnBH4isXRvE1vPpUd9oNncaHc6PMim1MXnpOj7sxYPOG557YJyOtT3HiHUbm3kivki0+zunVxHECJI13Z+SQH5eDjjt+dNagbVvd2OiNJbXi2ptdQUxWuYzK63K9IyF5XcMYY8cHNRzaW6eOLe6s5rEHSw8Vx5sXmBmdRgBMgqygnnPvyKoanp+n+E5rW5tZI7oXUZZxEdzZYggMeuMiqcuratbaNKL3VxZ2k0jlNsalcMAiuW+9vByDyVPy8AjkYjsbTUE0+yv4J4zfiAyTwNKo35KjOz3JB4HFZWpT6nd+FDrWixbJLgKYkuUxuORkNzwQM8e1Zc13YeFNIgMOqS6nNbBI3Eqnz5Uc8kAnorDH06E1uw3F/q1gJY5I7eCME26TP8ok6BnHfntQBDqaazonhiGTQLaK6v5Jg08b8AqwOSOR3xV6ygmawtryaL7PqJiVpoByA5HIH41b8IXkl1p8kmrQyQXQV4JUliKKsg/jjz/AeoPpWtDaIl6IDPCzA5I3/AHuMnB+lK4zFga5kJaSIQt6ipft12OMpx/sGte8c3MsLW1sIrcxhlkfHzqeje9UDpG47jPJk8nA/+vQIuSeH4Reh7SU2sIxtQKHZT3O5s9aztZ8Iwf8ACSRaifE93aalcjcomcFZyikf6rI3BVycLj1qS8lsPhxpNpaaN4euryzkld5Rbzb3QcFm+cksT2Ge1PWSDxTFY6te6ZJbqgY28N1EBNHng56lcgdB2qbjMPxhcvb6baR6VYW93cS3KQyXj2u5AG6s45wAAeTkCqVh4G0e4l/tC+WW1lRwS6zMryg56HoAPSt/UbW+u/Ks7C6W2nEyuP3IkXywclSD6jir2pC/uL+zjt7SC7Ek4+0KSUWOPGSU9Tx0qr2A8e8V6Hq+n/EO58U+F1XUjHP50iKBI8LsOQ6dSCDkEetadj8V7TVpG03xLZyadcSDYJJEWW3VvWSN1JA+mcVS8Q+Pdb0DxDeRT+H4ohDcMkUy7oXeNWwMleoIFcxrGo6z8UddskttGjil/wBWGiBOcnOWc/1qBnpXizV9e0Xwz4jknkikU7ES4GPNSVztfBAGBtz0ry3wQngto7t/FMx+0ZAt45BKIj6lmjBYflXsWq6ba+JNO1zwdau51K0t4VLOMJJME3KVPocHmvLvAt14KtTPo3jnSRHIs3F0yyBk7FW2EMPY8ih7gM1S60nwbr9lqXhXVIbuwvI2S7slkLqFyAyODglSDkZGcj2r1O10a6tLu0vbLV1fTWtzdxxtblHRWjLY3biDgYOcCuOvfC3wqu5CtnrU0UskgS3itLkybyTgAqyEj8WFes6jYw6H4Evf3Jxa6XLEJpCNzKiEZwDxxxTTsI8C8LeOPFFxqc1l/pOvSXSFIrO8dp1BzkHDZ6Yrr0vvido11E2o6FaJp00qJIltbQgKCcDmL5l+pqh+z/ZGbxLql4se+S3tQFOM4LN/gD+Ve73ESLIJolZZGyXUD5Tz0GKSGc
P4v0jWNE09rrwzaNqt+8whZJ4vMaKPknDZBKkgfia8v0zxZ4i0/wAbW2kXej6ZZXk98gmjNku/fIVBPJOCQRXvO67XV5pJ7y2OmvEvlQhD5qyZ5JPcYrwvUf8ATv2igigtt1RE45IK4z+RB/Khgeraxr9vYCdtdtY7CSS4e2t7ON/O+0xghQ6quc5DDI7d64Lxt4s1fWvF0PgbwqUgEcgt5JYVAZ5AMMAf4UUZHHoa73XrnTDbaXcyW8xuZ2inQyRfvbZHdcEgDK5JCkfXPSvJvh1cxWHxvlW9lETtcXMKySHG2T5sc+pIx+NDEHi/4f658PtMt9ftfEMlwxmVJXi3RtG5BIPU7hwea9F0XxDJ4n+HE+u30sT3kWnXccqlsMdqYZ1H0I/E1T+Ouv2EHhOHRY5d15cXAcxkEMqpkbiD2J4B79qTw5ol7pPwOuomjC3N3p1xdbCvzlCCyj8QufxFAzhfhVHJ/Yfiq4jgM5jhhJjGck5YgDBHJwcfSm6oGuPjlpsexlmFzZrtfqrBUwD9OB+FUPhx440/wit/DqFhPdJclHQQkZ3rnAOe3Jqbw7Peaz8cLW4v4vKu5b9nki/55kAkj6j+lAHpniTQb/xR4dutJTUrGVHw1qFj8t1eMtgSHochjg/SvPtM8R+KfAWlHRdV0OVtORnG5coV3H5sSLwefWvRfFWo6t4c0qTUtMsFvbo3KxyhoyVWPaT26cj9a4K4+MWsXFpcad/YFkr3CMjLlmHIxnaeM03uI7Twrrmna7or3enTzzfZpo2uLe8K+YjfNsJZQC6/eHX1B60/W7HUPEazXuq6ctk7thYu0keMK6kdiPx4rI+Efgq8ttO1LUNXSW1gv0EMca4WTry4B6Yzwa29KubRvCursTfXE+jXM9pKj4eSURc5GMZJHanFgzmdQ8KWv2dLq2u72A7Q0myXjHTn1rR1Lw0W8N29qkupajZwkutpDIvAlYF2X5cnJO7BJ9q1PLmvotLjigkiN3EtyGkBUrC3IJHua6PULvSfC2lQXF/MlvEG8tDtZy7YJCqB3IB4qmxHPW3w50q11ZL+Ke73rEYZFZy5fPOSTz+VaWo+BdD1h5C8VyqPF5LxC6dlbuG575wRU2pXuizXeg3pu52nndDZpAzLu80YUyKOQh6ZI4Na9leeIJLiIaloUOmWgt95i+0CaRJdxBUsOGGPmBHrUXGZOh+GL20+1QXX+kx2wDWcj3EjvIoHPm54zn+7xiqdn4Qv2j1C4kupdOvdRG97eJvNS0l3ZLRluxGOK09Y1aXRfECavHq6TRTAC6tLib5IoY1JeSJQOWA+8K3HubnXtMSa2nVLe4iDxXCjLFWGQw/AgigDJs9Mi0Kwgs4pAI4UCiNnIA/PoPbtXOS/FLS4pnjENzIEYqHjiZlbHcEcEe9bkmn69DqNj9m16K5azjUXzXEKmZichWHYHAPFXV8OaWFA+wp096aYHJw2PjnUNQi1DVNRs9FEbqJINi3LuoOThv4c9OK7ZVa/uS0G7yA3DMMZoopAZ2mXWovqupx3VgLZLefyraUEnzo+u7+lWfC8N54b0L7Nf3Yvr4u7rOcn5SxI6+gOKKKAI/tJvJGeW1s5lz1mgVyfzrJXVpbZ76LTrG0gn8lmiMEQTeQPunHuP1ooq7IRNBpU3iTwgItSlk0nULi3VpJYOHikUg8HPt696qeJ/BHhjxJOk2oWUq3cgAkvLV1jZzjq2VOTRRUDMrRvhx4R0EwatEs93dQy7oo7mUZUqeHwowRn2rW1mx1i58KxaGNSS+l1KKeOa9WDhcsrJuCZ24AZSR1zk0UU7AY/gfw5cfDCyvH1S+sf+JjNGiXMTuFXbnaDuUEEljXU6b4RtzDdHVZ5dQvLx42urkgxeesZzGrKDj5QAMjG4daKKkDm/Fmp/EPTPE1xB4Z8NRvpq7TDJ9l3545+bd615vbeGviRaeLP+Enj0OWPUmnefzJFQLvfOThjjuaKKQz1/wCHsfjv7d
fah4xht13ogttsduCTznJi54GOprJ8bfCPS/EPiBNSstTOnahfO7tF5ZZHdRlmBH3T3x3NFFMDIt/hTo3hRodY1ibUNfkEyL9ngtwoBJ+82WJZR36V6zZTG6d5Hg3xkAITwAO4x3BHFFFAjlrbwB4QsdebUbTR8TLJlQ0pMan2XGK030Pw7p+rPrFvolmmps7yvc4JYM2csMnjOTRRTAkgvSNMuFaIMzy+ZlgCCMdMVl2OoRXlxMk2m6cWjb5W+yICQeh6UUVdkIreNz4lu7axtNBcW/nOUmuAP+PdRggj0GQR+NJK03h7W7U6dpcckF9cNNqEgJwHwCXP1IooqQLcc5e9N5cOELyBVUnIhi/hUewyfzrY1m8tYNCLw2EmqsLqNY1gi3lc/wDLQew65HSiihgZZ0JpvFUevS3eLlLM2flxAFShbdznvn/9Va+tefY6Rc3Qt7jU54Yt0cEb72kPoB/TH4UUUhmTYeH01K/0rxBqdrLbTxWjRDT3YPHAz/ePTqR8ufTjFa4v7u11GWylgtbfT9iJZzBurY5DKOgHHSiigDNn8OX1zpckS3axz3kv+myPHuEsJyHQdCMg4BHI7Vv2lppdhZQWdvYyLDBGsUY+0McKowOSc9BRRQB//9k="}


# Serialize the payload as UTF-8 encoded JSON, matching the Content-Type
# header set below.
data = json.dumps(data).encode('utf-8')

# Supplying a request body already makes urllib issue a POST; passing the
# configured verb just makes that explicit.
req = urllib.request.Request(url, data=data, method=method)
req.add_header('Authorization', 'APPCODE ' + appcode)
req.add_header('Content-Type', 'application/json; charset=UTF-8')

# The context manager closes the connection even if reading fails, and the
# timeout keeps the script from hanging forever on an unresponsive server.
with urllib.request.urlopen(req, timeout=10) as response:
    content = response.read()

# Only print when the server actually returned a body.
if content:
    print(content.decode('utf-8'))

 

 

第二种方法:直接输入图片地址,用base64模块进行编码

from urllib import request
from base64 import b64encode
import requests
import json

# Download the captcha image to a local file so it can be base64-encoded.
captcha_url = "https://www.douban.com/misc/captcha?id=WRNBVea8CRa3rvzN9F5YnS9f:en&size=s"
request.urlretrieve(captcha_url, 'captcha.png')

recognize_url = 'http://tupian.market.alicloudapi.com/clouds/ocr/webImage'

# The request body is a JSON object carrying the base64 text under "image".
formdata = {}
with open('captcha.png', 'rb') as fp:
    data = fp.read()
pic = b64encode(data)
formdata['image'] = pic.decode('utf-8')
print(formdata)
print(type(formdata))
formdata = json.dumps(formdata).encode('utf-8')

# Placeholder only: put your own AppCode here and never publish a real one.
appcode = 'XXXXXXXX'
headers = {
    'Content-Type': 'application/json; charset=UTF-8',
    'Authorization': 'APPCODE ' + appcode,
}

# requests has no default timeout, so set one to avoid blocking indefinitely.
response = requests.post(recognize_url, data=formdata, headers=headers, timeout=10)
# Fail with a clear HTTP error (e.g. rejected AppCode) instead of trying to
# parse an error page as JSON.
response.raise_for_status()
print(response.json())

 

posted @ 2017-12-15 09:59  风起了,风停了  阅读(4994)  评论(0)  编辑  收藏  举报