request爬取网页等

本文首发于个人博客https://kezunlin.me/post/77697d8d/，欢迎阅读最新内容！

python useful tools and code snippets

Guide

flip

import cv2
image = cv2.imread("demo.jpg")
# cv2.imread reports a missing or unreadable file by returning None instead
# of raising, so fail here with a clear message rather than inside cv2.flip.
if image is None:
    raise FileNotFoundError("cannot read image: demo.jpg")

# Flip horizontally (flip code 1).
h_flip = cv2.flip(image, 1)
cv2.imwrite("demo-h.jpg", h_flip)

# Flip vertically (flip code 0).
v_flip = cv2.flip(image, 0)
cv2.imwrite("demo-v.jpg", v_flip)

# Flip horizontally and vertically (flip code -1).
hv_flip = cv2.flip(image, -1)
cv2.imwrite("demo-hv.jpg", hv_flip)

rotate

def rotate_anti_90(image):
    """Rotate ``image`` by 90 degrees counter-clockwise.

    The image is transposed and the transposed result is flipped with
    flip code 0.
    """
    return cv2.flip(cv2.transpose(image), 0)

def rotate_anti_180(image):
    """Rotate ``image`` by 180 degrees.

    Applies a flip with code 0 followed by a flip with code 1.
    """
    upside_down = cv2.flip(image, 0)
    return cv2.flip(upside_down, 1)

def rotate_anti_270(image):
    """Rotate ``image`` by 270 degrees counter-clockwise.

    The image is transposed and the transposed result is flipped with
    flip code 1.
    """
    return cv2.flip(cv2.transpose(image), 1)

def rotate(image, angle, center=None, scale=1.0):
    """Rotate ``image`` by ``angle`` around ``center``, keeping the canvas size.

    Args:
        image: array whose first two dimensions are (height, width).
        angle: rotation angle passed to ``cv2.getRotationMatrix2D``.
        center: (x, y) rotation center; defaults to the middle of the image.
        scale: scale factor passed to ``cv2.getRotationMatrix2D``.

    Returns:
        The warped image, with the same width and height as the input.
    """
    height, width = image.shape[:2]  # shape is (h, w, c)
    pivot = (width / 2., height / 2.) if center is None else center
    matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(image, matrix, (width, height))

compression

# Write with an explicit encoder parameter as a [flag, value] list.
# `full_image_path` and `image` are expected to be defined by the caller.
cv2.imwrite(full_image_path, image, [int( cv2.IMWRITE_JPEG_QUALITY), 100]) # highest JPEG quality (JPEG is still lossy, this is not "no compression")
# [int(cv2.IMWRITE_PNG_COMPRESSION), 9]  PNG compression level, 0-9
# [int( cv2.IMWRITE_JPEG_QUALITY), 100]  JPEG quality, 0-100

get video info

import datetime
import cv2
from moviepy.editor import VideoFileClip
import numpy as np

def get_video_info(video_path):
    """Print frame rate, frame count, frame size and duration of a video.

    The duration is reported twice: first estimated from OpenCV's frame
    count and frame rate, then as reported by moviepy's ``VideoFileClip``.

    Args:
        video_path: path of the video file to inspect.

    Returns:
        None. When OpenCV cannot open the file, nothing is printed.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return

    try:
        frame_number = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        # Keep fps as a float: truncating e.g. 29.97 to 29 skews the duration.
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        cap.release()  # release video capture

    print("fps = ", fps)
    print("frame_number = ", frame_number)
    size = (w, h)
    print("size = ", size)

    # The frame count reported by OpenCV may be wrong, so the duration
    # derived from it may be wrong as well.
    if fps > 0:
        duration = int(frame_number / fps)
        print("seconds=", duration)
        video_time = str(datetime.timedelta(seconds=duration))
        print("video_time=", video_time)
    else:
        # Avoid ZeroDivisionError when the backend reports no frame rate.
        print("fps is not available, cannot estimate duration from frame count")

    print("-----------------------using VideoFileClip------------------")
    clip = VideoFileClip(video_path)
    try:
        duration = clip.duration
    finally:
        clip.close()  # release the reader held by the clip
    print("video duration is " + str(duration) + " seconds")
    video_time = str(datetime.timedelta(seconds=int(duration)))
    print("video_time=", video_time)

def clip_video(src_path="1.mp4", starting_point=120, end_point=420,
               dst_path="/path/to/new/video.mp4"):
    """Cut a time range out of a video and write it to a new file.

    Calling ``clip_video()`` without arguments keeps the original demo
    behaviour: seconds 120 to 420 of ``1.mp4`` (a 300 second excerpt).

    Args:
        src_path: path of the source video.
        starting_point: start of the excerpt, in seconds from the beginning.
        end_point: end of the excerpt, in seconds from the beginning.
        dst_path: path of the video file to write.
    """
    clip = VideoFileClip(src_path)
    try:
        subclip = clip.subclip(starting_point, end_point)
        subclip.write_videofile(dst_path)
    finally:
        clip.close()  # release the reader even if writing fails

numpy argmax

numpy argmax for 2-dim and 3-dim

import numpy as np

# 2-D example: argmax along each axis of an (h, w) array.
array = np.array([[1, 2, 3], [4, 5, 6], [9, 8, 7], [1, 2, 3], [10, 1, 2]])
print("array.shape=",array.shape)

# axis=0 reduces over rows (h): one row index per column, shape (w,), values in 0..4
result1 = np.argmax(array, axis=0)
# axis=1 reduces over columns (w): one column index per row, shape (h,), values in 0..2
result2 = np.argmax(array, axis=1)

for result in (result1, result2):
    print(result)
    print(result.shape)

output

('array.shape=', (5, 3))
[4 2 2]
(3,)
[2 2 0 2 0]
(5,)

# 3-D example: combine `array` (from the 2-D example) with a second (h, w)
# array into one (c, h, w) array.
array2 = np.array([[3, 2, 1], [6, 5, 4], [7, 8, 9], [1, 2, 3], [1, 1, 10]])
image = np.array([array, array2])
print("image.shape=",image.shape)
print(image)

# axis=0 reduces over channels (c): result has shape (h, w), values in 0..1
out = np.argmax(image, axis=0)
for item in (out, out.shape, out.dtype):
    print(item)

output

('image.shape=', (2, 5, 3))
[[[ 1  2  3]
  [ 4  5  6]
  [ 9  8  7]
  [ 1  2  3]
  [10  1  2]]

 [[ 3  2  1]
  [ 6  5  4]
  [ 7  8  9]
  [ 1  2  3]
  [ 1  1 10]]]
[[1 0 0]
 [1 0 0]
 [0 0 1]
 [0 0 0]
 [0 0 1]]
(5, 3)
int64

# Inspect a saved segmentation output and turn it into a label map.
output = np.load('output.npy')
print("output.shape=",output.shape)  # (1, 2, 512, 512) in the sample run below
print("output.dtype=",output.dtype)

# Take the first entry along the leading axis.
image = output[0]
print("image.shape=",image.shape)  # (2, 512, 512) in the sample run below
print("image.dtype=",image.dtype)

# axis=0 reduces over channels (c) of the chw array
out = np.argmax(image, axis=0)
print("out.shape=",out.shape)  # (512, 512) in the sample run below
print("out.dtype=",out.dtype)  # int64 in the sample run below

for extreme in (out.min(), out.max()):
    print(extreme)

output

('output.shape=', (1, 2, 512, 512))
('output.dtype=', dtype('float32'))
('image.shape=', (2, 512, 512))
('image.dtype=', dtype('float32'))
('out.shape=', (512, 512))
('out.dtype=', dtype('int64'))
0
1

pandas quantile

basic

"""
for array of length n:
1. pos = 1 + (n-1)*p
2. get integer part and fraction part of pos: i, f 
3. return a[i-1] + (a[i]-a[i-1])*f
"""
import math
def quantile_p(data, p, method=1):
    """Return the ``p``-quantile of ``data`` using linear interpolation.

    The 1-based position of the quantile in the sorted data is
    ``(n + 1) * p`` for ``method=1`` and ``1 + (n - 1) * p`` for
    ``method=2``. The result interpolates between the two neighbouring
    values: ``a[i-1] + (a[i] - a[i-1]) * f`` where ``i`` and ``f`` are the
    integer and fractional part of the position.

    Args:
        data: non-empty iterable of numbers; it is not modified.
        p: quantile to compute, between 0 and 1 inclusive.
        method: 2 selects ``1 + (n - 1) * p``; any other value selects
            ``(n + 1) * p``.

    Returns:
        The interpolated quantile as a float.

    Raises:
        ValueError: if ``data`` is empty or ``p`` is outside [0, 1].
    """
    values = sorted(data)  # sorted copy: do not reorder the caller's list
    n = len(values)
    if n == 0:
        raise ValueError("data must not be empty")
    if not 0 <= p <= 1:
        raise ValueError("p must be between 0 and 1")

    if method == 2:
        pos = 1 + (n - 1) * p
    else:
        pos = (n + 1) * p
    # Method 1 can yield positions below 1 or above n; clamp so the lookup
    # neither wraps around to values[-1] nor runs past the end.
    pos = min(max(pos, 1), n)

    f, whole = math.modf(pos)
    i = int(whole)
    lower = values[i - 1]
    upper = values[min(i, n - 1)]  # i == n only happens with f == 0
    return lower + (upper - lower) * f


def quartiles(data, method=1):
    """Return ``(Q1, Q2, Q3, IQR)`` of ``data`` computed with ``quantile_p``."""
    Q1 = quantile_p(data, 0.25, method)
    Q2 = quantile_p(data, 0.5, method)
    Q3 = quantile_p(data, 0.75, method)
    IQR = Q3 - Q1
    return Q1, Q2, Q3, IQR

quantile

import pandas as pd
import numpy as np
# Sample data as a pandas Series (kept under the name `df`).
df = pd.Series(np.array([6, 47, 49, 15, 42, 41, 7, 39, 43, 40, 36]))

print(df)
# Series.quantile interpolates linearly between neighbouring values by default.
print('Q1:', df.quantile(.25))
print('Q2:', df.quantile(.5))
print('Q3:', df.quantile(.75))

pandas uses method 2 (linear interpolation by default): pos = 1 + (n-1)*p

image to/from base64 string

import json
import base64

def get_base64_str_from_file(filepath):
    """Read the file at ``filepath`` and return its content as a base64 str."""
    with open(filepath, "rb") as src:
        raw = src.read()  # bytes
    encoded = base64.b64encode(raw)  # bytes
    return str(encoded, 'utf-8')  # bytes -> str

def save_base64_str_to_file(str_base64, to_file):
    """Decode the base64 string ``str_base64`` and write the bytes to ``to_file``."""
    raw = base64.decodebytes(str_base64.encode('utf-8'))  # str -> bytes -> decoded bytes
    with open(to_file, "wb") as dst:
        dst.write(raw)

def test_base64():
    """Round-trip an image file through a base64 string and print "OK"."""
    # image -> base64 string -> image
    src_path, dst_path = "images/1.jpg", "images/2.jpg"
    save_base64_str_to_file(get_base64_str_from_file(src_path), dst_path)
    print("OK")


if __name__ == "__main__":
    test_base64()

output

OK

normal string to/from base64 string

def str_to_base64(normal_str):
    """Encode the text ``normal_str`` (as UTF-8) into a base64 str."""
    raw = normal_str.encode('utf-8')  # str -> bytes
    return base64.b64encode(raw).decode('utf-8')  # bytes -> base64 bytes -> str

def base64_to_str(base64_str):
    """Decode the base64 str ``base64_str`` back into UTF-8 text."""
    decoded = base64.decodebytes(base64_str.encode('utf-8'))  # str -> bytes -> decoded bytes
    return str(decoded, 'utf-8')  # bytes -> str

def test_base64():
    """Round-trip a plain string through base64 and print each stage."""
    normal_str = "Hello World !"
    str_base64 = str_to_base64(normal_str)
    normal_str2 = base64_to_str(str_base64)
    stages = (
        ("normal_str = ", normal_str),
        ("str_base64 = ", str_base64),
        ("normal_str2 = ", normal_str2),
    )
    for label, value in stages:
        print(label, value)


if __name__ == "__main__":
    test_base64()

output

normal_str =  Hello World !
str_base64 =  SGVsbG8gV29ybGQgIQ==
normal_str2 =  Hello World !

json load and dumps

import json

# str -> dict
str_text = '{"status":0,"msg":"成功"}'
dict_json = json.loads(str_text)

for shown in (type(dict_json), dict_json):  # the type is dict
    print(shown)

# dict -> str: indented, keys sorted, non-ASCII characters kept as-is
dump_options = {"indent": 4, "sort_keys": True, "ensure_ascii": False}
str_pretty_result = json.dumps(dict_json, **dump_options)

for shown in (type(str_pretty_result), str_pretty_result):  # the type is str
    print(shown)

output

<class 'dict'>
{'status': 0, 'msg': '成功'}
<class 'str'>
{
    "msg": "成功",
    "status": 0
}

str to dict
dict to str

simplekml

import simplekml
# Build a KML document with two named points and save it.
kml = simplekml.Kml()
points = (
    ("point a", (18.432314,-33.988862)),  # lon, lat, optional height
    ("point b", (28.432314,-43.988862)),  # lon, lat, optional height
)
for point_name, coord in points:
    kml.newpoint(name=point_name, coords=[coord])
kml.save("1.kml")

1.kml

<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2">
    <Document id="1">
        <Placemark id="3">
            <name>point a</name>
            <Point id="2">
                <coordinates>18.432314,-33.988862,0.0</coordinates>
            </Point>
        </Placemark>
        <Placemark id="5">
            <name>point b</name>
            <Point id="4">
                <coordinates>28.432314,-43.988862,0.0</coordinates>
            </Point>
        </Placemark>
    </Document>
</kml>

python requests

install

pip install requests
conda install requests

usage

>>> requests.get('https://httpbin.org/get')
>>> requests.post('https://httpbin.org/post', data={'key':'value'})
>>> requests.put('https://httpbin.org/put', data={'key':'value'})
>>> requests.delete('https://httpbin.org/delete')
>>> requests.head('https://httpbin.org/get')
>>> requests.patch('https://httpbin.org/patch', data={'key':'value'})
>>> requests.options('https://httpbin.org/get')

code example

import requests
import json

# requests needs an explicit scheme; a bare "127.0.0.1/..." URL is rejected
# with a MissingSchema error.
BASE_URL = "http://127.0.0.1"
# Seconds to wait for the server; without a timeout a request can block forever.
TIMEOUT = 10

data = {"name":"admin",
        "password":"21232f297a57a5a743894a0e4a801fc3"}

# Variant 1: serialize the JSON body by hand and pass it as `data`.
r = requests.post(
    BASE_URL + "/api/login",
    headers={"Accept": "application/json",
             "Content-Type": "application/json"},
    data=json.dumps(data),
    timeout=TIMEOUT
)
print(r.text)

# Variant 2: pass the dict as `json` and let requests serialize it.
r = requests.post(
    BASE_URL + "/api/login",
    headers={"Accept": "application/json",
             "Content-Type": "application/json"},
    json=data,
    timeout=TIMEOUT
)
print(r.text)


# GET request that sends the session id as a custom header.
session_id = "157480890@kezunlin.me"
myheaders={"Accept": "application/json",
           "Content-Type": "application/json",
           "session_id":session_id}
r=requests.get(
    BASE_URL + "/api/book",
    headers=myheaders,
    timeout=TIMEOUT
)
print(r.text)

requests-html

requests-html: HTML parsing for humans

install

pip install requests-html

usage

>>> from requests_html import HTMLSession
>>> session = HTMLSession()
>>> r = session.get('https://python.org/')
>>> r.text
>>> r.html.find('title', first=True).text

selenium chromedriver

versions

Selenium – version 3.11.0
Chrome Browser – version 77
ChromeDriver – version 77

steps

download and install chrome browser 77
download ChromeDriver for Chrome Browser 77
install and check version

commands

sudo cp chromedriver /usr/local/bin
chromedriver -v
ChromeDriver 77.0.3865.40

smote

Use SMOTE to oversample imbalanced datasets.

install

pip install smote_variants
pip install imbalanced_databases

class member vs instance member

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import time

class Info(object):
    """Demo class contrasting class attributes with instance attributes."""

    # Class attribute holding a list. `obj.rects.extend(...)` mutates this one
    # list in place, so the change is visible through every instance.
    rects = []
    # Class attribute holding an int. `obj.counter += 1` reads this value and
    # binds the result as a new attribute on `obj`; the class value stays 0.
    counter = 0
    def __init__(self):
        # Instance attributes: created anew for every instance.
        self.new_rects = []
        self.new_counter = 0

def func():
    """Create and return a new ``Info`` instance."""
    return Info()

def test1():
    """Mutate two fresh ``Info`` objects the same way and print their state.

    Each object has its class-level and instance-level attributes updated,
    then the list sizes and counters are printed.
    """
    def mutate_and_report(obj):
        # Same four updates for every object, followed by the report.
        obj.rects.extend([1,2,3,4,5])
        obj.counter += 1
        obj.new_rects.extend([1,2,3,4,5])
        obj.new_counter += 1

        print("rect size", len(obj.rects),len(obj.new_rects))
        print("counter", obj.counter,obj.new_counter)
        print("--------------------------")

    info = func()
    mutate_and_report(info)

    info2 = func()
    mutate_and_report(info2)


if __name__ == "__main__":
    test1()

"""
rect size 5 5
counter 1 1
--------------------------
rect size 10 5
counter 1 1
--------------------------
"""

pyyaml

install

pip install pyyaml
pip freeze > requirements.txt

usage
cfg.yaml

---
# cfg.yaml
debug: true # debug or not (default: false)
input_dir: "./input/" # input dir 
output_dir: "./output/" # output dir

code

import yaml 

def load_cfg(cfg_path='./cfg.yaml'):
    """Load a YAML configuration file.

    Args:
        cfg_path: path of the YAML file to read.

    Returns:
        The parsed document (a dict for a mapping such as cfg.yaml), or
        None when the file does not exist or is empty.
    """
    try:
        with open(cfg_path, encoding="utf-8") as cfg_file:
            # safe_load instead of yaml.load: calling yaml.load without an
            # explicit Loader is rejected by PyYAML >= 6 and can construct
            # arbitrary Python objects on older versions.
            return yaml.safe_load(cfg_file)
    except FileNotFoundError:
        print("{} not exist".format(cfg_path))
        return None

cupy

CuPy: NumPy-like API accelerated with CUDA.
CuPy: numpy on GPU

install

(For CUDA 8.0)
% pip install cupy-cuda80

(For CUDA 9.0)
% pip install cupy-cuda90

(For CUDA 9.1)
% pip install cupy-cuda91

(For CUDA 9.2)
% pip install cupy-cuda92

(For CUDA 10.0)
% pip install cupy-cuda100

(For CUDA 10.1)
% pip install cupy-cuda101

(Install CuPy from source)
% pip install cupy

usage

>>> import cupy as cp
>>> x = cp.arange(6).reshape(2, 3).astype('f')
>>> x
array([[ 0.,  1.,  2.],
       [ 3.,  4.,  5.]], dtype=float32)
>>> x.sum(axis=1)
array([  3.,  12.], dtype=float32)



>>> x = cp.arange(6, dtype='f').reshape(2, 3)
>>> y = cp.arange(3, dtype='f')
>>> kernel = cp.ElementwiseKernel(
...     'float32 x, float32 y', 'float32 z',
...     '''if (x - 2 > y) {
...       z = x * y;
...     } else {
...       z = x + y;
...     }''', 'my_kernel')
>>> kernel(x, y)
array([[ 0.,  2.,  4.],
       [ 0.,  4.,  10.]], dtype=float32)

SORT

SORT: a simple, online and realtime tracker based on the Kalman filter

code

from sort import *

# Create the SORT tracker instance.
mot_tracker = Sort() 

# Obtain the detections to track (placeholder: left to the caller).
...

# Update the tracker with the new detections.
track_bbs_ids = mot_tracker.update(detections)

# track_bbs_ids is a np array where each row contains a valid bounding box and track_id (last column)
...

Reference

History

2019/11/08: created.

Copyright

Post author: kezunlin
Post link: https://kezunlin.me/post/77697d8d/
Copyright Notice: All articles in this blog are licensed under CC BY-NC-SA 3.0 unless stating additionally.

posted @ 2019-12-31 17:52 kezunlin 阅读(654) 评论(0) 编辑收藏举报

刷新页面返回顶部

kezunlin

Live and Learn

python有用代码合集旋转图像/base64图像编码/json序列化/request爬取网页等

Guide

flip

rotate

compression

get video info

numpy argmax

pandas quantile

basic

quantile

image to/from base64 string

normal string to/from base64 string

json load and dumps

simplekml

python requests

install

usage

code example

requests-html

selenium chromedriver

smote

class member vs instance member

pyyaml

cupy

install

usage

SORT

Reference

History

Copyright

公告

kezunlin

Live and Learn

python有用代码合集 旋转图像/base64图像编码/json序列化/request爬取网页等

Guide

flip

rotate

compression

get video info

numpy argmax

pandas quantile

basic

quantile

image to/from base64 string

normal string to/from base64 string

json load and dumps

simplekml

python requests

install

usage

code example

requests-html

selenium chromedriver

smote

class member vs instance member

pyyaml

cupy

install

usage

SORT

Reference

History

Copyright

公告

python有用代码合集旋转图像/base64图像编码/json序列化/request爬取网页等