python学习笔记

Author: maddock

Date: 2015-03-15 21:42:01

(暂时没有整理)

python json文件处理

#coding:utf-8
import json

# data = [ { 'a' : 1, 'b' : 2, 'c' : 3, 'd' : 4, 'e' : 5 } ]

# # 打开键值排序、缩进为 4、以',', ': '为分隔
# json = json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
# print(json)


# jsonData = '{"a":1,"b":2,"c":3,"d":4,"e":5}';

# text = json.loads(jsonData)
# print(text)

# with open("model-symbol1.json", "w", encoding='utf-8') as f:
#     # indent 超级好用，格式化保存字典，默认为None，小于0为零个空格
#     f.write(json.dumps(a, indent=4))
#     # json.dump(a,f,indent=4)   # 和上面的效果一样


#格式化打印json文件
# with open("model-symbol.json", "r", encoding='utf-8') as f:
with open("model-symbol.json", "r") as f:
    aa = json.loads(f.read())
    print(json.dumps(aa, indent=4))

python 不以科学计数法输出

其实很简单只需两个语句：

import numpy as np
np.set_printoptions(suppress=True)

这样就可以搞定

python交换矩阵的两行

A = np.mat('1 2;3 4')
print(A)
#A[0,:],A[1,:] = A[1,:],A[0,:]
A[[0,1]] = A[[1,0]]
#上面相当于 A[[0,1],:] = A[[1,0],:]
print(A)

python numpy 矩阵拼接

img为一个矩阵160*160

data_list为一个list 两个元素为矩阵

data_list[0][0, ...] = img

python dict按照key 排序：

1、method 1.

# Sort the (key, value) pairs by key. sorted() accepts both the Python 2 list
# and the Python 3 view returned by dict.items(); calling .sort() on the view
# fails on Python 3.
# NOTE: `dict` is the name used throughout these notes for the dictionary
# being sorted; it shadows the builtin type.
items = sorted(dict.items())
for key, value in items:
    print("%s %s" % (key, value))  # equivalent to printing key and dict[key]

2、method 2.

# Iterate over the keys in sorted order and look each value up.
# (A `print ... for ...` one-liner is not valid Python syntax.)
for key in sorted(dict.keys()):
    print("%s %s" % (key, dict[key]))

python dict按照value排序：

method 1：

把dictionary中的元素分离出来放到一个list中，对list排序，从而间接实现对dictionary的排序。这个“元素”可以是key，value或者item。

method2：

# Sort by value using a key function (ascending). The `key` argument works on
# both Python 2 and Python 3, whereas a positional cmp function is Python 2 only.
sorted(dict.items(), key=lambda item: item[1])
# Descending order
sorted(dict.items(), key=lambda item: item[1], reverse=True)

下面给出python内置sorted函数的帮助文档：

sorted(...)
sorted(iterable, cmp=None, key=None, reverse=False) --> new sorted list

python 调试

`python -m pdb err.py`

pdb.set_trace()

这个方法也是用pdb，但是不需要单步执行，我们只需要import pdb，然后，在可能出错的地方放一个pdb.set_trace()，就可以设置一个断点：

# err.py
import pdb

s = '0'
n = int(s)
pdb.set_trace() # 运行到这里会自动暂停
print 10 / n

python : 将txt文件中的数据读为numpy数组或列表

python笔记2-数据类型：列表[List]常用操作

序列是Python中最基本的数据结构。序列中的每个元素都分配一个数字 - 它的位置，或索引，索引从0开始，依此类推。

序列都可以进行的操作：包括 索引，切片，加，乘，检查成员。

列表是最常用的Python数据类型，list 是一种有序的集合、列表是可变变量，即能通过列表的下标进行修改值

创建一个列表，只要把逗号分隔的不同的数据项使用方括号括起来即可。如下所示：

nums = [1, 2, 3, 'beijing', 'shanghai']  #定义列表
num1 = [1, 2, 3, 4, 5, 6]  #List内元素为int类型
num2 = ['a',' b', 'c', 'd'] #List内元素为str类型

List数据类型的常用操作如下：

增加元素：

nums = [1, 2, 3, 'beijing', 'shanghai']  #定义列表
nums.append('byz') #添加的值从最末尾添加，执行结果：[1, 2, 3,'beijing', 'shanghai','byz']
nums.insert(2, '888') #在指定位置添加元素，2代表下标，执行结果：[1, 2, '888', 3, 'beijing', 'shanghai', 'byz']

删除元素:

nums = [1, 2, 3, 'beijing', 'shanghai'] 
# pop(num) 方法的返回值就是删除的指定位置的元素值
nums.pop() #默认删除最后一个元素，执行结果：[1, 2, 3, 'beijing']
nums.pop(2) #删除指定位置的元素，执行结果：[1, 2, 'beijing']
nums.clear()  #清空列表，执行结果:[]
del nums[0]   #使用del关键字，删除指定位置的元素，0代表下标，执行结果：[ 2, 'beijing']
nums.remove('beijing')  #remove()删除方法，传入的参数是 元素值，pop()删除指定位置元素，传入的参数是下标

修改元素：

nums = [1, 2, 3, 'beijing', 'shanghai']  #定义列表
#修改，通过下标来修改list元素值
nums[0] = 'hello'  #修改List内的元素值，执行结果：['hello', 2, 3, 'beijing', 'shanghai']
nums[5] = 'haha' #修改不存在的下标，报错信息：IndexError: list assignment index out of range

查询元素：

nums = [1, 2, 3, 'beijing', 'shanghai']  #定义列表
#查询，通过下标和for循环来遍历取值
print(nums[3])   #通过下标取值，下标从0开始，执行结果：beijing
for num in nums:
    print(num)  #通过for循环，遍历list内的元素

List中的count(元素)方法，可以用来判断元素是否存在List内，用法如下：

#判断元素是否存在List内，可以使用in方法，也可以使用count()方法
names = [1, 2, 3, 'beijing', 'beijing', 'shanghai']
print(names.count('beijing'))  #查找元素在list里面的次数,执行结果为：2
print(names.count('hahah')) #若查找的元素不在list里面，返回结果为0
num = names.count('beijing')
if num >0:
    print('说明元素存在')
else:
    print('元素不存在')

获取List元素的下标，用法如下：

names = [1, 2, 3, 'beijing', 'beijing', 'shanghai']
print(names.index('beijing'))   #获取beijing这个元素的下标值，如果有多个元素时，返回第一个元素的下标值
print(names.index('a')) #如果查找的元素不存，则报错：ValueError: 'a' is not in list
print(names.index('beijing', 2))    #可以指定从哪个元素开始查找，2代表开始查找的下标
print(names.index('beijing', 2, 3)) #可以指定查找范围，2,3代表开始、结束的下标值，查找范围不包含结束下标对应的元素，顾头不顾尾

List的extend方法，更改列表的值，用法如下：

names = [1, 2, 3, 'beijing', 'beijing', 'shanghai']
status = ['ywq', 'lhl', 'yqq']
print('列表合并的结果：', status + names)  #两个列表List合并，产生一个新的变量,执行结果：['ywq', 'lhl', 'yqq', 1, 2, 3, 'beijing', 'beijing', 'shanghai']
print('这个是extend:', status.extend(names))  #extens没有返回值，执行结果为：None
print('extens后的status:', status)   #将names列表的 添加 到status里，status的值进行了更改

list的extend 与 append方法的区别：

nums = [23, 34, 21, 2, 33, 456, 12]
status = ['a','b','c']
print(status.extend(nums))
print(status)  #extends是将nums的添加到 status，执行结果为一维数组：['a', 'b', 'c', 23, 34, 21, 2, 33, 456, 12]

nums = [23, 34, 21, 2, 33, 456, 12]
status = ['a','b','c']
print(status.append(nums)) 
print(status)  #append 是nums列表作为一个元素 添加到status列表的最后面，执行结果为二维数组：['a', 'b', 'c', [23, 34, 21, 2, 33, 456, 12]]

排序sort方法，用法如下：

nums = [23, 34, 21, 2, 33, 456, 12]
print('nums排序前：', nums)          #执行结果：[23, 34, 21, 2, 33, 456, 12]
nums.sort()              #默认是升序排列
print('sort 排序后的nums:', nums)    #执行结果：[2, 12, 21, 23, 33, 34, 456]
nums.sort(reverse=True)  #指定reverse=True，排序为降序排列
print('降序排列的nums:', nums)       #执行结果：[456, 34, 33, 23, 21, 12, 2]

反转数组，如下：

nums = [23, 34, 21, 2, 33, 456, 12]
print(nums.reverse()) #反转的是nums的值，nums.reverse()没有返回值，执行结果为None
print(nums)  #反转后的nums，执行结果：[12, 456, 33, 2, 21, 34, 23]

多维数组，获取元素的值：

num = [1, 2, 9.8, ['a', 'b', ['hehe', 'haha']]]
print(num[3][0])  #获取二维数组 a这个元素值
print(num[3][2][0])  #获取三维数组，hehe这个元素值

切片操作

切片也就是另一种方式获取列表的值，它可以获取多个元素，可以理解为，从第几个元素开始，到第几个元素结束，获取他们之间的值，格式是name[1:10]，比如说要获取name的第一个元素到第五个元素，就可以用name[0:5]，切片是不包含结束下标对应的元素的，记住顾头不顾尾；

names = ['zcl','py','byz']
print(names[0:2])   #切片的意思就是从list或者字符串里面，取几个元素，执行结果['zcl', 'py']
print(names[:2])    #默认从0开始取值，开始下标0可以不用写，执行结果：['zcl', 'py']
print(names[1:])    #从第1个下标开始取值，取后面所有的值，那么结束下标值可以不写
print(names[:])     #不写开始、结束的下标时，默认取整个列表的值
print(names[-1:])   #取最后一个值，下标从-1开始

切片的步长：

nums = list(range(1,21))
print(nums[0:11])  #执行结果：[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
print(nums[0:11:2])  #每隔2个字符取一个值，2是步长，执行结果：[1, 3, 5, 7, 9, 11]
print(nums[::-1])  #切片步长为负数时，从后往前取值，取整个列表的值，执行结果：[20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]

如果有写的不对的，欢迎指出~~

print的用法

print('  %d %1.4f  ' % (j,Distance_matrix[i][j]))

分词空格，读取每一行

    with open("image_files.txt","r") as face_file:
        for line in face_file.readlines():
            path = line.strip().split("\n")
            image_files.append(path[0])
            #print(line)

将txt的数据读取到一个矩阵中

'''
数据文件：2.txt内容：（以空格分开每个数据）
1 2 2.5
3 4 4
7 8 7
'''

from numpy import *
A = zeros((3, 3), dtype=float)  # 3x3 matrix of float zeros, filled from the file below

# Read the whole file inside a context manager so the handle is always closed.
with open('2.txt') as f:
    lines = f.readlines()

A_row = 0  # index of the matrix row currently being filled
for line in lines:
    # split() with no argument splits on any run of whitespace and drops the
    # trailing newline, so repeated spaces do not produce empty fields.
    fields = line.split()
    if not fields or A_row >= A.shape[0]:
        continue  # skip blank lines and any lines beyond the matrix's rows
    # Fill exactly one row with the first three values. The former
    # `A[A_row:] = ...` slice overwrote every row from A_row to the end.
    A[A_row, :] = [float(value) for value in fields[0:3]]
    A_row += 1

print(A)  # show the filled matrix

打印结果：
[[ 1.   2.   2.5]
 [ 3.   4.   4. ]
 [ 7.   8.   7. ]]

将一个np矩阵写入到一个txt文件，用空格分开

Distance_matrix = np.array([[1.0000, 0.4001, 0.9652, 0.4112, 0.2134, 0.1759],
[0.4001, 1.0000, 0.3673, 0.9457, 0.2191, 0.2402],
[0.9652, 0.3673, 1.0000, 0.3582, 0.2022, 0.2267],
[0.4112, 0.9457, 0.3582, 1.0000, 0.4616, 0.4515],
[0.2134, 0.2191, 0.2022, 0.4616, 1.0000, 0.9628],
[0.1759, 0.2402, 0.2267, 0.4515, 0.9628, 1.0000]])
print(Distance_matrix)

# Write the distance matrix to a text file: one matrix row per line, every
# value formatted with 4 decimals and followed by a single space.
# The rows are iterated directly, so no separate size variable is needed
# (the loop used to depend on `nrof_images`, which this snippet never defines).
with open("Distance_matrix.txt","w") as f:
    for row in Distance_matrix:
        f.write(''.join('%1.4f ' % dist for dist in row))
        f.write('\n')

python2的print替换为python3的print( )

print (.*?);?$
print($1)

python print 颜色显示

显示颜色格式：\033[显示方式;字体色;背景色m......[\033[0m]

-------------------------------------------
-------------------------------------------
字体色     |       背景色     |      颜色描述
-------------------------------------------
30        |        40       |       黑色
31        |        41       |       红色
32        |        42       |       绿色
33        |        43       |       黃色
34        |        44       |       蓝色
35        |        45       |       紫红色
36        |        46       |       青蓝色
37        |        47       |       白色
-------------------------------------------
-------------------------------
显示方式     |      效果
-------------------------------
0           |     终端默认设置
1           |     高亮显示
4           |     使用下划线
5           |     闪烁
7           |     反白显示
8           |     不可见
-------------------------------

例：

print('This is a \033[1;35m test \033[0m!')
print('This is a \033[1;32;43m test \033[0m!')
print('\033[1;33;44mThis is a test !\033[0m')

Python 五个知识点搞定作用域

python split分词函数以及字符串的strip().split("/")函数

获取路径的文件名，去掉后缀名
>>> import os
>>> os.path.splitext(os.path.split("/data/ert/123.jpg")[1])[0]
'123'

>>> os.path.split("/data/ert/123.jpg")
('/data/ert', '123.jpg')
>>>

>>> facepath = "/data/ert/123.jpg"
>>> faceID = facepath.strip().split("/")
>>> faceID = faceID[-1]
>>> print faceID
123.jpg
>>>

numpy增加一个维度

    #numpy增加一个维度
    a = np.array([1, 2, 3])  
    b = np.array([2, 3, 4])  
    c = np.vstack((a,b))
    d = a[np.newaxis, :]
    print(d)
    print(a.shape)
    print(d.shape)
    print(c[0])
    print(c.sum(axis=0))

遍历图像文件夹，找出所有的子目录

def findAllfile(path, allfile):
    """Recursively collect the paths of all non-directory entries under *path*.

    Entries are visited in the order returned by ``os.listdir``; a
    sub-directory is descended into as soon as it is encountered.

    Args:
        path: Directory to scan.
        allfile: List the discovered file paths are appended to (mutated in place).

    Returns:
        The same ``allfile`` list that was passed in.
    """
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if not os.path.isdir(full_path):
            allfile.append(full_path)
            continue
        # Directory: recurse, accumulating into the shared list.
        findAllfile(full_path, allfile)
    return allfile

# Walk the image folder: list the files of every first-level sub-directory.
# NOTE(review): the path literal was unterminated in the original notes;
# "/DATA" may be a truncated path - confirm the intended directory.
clusterpath = "/DATA"
filelist = os.listdir(clusterpath)
error_floder = []
for filename in filelist:
    filepath = os.path.join(clusterpath, filename)
    if os.path.isdir(filepath):
        print(filepath)
        # Indented with ASCII spaces; full-width spaces are a syntax error.
        image_files = findAllfile(filepath, [])

# Walk the image folder: list the files of every first-level sub-directory.
# NOTE(review): the path literal was unterminated in the original notes;
# "/DATA" may be a truncated path - confirm the intended directory.
clusterpath = "/DATA"
filelist = os.listdir(clusterpath)
error_floder = []
for filename in filelist:
    filepath = os.path.join(clusterpath, filename)
    if os.path.isdir(filepath):
        print(filepath)
        # Indented with ASCII spaces; full-width spaces are a syntax error.
        image_files = findAllfile(filepath, [])

找出一个list中出现次数最多的id和标签

#coding:utf-8
from collections import Counter

def getmaxNum(srclist):
    """Return the most frequent element of *srclist* and how often it occurs.

    Args:
        srclist: Iterable of hashable items.

    Returns:
        A ``(most_common_id, most_common_counter)`` tuple. When several
        elements share the highest count, the one ``Counter.most_common``
        lists first is returned. For an empty input the result is
        ``(None, 0)`` instead of an IndexError.
    """
    ranked = Counter(srclist).most_common(1)
    if not ranked:
        # Nothing to count: report "no element" with a count of zero.
        return None, 0
    most_common_id, most_common_counter = ranked[0]
    return most_common_id, most_common_counter


if __name__ == '__main__':
    # Demo input: the value 2 appears seven times, more than any other value.
    a = [4, 4] + [1] * 4 + [2] * 7 + [3]
    most_common_id, most_common_counter = getmaxNum(a)
    print(most_common_id)
    print(most_common_counter)

nonzero的用法，返回非零元素的索引下标

>>> a = np.array([[0,0,3],[0,0,0],[0,0,9]])
>>> b = np.nonzero(a)
>>> print(b)
(array([0, 2]), array([2, 2]))
>>> a = np.array([[0,0,3],[0,0,0],[0,5,9]])
>>> b = np.nonzero(a)
>>> print(b)
(array([0, 2, 2]), array([2, 1, 2]))
>>> a = np.array([[0,0,3],[4,0,0],[0,5,9]])
>>> b = np.nonzero(a)
>>> print(b)
(array([0, 1, 2, 2]), array([2, 0, 1, 2]))
>>>

python中循环遍历目录中所有文件

def findAllfile(path, allfile):
    """Recursively collect the paths of all non-directory entries under *path*.

    Entries are visited in the order returned by ``os.listdir``; a
    sub-directory is descended into as soon as it is encountered.

    Args:
        path: Directory to scan.
        allfile: List the discovered file paths are appended to (mutated in place).

    Returns:
        The same ``allfile`` list that was passed in.
    """
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if not os.path.isdir(full_path):
            allfile.append(full_path)
            continue
        # Directory: recurse, accumulating into the shared list.
        findAllfile(full_path, allfile)
    return allfile

#coding=utf-8
import os
import cv2

def dirlist(path, allfile):
    """Append the path of every file found below *path* to *allfile*.

    Directories are walked recursively, in ``os.listdir`` order.

    Args:
        path: Directory to start from.
        allfile: Accumulator list; it is extended in place.

    Returns:
        The ``allfile`` list itself.
    """
    for name in os.listdir(path):
        child = os.path.join(path, name)
        if os.path.isdir(child):
            # Descend into the sub-directory, sharing the accumulator.
            dirlist(child, allfile)
            continue
        allfile.append(child)
    return allfile
 
# Convert every .jpg below jpgpath to .png and remove the original file.
jpgpath = "../lfw_160_dlib_aligned/"
file = dirlist(jpgpath, [])
num = 0  # number of images converted (was incremented without being initialised)
for srcpath in file:
    # Only touch real .jpg files. Replacing "jpg" anywhere in the path left
    # savepath == srcpath for every other file, so that file was rewritten
    # and then deleted by the cleanup step.
    stem, ext = os.path.splitext(srcpath)
    if ext.lower() != ".jpg":
        continue
    savepath = stem + ".png"
    print(srcpath)
    print(savepath)
    img = cv2.imread(srcpath)
    if img is None:  # cv2.imread returns None when the image cannot be read
        continue
    # Delete the source only after the PNG has actually been written.
    if cv2.imwrite(savepath, img):
        # os.remove avoids building a shell command, which breaks on paths
        # containing spaces or shell metacharacters.
        os.remove(srcpath)
        num += 1

python代码中执行shell命令
# Create the log directory first: if cp runs while log_dir does not exist yet,
# it creates a regular file with that name and the later makedirs call fails.
if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
    os.makedirs(log_dir)

# Run a shell command from Python: copy the model definition source into log_dir.
cppath = "./src/" + args.model_def.replace(".", "/") + ".py"
os.system("cp " + cppath + " " + log_dir)

目录存在删除目录
# Start from an empty result directory: remove whatever exists at the path,
# then create it again. The standard library is used instead of a shell
# "rm -rf" string, which misbehaves on paths with spaces or metacharacters.
import shutil

if os.path.isdir(face_clustr_result_center) and not os.path.islink(face_clustr_result_center):
    shutil.rmtree(face_clustr_result_center)
elif os.path.lexists(face_clustr_result_center):
    # A regular file or a symlink: remove just that entry.
    os.remove(face_clustr_result_center)
if not os.path.isdir(face_clustr_result_center): # Create the directory if it doesn't exist
    os.makedirs(face_clustr_result_center)

spyder安装

python2

sudo pip install spyder
sudo apt-get install python-pyqt*

python3

sudo pip3 install spyder
sudo apt-get install python3-pyqt*

文件处理获取文件夹中的所有文件，写入文件

for picFile in os.listdir("/data/"):
     print(picFile)

#coding:utf-8
#https://www.cnblogs.com/wktwj/p/7257526.html

import os
import tensorflow as tf
from PIL import Image

root_dir = os.getcwd()

def getTrianList():
    """Write one "<relative image path> <class name>" line per image to train_class.txt.

    Every entry of ``root_dir + '/data'`` is listed as a class directory and
    every entry inside it as one of its images; each image name is also
    printed while the list is written.
    """
    data_root = root_dir + '/data'
    with open("train_class.txt", "w") as f:
        for class_name in os.listdir(data_root):
            for picFile in os.listdir(data_root + "/" + class_name):
                print(picFile)
                # Line format: data/<class>/<image> <class>
                f.write("data/%s/%s %s\n" % (class_name, picFile, class_name))

if __name__=="__main__":
    getTrianList()

打开文件读取每一行，并分词

# Read the template list line by line and split each line into its two fields.
# NOTE(review): the file name contains a space before ".txt" - confirm it is intended.
with open("faceimg/face_template .txt","r") as face_file:
    for line in face_file:
        # Each line must hold exactly two whitespace-separated fields,
        # otherwise the tuple unpacking raises ValueError.
        path, label = line.strip().split()

Numpy建立空矩阵

exist_face_vec = np.zeros([0, face_vec_size])

Numpy垂直方向拼接矩阵

exist_face_vec = np.vstack((exist_face_vec, face_detect_vec_one))

给深度学习入门者的Python快速教程 - numpy和Matplotlib篇

Numpy使用 list转换为narray

Numpy拼接两个array

#coding:utf-8
import numpy as np
nrof_samples = 2
img_list = [None] * nrof_samples
img_list[0] = [1, 2, 3]
img_list[1] = [3, 4, 5]
images = np.stack(img_list)  # stack the two lists along a new first axis -> shape (2, 3)
print(images.shape)
print(images)
x = np.array([8, 9, 10])
images_new = np.vstack((images, x))  # append x as an additional row
print(images_new)

# x=np.array([[9,8,7],[6,5,4]])
# print(x)

# y = np.array([2,3])
# print(y.shape)


l0 = np.arange(6).reshape((2, 3))
l1 = np.arange(6, 12).reshape((2, 3))

# vstack joins two arrays along the vertical axis (stacks rows).
# hstack joins two arrays along the horizontal axis (stacks columns).
# concatenate is the general form: the two concatenate calls below are
# equivalent to vstack and hstack respectively.
# stack does not join; it adds a new dimension on top of the input arrays.
print(l0)
print(l1)
m = np.vstack((l0, l1))
print(m)
p = np.hstack((l0, l1))
print(p)
q = np.concatenate((l0, l1))
r = np.concatenate((l0, l1), axis=-1)
s = np.stack((l0, l1))
print(s)

python计时函数的使用

http://www.jb51.net/article/114797.htm

import time
time1 = time.time()
time.sleep(15)
time2 = time.time()
print time2 - time1

#python 各种for循环 for elem in list_array

for iterating_var in sequence:
   statements(s)

(1) 列表for循环
actual_issame_bool = [False, True, True]
# Map every flag to 1 when it equals True and to 0 otherwise.
actual_issame = [1 if flag == True else 0 for flag in actual_issame_bool]

(2) 文件行for循环读取
csv_file = 'interest.bak'
with open(csv_file, "r") as f:
    # Iterate over the file object to get one line per iteration.
    # f.readline() returns only the first line, so looping over its result
    # walks that line character by character.
    for line in f:
        print(line.rstrip("\n"))  # drop the line's own newline; print adds one

(3) 利用range产生等差数列0:99
total_images = 100
# range(total_images) yields the arithmetic sequence 0, 1, ..., total_images - 1
for i in range(total_images):
    print(i)

python 文件操作

print 输出到文件

　f = open("data/face_train_num.txt", 'w')
    print>> f, '\t people\tpicture'
    print >> f, 'total:\t%6d\t%7d' % (total_people, total_picture)
    print >> f, 'test:\t%6d\t%7d' % (len(test_people), len(test_set))
    print >> f, 'valid:\t%6d\t%7d' % (label, len(valid_set))
    print >> f, 'train:\t%6d\t%7d' % (label, len(train_set))