python 文件操作和深浅copy

一、编码的进阶

1 str 与 bytes 形式

s1 = 'alex'
print(s1,type(s1))    #alex <class 'str'>
s2 = b'alex'          
print(s2.upper())     #b'ALEX'
print(s2,type(s2))    # b'alex' <class 'bytes'>

2，encode 编码 decode 解码

s1 = '中国'
print(s1, type(s1))       #中国 <class 'str'>
b1 = s1.encode('utf-8')
print(b1,type(b1))        #b'\xe4\xb8\xad\xe5\x9b\xbd' <class 'bytes'>

str ----> utf-8 bytes

s1 = 'a太白'
b1 = s1.encode('utf-8')
print(b1)  # b'a\xe5\xa4\xaa\xe7\x99\xbd'

str ----> gbk bytes

s1 = 'a太白'
b1 = s1.encode('gbk')
print(b1)  # b'a\xcc\xab\xb0\xd7'

utf-8 bytes ----> str Unicode

b3 = b'a\xe5\xa4\xaa\xe7\x99\xbd'
s2 = b3.decode('utf-8')
print(s2)          #a太白

utf-8 bytes --------> gbk bytes

b3 = b'a\xe5\xa4\xaa\xe7\x99\xbd'

# 先将b3 转化成UNicode
s = b3.decode('utf-8')

# 再将 s 编码成 gbk
b4 = s.encode('gbk')
print(b4)  # b'a\xcc\xab\xb0\xd7'

s1 = 'lao男孩'
b1 = s1.encode('utf-8')
s2 = b1.decode('utf-8')
b2 = s2.encode('gbk')
print(b2)

二、文件操作初识

计算机系统分为：计算机硬件，操作系统，应用程序三部分。

我们用python或其他语言编写的应用程序若想要把数据永久保存下来，必须要保存于硬盘中，这就涉及到应用程序要操作硬件，众所周知，应用程序是无法直接操作硬件的，

这就用到了操作系统。操作系统把复杂的硬件操作封装成简单的接口给用户/应用程序使用，其中文件就是操作系统提供给应用程序来操作硬盘的虚拟概念，用户或应用程序通过操作文件，

可以将自己的数据永久保存下来。

1.文件编码

windows: 编码:gbk.

linux,mac: 编码是utf-8

2.操作文件的流程：

1,打开文件,产生一个文件句柄.

2, 对文件句柄进行相应的操作.

3,关闭文件句柄.

#1. 打开文件，得到文件句柄并赋值给一个变量
f=open('a.txt','r',encoding='utf-8') #默认打开模式就为r

#2. 通过句柄对文件进行操作
data=f.read()

#3. 关闭文件
f.close()

with打开文件

with open('a.txt','w') as f:
    pass
 
with open('a.txt','r') as read_f,\
    open('b.txt','w') as write_f:
    data=read_f.read()
    write_f.write(data)

with打开，不用close

3.打开文件的方式：

　　r,w,a

　　r+,w+,a+

　　rb,wb,ab

　　r+b,w+b,a+b

默认使用的是r(只读模式)

（1）文件的读 r r+ rb r+b

read() #全部读取

f1 = open('r模式',encoding='utf-8')
content = f1.read()
print(content,type(content))
f1.close()

注： read(n) 在 r模式下: n 表示字符数

rb模式下: n 表示字节数

f1 = open('r模式',encoding='utf-8')
print(f1.read(3))  #读三个字符
f1.close()

f1 = open('r模式',mode='rb')
print(f1.read(3)) #读三个字节
f1.close()

readline() #按行读取

f1 = open('r模式',encoding='utf-8')
print(f1.readline().strip())
f1.close()

readlines() # 返回一个list 列表的每个元素是源文件的每一行.

f1 = open('r模式',encoding='utf-8')
print(f1.readlines())
f1.close()

循环读取

f1 = open('r模式',encoding='utf-8')
for line in f1:
    print(line)
f1.close()

rb 以字节的形式读取 # 带b的一般操作的都是非文字类的文件.

f = open('赵丽颖.jpg',mode='rb')
content = f.read()
f.close()

r+ 读写模式:先读后写

f1 = open('r模式',encoding='utf-8',mode='r+')
content = f1.read()
print(content)
f1.write('666')
f1.close()

（2）文件的写 w w+ wb w+b

w # 没有文件,创建文件,写入内容

# 如果有文件,先清空内容,后写入

f = open('w模式',encoding='utf-8',mode='w')
f.write('随便写一点')
f.close()

f = open('w模式',encoding='utf-8',mode='w')
f.write('1alex is a lower man\n')
f.write('1alex is a lower man\n')
f.write('1alex is a lower man\n')
f.write('1alex is a lower man\n')
f.close()

写多次

f = open('w模式',encoding='utf-8',mode='w')
for i in range(4):
    f.write('Alex is a lower man\n')
f.close()

f = open('赵丽颖.jpg',mode='rb')
content = f.read()
f.close()

f1 = open('美女.jpg',mode='wb')
f1.write(content)
f1.close()

（3）文件的追加 a ab a+ a+b

a 没有文件,创建文件,写入内容；如果有文件,在文件末尾追加内容

f = open('a模式',encoding='utf-8',mode='a')
f.write('很多让人很有成就感的事情')
f.write('很多让人很有成就感的事情')
f.write('很多让人很有成就感的事情')
f.close()

f = open('a模式',encoding='utf-8',mode='a')
f.write('666')
f.close()

（4）文件的其他操作

f1 = open('r模式',encoding='utf-8')
print(f1.fileno())  #   文件描述符 用不到

f1 = open('其他操作方法',encoding='utf-8',mode='w')
f1.write('jfkdlsfjdsafkds')
f1.flush()  # 强制保存
f1.close()

readable writable 是否可读，是否可写

f1 = open('其他操作方法',encoding='utf-8')
# print(f1.readable())  # True
# f1.write('fdsafs')
print(f1.writable())  # False
if f1.writable():
    f1.write('fhdsklafjds')
f1.close()

seek # 按照字节调整光标位置（网络编程: FTP的作业,断点续传的功能. seek tell）

tell # 获取光标的位置

f1 = open('其他操作方法',encoding='utf-8')
# f1.seek(9)  # 按照字节调整光标位置
print(f1.tell())  # 获取光标的位置
print(f1.read())
print(f1.tell())
f1.close()

truncate # 对原文件进行截取，它必须在可写情况下使用。

f = open('其他操作方法',encoding='utf-8',mode='r+')
f.seek(3)  # 调整光标对truncate不管用
f.truncate(9)  # truncate都是从文件的开始进行截取,以字节为单位.
f.close()

（5）文件的改

1,以读的模式打开原文件.

2,以写的模式创建一个新文件.

import os
with open('alex自述',encoding='utf-8') as f1,\
    open('alex自述.bak',encoding='utf-8',mode='w') as f2:
# 3,将原文件内容读取出来,按照你的要求改成新内容,写入新文件.
    old_content = f1.read()
    new_content = old_content.replace('alex','sb')
    f2.write(new_content)
# 4,删除原文件.
os.remove('alex自述')
# 5,将新文件重命名成原文件.
os.rename('alex自述.bak','alex自述')

import os
with open('alex自述',encoding='utf-8') as f1,\
    open('alex自述.bak',encoding='utf-8',mode='w') as f2:
# 3,将原文件内容读取出来,按照你的要求改成新内容,写入新文件.
    for old_line in f1:
        new_line = old_line.replace('alex','sb')
        f2.write(new_line)

# 4,删除原文件.
os.remove('alex自述')
# 5,将新文件重命名成原文件.
os.rename('alex自述.bak','alex自述')

三、深浅copy

1，先看赋值运算。

l1 = [1,2,3,['barry','alex']]
l2 = l1

l1[0] = 111
print(l1)  # [111, 2, 3, ['barry', 'alex']]
print(l2)  # [111, 2, 3, ['barry', 'alex']]

l1[3][0] = 'wusir'
print(l1)  # [111, 2, 3, ['wusir', 'alex']]
print(l2)  # [111, 2, 3, ['wusir', 'alex']]

赋值运算，l1与l2指向的是同一个内存地址，所以它们是完全一样的。

2，浅拷贝copy

#同一代码块下：
l1 = [1, '太白', True, (1,2,3), [22, 33]]
l2 = l1.copy()
print(id(l1), id(l2))  # 2713214468360 2713214524680
print(id(l1[-2]), id(l2[-2]))  # 2547618888008 2547618888008
print(id(l1[-1]),id(l2[-1]))  # 2547620322952 2547620322952

# 不同代码块下：
>>> l1 = [1, '太白', True, (1, 2, 3), [22, 33]]
>>> l2 = l1.copy()
>>> print(id(l1), id(l2))
1477183162120 1477183162696
>>> print(id(l1[-2]), id(l2[-2]))
1477181814032 1477181814032
>>> print(id(l1[-1]), id(l2[-1]))
1477183162504 1477183162504

浅copy，只是在内存中重新开辟了一个空间存放一个新列表，但是新列表中的元素与原列表中的元素是共用的。

3，深拷贝deepcopy

# 同一代码块下
import copy
l1 = [1, 'alex', True, (1,2,3), [22, 33]]
l2 = copy.deepcopy(l1)
print(id(l1), id(l2))  # 2788324482440 2788324483016
print(id(l1[0]),id(l2[0]))  # 1470562768 1470562768
print(id(l1[-1]),id(l2[-1]))  # 2788324482632 2788324482696
print(id(l1[-2]),id(l2[-2]))  # 2788323047752 2788323047752

# 不同代码块下
>>> import copy
>>> l1 = [1, '太白', True, (1, 2, 3), [22, 33]]
>>> l2 = copy.deepcopy(l1)
>>> print(id(l1), id(l2))
1477183162824 1477183162632
>>> print(id(0), id(0))
1470562736 1470562736
>>> print(id(-2), id(-2))
1470562672 1470562672
>>> print(id(l1[-1]), id(l2[-1]))
1477183162120 1477183162312

深copy，列表是在内存中重新创建的，列表中可变的数据类型是重新创建的，列表中的不可变的数据类型是共用的。

posted @ 2019-03-14 22:15 Xiao_Xu 阅读(292) 评论(0) 编辑收藏举报

刷新页面返回顶部

Xiao_Xu

python 文件操作和深浅copy

一、编码的进阶

二、文件操作初识

三、深浅copy

公告