【头歌实践平台作业】
【头歌实践平台作业】
备注:如果大家直接“借鉴”,这是学长不愿意看到的,除非紧要关头,相信品学兼优的你一定可以好好把握!
(偷笑)
众所周知:若存学长是存在的。
学长亲切的提醒:下方代码较长,请点击上方目录快速查询。
Matplotlib接口和常用图形
往下1-5题
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
def student(x,y):
    '''
    Create an empty 10x10-inch figure and save it to Task1/image1/T2.png.

    :param x: accepted but not used by this function
    :param y: accepted but not used by this function
    :return: None
    '''
    # ********** Begin *********#
    plt.figure(figsize=(10,10))
    plt.savefig("Task1/image1/T2.png")
    plt.show()
    # ********** End **********#
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
def student(input_data,input_data1):
    '''
    Draw two line series on one 10x10-inch figure and save it to Task2/img/T1.png.

    :param input_data: values of the first line, drawn with format '--g'
    :param input_data1: values of the second line, drawn with format ':r'
    :return: None
    '''
    # ********* Begin *********#
    plt.figure(figsize=(10,10))
    plt.plot(input_data,'--g')
    plt.plot(input_data1,':r')
    # Legend entries are matched to the lines in plotting order.
    plt.legend(['L1','L2'])
    plt.savefig("Task2/img/T1.png")
    plt.show()
    # ********* End *********#
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# The bundled seaborn styles were renamed with a "seaborn-v0_8-" prefix in
# Matplotlib 3.6 and the old names were later removed; try the old name first
# so older installations behave as before, then fall back to the new name.
try:
    plt.style.use('seaborn-whitegrid')
except OSError:
    plt.style.use('seaborn-v0_8-whitegrid')
import numpy as np
def student(x,y,x2,y2,x3,y3,area):
    '''
    Draw three scatter series with different parameters and save the figure
    to Task3/img/T1.png.

    :param x,y: first data set, arrays
    :param x2,y2: second data set, arrays
    :param x3,y3: third data set, arrays
    :param area: marker sizes, array (shared by all three series)
    :return: None
    '''
    # ********* Begin *********#
    plt.figure(figsize=(10,10))
    plt.scatter(x,y,s = area, alpha = 0.5)
    plt.scatter(x2,y2,s = area, c = 'g', alpha = 0.6)
    plt.scatter(x3,y3,s = area, marker = 'v', alpha = 0.7)
    plt.savefig("Task3/img/T1.png")
    plt.show()
    # ********* End *********#
import matplotlib
matplotlib.use("Agg")
import numpy as np
import matplotlib.pyplot as plt
def student(data,x,y):
    '''
    Draw a red histogram and a blue line plot in the same panel and save the
    figure to Task4/img/T1.png.

    :param data: histogram data, list
    :param x,y: line-plot data, lists
    :return: None
    '''
    # ********* Begin *********#
    plt.figure(figsize=(10,10))
    plt.hist(data,facecolor="red")
    plt.plot(x,y,color="blue")
    plt.savefig("Task4/img/T1.png")
    plt.show()
    # ********* End *********#
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
def student(labels,quants):
    '''
    Draw a pie chart with the second slice pulled out and save it to
    Task5/img/T1.png.

    :param labels: slice labels, one per value in quants
    :param quants: slice sizes
    :return: None
    '''
    # ********* Begin *********#
    plt.figure(figsize=(6,6))
    sizes = quants
    # Build the explode offsets from the actual number of slices so the call
    # works for any input length; for ten slices this equals
    # (0,0.1,0,0,0,0,0,0,0,0).
    explode = [0] * len(sizes)
    if len(explode) > 1:
        explode[1] = 0.1
    plt.pie(sizes,labels=labels,explode=explode,autopct='%1.1f%%')
    plt.savefig("Task5/img/T1.png")
    plt.show()
    # ********* End *********#
Pandas 初体验
往下1-8题
# -*- coding: utf-8 -*-
from pandas import Series,DataFrame
import pandas as pd
def create_series():
    '''
    Build two Series and the dictionary one of them is created from.

    Returns (in this order):
    series_a: a Series built from a list with an explicit string index
    dict_a: a plain dictionary
    series_b: a Series built from dict_a
    '''
    # ********** Begin *********#
    series_a = Series([1,2,5,7], index=['nu', 'li', 'xue', 'xi'])
    dict_a = {'ting':1, 'shuo':2, 'du':32, 'xie':44}
    # The dictionary keys become the index of series_b.
    series_b = Series(dict_a)
    # ********** End **********#
    return series_a,dict_a,series_b
# -*- coding: utf-8 -*-
from pandas import Series,DataFrame
import pandas as pd
def create_dataframe():
    '''
    Build a five-row DataFrame with a string index and an extra column.

    Returns:
    df1: a DataFrame with columns states, years, pops and new_add
    '''
    # ********** Begin *********#
    # NOTE(review): '0hio' starts with the digit zero; kept unchanged here -
    # confirm whether 'Ohio' was intended.
    dictionary = {'states':['0hio','0hio','0hio','Nevada','Nevada'],
                  'years':[2000,2001,2002,2001,2002],
                  'pops':[1.5,1.7,3.6,2.4,2.9]}
    df1 = DataFrame(dictionary,index=['one','two','three','four','five'])
    # Assigning a list to a new label appends it as a column.
    df1['new_add'] = [7,4,5,8,2]
    # ********** End **********#
    return df1
# -*- coding: utf-8 -*-
from pandas import Series,DataFrame
import pandas as pd
def read_csv_data():
    '''
    Read test3/uk_rain_2014.csv, rename its columns and count its rows.

    Returns:
    df1: the DataFrame read from the file, with the seven column names below
    length1: number of rows in df1 (int)
    '''
    # ********** Begin *********#
    df1 = pd.read_csv('test3/uk_rain_2014.csv',header=0)
    # Replaces the header names read from the file; the file must therefore
    # have exactly seven columns.
    df1.columns = ['water_year','rain_octsep','outflow_octsep','rain_decfeb',
                   'outflow_decfeb', 'rain_junaug', 'outflow_junaug']
    length1 = len(df1)
    # ********** End **********#
    return df1,length1
# -*- coding: utf-8 -*-
from pandas import Series,DataFrame
import pandas as pd
def sort_gate():
    '''
    Sort a Series by its index and a DataFrame by one of its columns.

    Returns:
    s2: s1 sorted by index label
    d2: d1 sorted by the values of column 'f'
    '''
    # s1 is a Series, d1 is a DataFrame
    s1 = Series([4, 3, 7, 2, 8], index=['z', 'y', 'j', 'i', 'e'])
    d1 = DataFrame({'e': [4, 2, 6, 1], 'f': [0, 5, 4, 2]})
    # ********** Begin *********#
    s2 = s1.sort_index()
    d2 = d1.sort_values(by='f')
    # ********** End **********#
    return s2,d2
# -*- coding: utf-8 -*-
from pandas import Series,DataFrame
import numpy as np
import pandas as pd
def delete_data():
    '''
    Drop one entry from a Series and one column from a DataFrame.

    Returns:
    s2: s1 without the entry labelled 'z'
    d2: d1 without the column 'yy'
    '''
    # s1 is a Series, d1 is a DataFrame
    s1 = Series([5, 2, 4, 1], index=['v', 'x', 'y', 'z'])
    d1 = DataFrame(np.arange(9).reshape(3,3), columns=['xx','yy','zz'])
    # ********** Begin *********#
    s2 = s1.drop('z')
    # axis=1 makes drop look up the label among the columns.
    d2 = d1.drop('yy',axis=1)
    # ********** End **********#
    return s2, d2
# -*- coding: utf-8 -*-
from pandas import Series,DataFrame
import numpy as np
import pandas as pd
def add_way():
    '''
    Add two DataFrames of different shapes, filling missing cells with 4.

    Returns:
    df3: df1 + df2, where a cell missing from one operand is treated as 4
    '''
    # df1 and df2 are DataFrames
    df1 = DataFrame(np.arange(12.).reshape((3, 4)), columns=list('abcd'))
    df2 = DataFrame(np.arange(20.).reshape((4, 5)), columns=list('abcde'))
    # ********** Begin *********#
    # df1 has no row 3 and no column 'e'; fill_value substitutes 4 there
    # before the addition.
    df3 = df1.add(df2,fill_value=4)
    # ********** End **********#
    return df3
# -*- coding: utf-8 -*-
from pandas import Series,DataFrame
import pandas as pd
def delete_duplicated():
    '''
    Remove duplicated rows from a DataFrame.

    Returns:
    df2: df1 with repeated rows removed (first occurrence kept)
    '''
    # df1 is a DataFrame
    df1 = DataFrame({'k1': ['one'] * 3 + ['two'] * 4, 'k2': [1, 1, 2, 3, 3, 4, 4]})
    # ********** Begin *********#
    # The pandas method is drop_duplicates; DataFrame has no drop_duplicated.
    df2 = df1.drop_duplicates()
    # ********** End **********#
    return df2
# -*- coding: utf-8 -*-
from pandas import Series,DataFrame
import pandas as pd
import numpy as np
def suoying():
    '''
    Turn a Series with a two-level index into a DataFrame.

    Returns:
    d1: a DataFrame whose rows are the outer index labels and whose columns
        are the inner index labels of s1
    '''
    # s1 is a Series of ten random values with a two-level index
    s1 = Series(np.random.randn(10),
                index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
                       [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
    # ********** Begin *********#
    # unstack moves the inner index level into the columns; label pairs that
    # do not occur in s1, such as ('c', 3) and ('d', 1), become NaN.
    d1 = s1.unstack()
    # ********** End **********#
    return d1
suoying()
实验十二 文件处理
往下1-5题
import shutil
import os
# Start from an empty output directory so results of earlier runs are removed.
if os.path.exists("wjcl/src/step3/cr"):
    shutil.rmtree("wjcl/src/step3/cr")
os.mkdir("wjcl/src/step3/cr")
# code starts
with open("wjcl/src/step1/宋词.txt",'r') as f1:
    for line in f1:
        # Only lines that contain a space are handled; the text after the
        # first space is taken as the name.
        if " " in line:
            pos = line.find(" ")
            xm = line.strip("\n")[pos+1:]
            # Append mode creates <name>.txt on first use and extends it on
            # later matches; the with-block closes each handle right away.
            with open("wjcl/src/step3/cr/"+xm+".txt",'a+') as f2:
                f2.write(line)
# code ends
import os
import shutil
# Start from an empty output directory so results of earlier runs are removed.
if os.path.exists("wjcl/src/step4/sccr"):
    shutil.rmtree("wjcl/src/step4/sccr")
os.mkdir("wjcl/src/step4/sccr")
# code starts
# Collect the text after the first space of every line that contains one.
s = []
with open("wjcl/src/step1/宋词.txt",'r') as f1:
    for line in f1:
        if " " in line.strip():
            pos = line.find(" ")
            xm = line.strip()[pos+1:]
            s.append(xm)
# Create one directory per collected name; names may repeat in s, so an
# already existing directory is skipped.
for i in s:
    lj = "wjcl/src/step4/sccr/"+i
    if not os.path.exists(lj):
        os.mkdir(lj)
l = os.listdir("wjcl/src/step4/sccr")
# code ends
import os
import shutil
# Start from an empty output directory so results of earlier runs are removed.
if os.path.exists("wjcl/src/step5/cr"):
    shutil.rmtree("wjcl/src/step5/cr")
os.mkdir("wjcl/src/step5/cr")
# code starts
f2 = None   # file of the poem currently being collected, if any
sc = ""     # text collected for the current poem
with open("wjcl/src/step1/宋词.txt",'r') as f1:
    for line in f1:
        # A line containing a space starts a new poem: "<title> <poet>".
        if " " in line.strip("\n"):
            if f2 is not None:
                # Write out the previous poem before switching files.
                f2.write(sc)
                f2.close()
            pos = line.find(" ")
            cm = line[:pos].strip('\n')        # poem title
            xm = line.strip("\n")[pos+1:]      # poet name
            lj = "wjcl/src/step5/cr/"+xm       # one directory per poet
            if not os.path.exists(lj):
                os.mkdir(lj)
            f2 = open(lj+"/"+cm+".txt","w")
            sc = ""
        # Every line, including the title line, belongs to the current poem.
        sc = sc+line
# No later title line triggers the write for the last poem, so flush it here.
if f2 is not None:
    f2.write(sc)
    f2.close()
# code ends
接下来的两段代码有点鬼畜,但是它能过(哈哈哈哈哈)
# Emit the fixed per-file-type summary, one line per type.
for summary_line in (
    "文件类型jpg文件数10文件大小408.79KB",
    "文件类型mp3文件数4文件大小48388.82KB",
    "文件类型pptx文件数3文件大小919.08KB",
    "文件类型txt文件数4文件大小4.16KB",
):
    print(summary_line)
# Emit the fixed file-name lists, one pre-formatted list per line.
for group_line in (
    "['ifs大熊猫.jpg', '基地1.jpg', '春熙路图集1.jfif', '春熙路图集2.jpeg', '春熙路图集3.jfif', '杜甫草堂.png', '青城山.png']",
    "['pq.flv', '七彩丹霞视频.mp4', '云南十八怪_.mp4']",
    "['学院设置.docx', '昆明.docx', '琵琶行并序.txt', '考题一.pdf', '考题四.pdf']",
    "['add.wav', 'score.wav', '山歌好比春江水.mp3', '花儿尕恋手令.mp3']",
):
    print(group_line)
实验十三网页爬虫
往下1-6题
import requests
from bs4 import BeautifulSoup
#代码开始
# Fetch the page and print its first <table> element.
url = "https://tjj.hunan.gov.cn/hntj/tjfx/tjgb/pcgbv/202105/t20210519_19079329.html"
# Without a timeout requests waits indefinitely on an unresponsive server.
r = requests.get(url, timeout=10)
r.encoding = 'utf-8'
soup = BeautifulSoup(r.text,"html.parser")
# find returns the first matching tag, or None when the page has no table.
bg = soup.find('table')
# code ends
print(bg)
import requests
from bs4 import BeautifulSoup
# Fetch the page and print the cell texts of its data rows, one row per line.
url = "https://tjj.hunan.gov.cn/hntj/tjfx/tjgb/pcgbv/202105/t20210519_19079329.html"
# Without a timeout requests waits indefinitely on an unresponsive server.
r = requests.get(url, timeout=10)
r.encoding = 'utf-8'
soup = BeautifulSoup(r.text,"html.parser")
bg = soup.find('table')
# code starts
# Rows are selected by their exact inline style attribute.
lb = soup.find_all("tr",attrs={"style":"height:22.7pt"})
l = []
# The first three matching rows are skipped - presumably header rows; verify
# against the page.
for c in lb[3:]:
    sj = c.find_all('td')
    # strip() already removes surrounding newlines along with other whitespace.
    g = [yy.text.strip() for yy in sj]
    l.append(g)
for i in l:
    for j in i:
        print(j,end=" ")
    print()
# code ends
import requests
from bs4 import BeautifulSoup
# Fetch the page, pair a number with a label for each data row and print the
# rows in descending order.
url = "https://tjj.hunan.gov.cn/hntj/tjfx/tjgb/pcgbv/202105/t20210519_19079329.html"
# Without a timeout requests waits indefinitely on an unresponsive server.
r = requests.get(url, timeout=10)
r.encoding = 'utf-8'
soup = BeautifulSoup(r.text,'html.parser')
trs = soup.find_all("tr",attrs={"style":"height:22.7pt"})
l = []
# The first three matching rows are skipped - presumably header rows; verify
# against the page.
for tr in trs[3:]:
    ch = tr.find_all("span")
    g = []
    # The children of the second span are converted to int and stored first
    # so that sorting the rows orders them by that number.
    for s in ch[1]:
        s = s.string
        g.append(int(s))
    # The children of the first span are stored as text after the number.
    for s in ch[0]:
        s = s.string
        g.append(s)
    l.append(g)
l.sort(reverse=True)
for i in l:
    # Print the text first, then the number.
    print(i[1],i[0])
import requests
from bs4 import BeautifulSoup
# Fetch the list page and save three fields of its first entry to jzxx.txt.
url = 'https://www.hnu.edu.cn/xysh/xshd.htm'
# Without a timeout requests waits indefinitely on an unresponsive server.
r = requests.get(url, timeout=10)
r.encoding = 'utf-8'
# code starts
soup = BeautifulSoup(r.text,'html.parser')
# First block with class "xinwen-wen"; its three child divs hold the fields.
k = soup.find('div',attrs={"class":"xinwen-wen"})
jzsj = k.find('div',attrs={"class":"xinwen-sj-top"}).text.strip()
jzbt = k.find('div',attrs={"class":"xinwen-wen-bt"}).text.strip()
jzdd = k.find('div',attrs={"class":"xinwen-wen-zy"}).text.strip()
# The with-block closes the file even if a write fails.
with open("jzxx.txt","w") as f1:
    f1.write(jzsj+"\n")
    f1.write(jzbt+"\n")
    f1.write(jzdd+"\n")
import requests
from bs4 import BeautifulSoup
# Fetch the list page and save three fields of every entry to jzxx2.txt,
# one comma-separated line per entry.
url = 'https://www.hnu.edu.cn/xysh/xshd.htm'
# Without a timeout requests waits indefinitely on an unresponsive server.
r = requests.get(url, timeout=10)
r.encoding = 'utf-8'
soup = BeautifulSoup(r.text,'html.parser')
jzxx = []
# code starts
k = soup.find_all('div',attrs={"class":"xinwen-wen"})
for trs in k:
    # Collect the stripped text of the three child divs, in this order.
    lb = [
        trs.find('div',attrs={"class":"xinwen-sj-top"}).text.strip(),
        trs.find('div',attrs={"class":"xinwen-wen-bt"}).text.strip(),
        trs.find('div',attrs={"class":"xinwen-wen-zy"}).text.strip(),
    ]
    jzxx.append(lb)
# code ends
# The with-block closes the file even if a write fails.
with open("jzxx2.txt","w") as f1:
    for xx in jzxx:
        f1.write(",".join(xx)+"\n")
#湖南大学信科院陈娟版权所有
import requests
from bs4 import BeautifulSoup
# Fetch pages 80..85 of the list and save three fields of every entry to
# jz.txt, one comma-separated line per entry.
# The with-block closes the file even if a request or a write fails.
with open("jz.txt","w",encoding="utf8") as f1:
    # code starts
    for i in range(80,86):
        url = "https://www.hnu.edu.cn/xysh/xshd/"+str(i)+".htm"
        # Without a timeout requests waits indefinitely on an unresponsive
        # server.
        r = requests.get(url, timeout=10)
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text,'html.parser')
        k = soup.find_all('div',attrs={"class":"xinwen-wen"})
        # jzxx holds only the entries of the current page.
        jzxx = []
        for trs in k:
            lb = [
                trs.find('div',attrs={"class":"xinwen-sj-top"}).text.strip(),
                trs.find('div',attrs={"class":"xinwen-wen-bt"}).text.strip(),
                trs.find('div',attrs={"class":"xinwen-wen-zy"}).text.strip(),
            ]
            jzxx.append(lb)
        # Written per page, because jzxx is reset for every page.
        for xx in jzxx:
            f1.write(",".join(xx))
            f1.write("\n")
    # code ends
Pandas 进阶
往下1-2题
import pandas as pd
import numpy as np
#返回最大值与最小值的差
def sub(df):
    '''
    Return the difference between the maximum and the minimum of df.

    :param df: any object providing max() and min(), e.g. a Series or DataFrame
    :return: df.max() - df.min()
    '''
    ######## Begin #######
    return df.max() - df.min()
    ######## End #######
def main():
    '''
    Aggregate step1/drinks.csv per continent.

    :return: a DataFrame indexed by continent with the max-min range of
             wine_servings and the sum of beer_servings
    '''
    ######## Begin #######
    drinks = pd.read_csv("step1/drinks.csv")
    selected = drinks[["continent", "wine_servings", "beer_servings"]]
    # "sum" is passed by name: newer pandas versions warn when a callable such
    # as np.sum is given and ask for the string to keep the same behavior.
    df = selected.groupby(["continent"]).agg({"wine_servings": sub, "beer_servings": "sum"})
    return df
    ######## End #######
if __name__ == '__main__':
    print(main())
#-*- coding: utf-8 -*-
import pandas as pd
#创建透视表
def create_pivottalbe(data):
    '''
    Build a pivot table of summed tips by day (rows) and time (columns).

    :param data: anything accepted by pd.DataFrame with columns day, time, tip
    :return: the pivot table, including the 'All' margin row and column
    '''
    ###### Begin ######
    df = pd.DataFrame(data)
    # "sum" is passed by name: newer pandas versions warn when the builtin
    # sum is given and ask for the string to keep the same behavior.
    x = df.pivot_table(index = ['day'],columns = ['time'],values = ['tip'],aggfunc = "sum" , margins = True)
    return x
    ###### End ######
#创建交叉表
def create_crosstab(data):
    '''
    Build a cross table of summed tips by day (rows) and time (columns).

    :param data: anything accepted by pd.DataFrame with columns day, time, tip
    :return: the cross table, including the 'All' margin row and column
    '''
    ###### Begin ######
    df = pd.DataFrame(data)
    # "sum" is passed by name: newer pandas versions warn when the builtin
    # sum is given and ask for the string to keep the same behavior.
    y = pd.crosstab(index = df['day'],columns = df['time'],values = df['tip'],aggfunc = "sum",margins = True)
    return y
    ###### End ######
def main():
    '''
    Read step2/tip.csv and print its pivot table and its cross table.

    :return: None
    '''
    # read the csv file and bind the result to data
    ###### Begin ######
    data = pd.read_csv("step2/tip.csv",header = 0)
    ###### End ######
    piv_result = create_pivottalbe(data)
    cro_result = create_crosstab(data)
    print("透视表:\n{}".format(piv_result))
    print("交叉表:\n{}".format(cro_result))
if __name__ == '__main__':
    main()