中位数
将多个样本按照大小排序,取中间位置的元素。
若样本数量为奇数,中位数为最中间的元素
例如:1 2000 3000 4000 10000000 的中位数为 3000
若样本数量为偶数,中位数为最中间的两个元素的平均值
例如:1 2000 3000 4000 5000 10000000 的中位数为 (3000 + 4000) / 2 = 3500
案例:分析中位数的算法,并测试 NumPy 提供的中位数 API(np.median):
import numpy as np


def sorted_median(values):
    """Return the median of *values*, computed by sorting.

    The values are sorted in ascending order; the result is the mean of
    the two central elements.  For an odd number of samples both central
    indices coincide, so the single middle element is returned.

    :param values: one-dimensional array-like of numbers.
    :return: the median of the samples.
    :raises ValueError: if *values* is empty.
    """
    # np.msort was removed in NumPy 2.0; np.sort(..., axis=0) is its
    # documented replacement.
    ordered = np.sort(np.asarray(values), axis=0)
    size = ordered.shape[0]
    if size == 0:
        raise ValueError('cannot compute the median of an empty sample')
    # Floor division gives the two central indices without going through
    # float arithmetic: they are equal when size is odd.
    return (ordered[(size - 1) // 2] + ordered[size // 2]) / 2


if __name__ == '__main__':
    # Column 6 of the CSV is read as the closing prices.
    closing_prices = np.loadtxt(
        '../../data/aapl.csv', delimiter=',', usecols=6, unpack=True)
    # Median from the hand-written algorithm ...
    median = sorted_median(closing_prices)
    print(median)
    # ... compared with the median API provided by NumPy.
    median = np.median(closing_prices)
    print(median)
# Median
import datetime as dt

import matplotlib.dates as md
import matplotlib.pyplot as mp
import numpy as np


def dmy2ymd(dmy):
    """Convert a day-month-year date to a year-month-day string.

    :param dmy: date text in ``%d-%m-%Y`` form, given as ``bytes`` or ``str``.
    :return: the same date formatted as ``%Y-%m-%d``.
    :raises ValueError: if the text does not match ``%d-%m-%Y``.
    """
    # Whether np.loadtxt hands a converter bytes or str depends on its
    # ``encoding`` setting, so accept both instead of decoding blindly
    # (str(x, encoding=...) raises TypeError when x is already a str).
    if isinstance(dmy, (bytes, bytearray)):
        dmy = dmy.decode('utf-8')
    t = dt.datetime.strptime(dmy, '%d-%m-%Y')
    return t.date().strftime('%Y-%m-%d')


if __name__ == '__main__':
    dates, opening_prices, highest_prices, lowest_prices, \
        closing_prices, volumes = np.loadtxt(
            'aapl.csv', delimiter=',', usecols=(1, 3, 4, 5, 6, 7),
            unpack=True, dtype='M8[D],f8,f8,f8,f8,f8',
            converters={1: dmy2ymd})  # day-month-year -> year-month-day

    # Draw the closing prices as a line chart.
    mp.figure('APPL', facecolor='lightgray')
    mp.title('APPL', fontsize=18)
    mp.xlabel('Date', fontsize=14)
    mp.ylabel('Price', fontsize=14)
    mp.grid(linestyle=":")

    # Tick locators: one major tick every Monday, one minor tick per day.
    ax = mp.gca()
    ma_loc = md.WeekdayLocator(byweekday=md.MO)
    ax.xaxis.set_major_locator(ma_loc)
    ax.xaxis.set_major_formatter(md.DateFormatter('%Y-%m-%d'))
    ax.xaxis.set_minor_locator(md.DayLocator())

    # Cast the M8[D] dates to Python datetime-module objects for plotting.
    # dt.datetime is the same class the code previously reached through
    # md.datetime.datetime, without relying on matplotlib's own imports.
    dates = dates.astype(dt.datetime)
    mp.plot(dates, closing_prices, color='dodgerblue', linewidth=2,
            linestyle='--', alpha=0.8, label='APPL Closing Price')

    # Median, computed by hand; equivalent to np.median(closing_prices).
    # np.msort was removed in NumPy 2.0; np.sort(..., axis=0) replaces it.
    sorted_prices = np.sort(closing_prices, axis=0)
    size = sorted_prices.size
    # The two central indices coincide when size is odd.
    median = (sorted_prices[size // 2] + sorted_prices[(size - 1) // 2]) / 2
    print(median)  # 352.055 was the value noted by the original author
    mp.hlines(median, dates[0], dates[-1], color='gold', label='median')

    mp.legend()
    mp.gcf().autofmt_xdate()
    mp.show()