# fall detection

#!/usr/bin/env python
# coding: utf-8

# In[4]:


import sys
from statistics import median
from statistics import stdev
from scipy.stats import kurtosis,skew
import math
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import csv
from sklearn import svm
from random import randint
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from sklearn import datasets
from joblib import dump, load


# In[42]:


def feature(FOLDER, label):
    """Extract per-file statistical features from MobiAct accelerometer CSVs.

    For every CSV in the activity sub-folder, rows before the first sample
    whose 'label' column equals FOLDER are dropped, a squared-magnitude
    column is appended, the trimmed frame is cached to disk, and a window of
    at most FALL_SIZE samples is summarised into one feature vector.

    Parameters
    ----------
    FOLDER : str
        Activity sub-folder name; also the activity tag searched for in the
        CSV 'label' column.
    label : int
        Numeric class label appended as the last element of each feature row.

    Returns
    -------
    list[list]
        One feature vector per input file (57 statistics followed by label).
    """
    FALL_SIZE = 1200  # maximum number of samples summarised per file

    PATH = '/home/helong/share/ML/MobiAct_Dataset_v2.0/Annotated Data/'
    OUTPUT_PATH = '/home/helong/share/ML/MobiAct_Dataset_v2.0/train_data_trainsform_lstm/'
    FILE_PATH = PATH + FOLDER
    final = []
    for file in os.listdir(FILE_PATH):
        df = pd.read_csv(os.path.join(FILE_PATH, file))
        # Keep rows from the first occurrence of the activity label onwards.
        df = df[(df['label'] == FOLDER).idxmax():]
        df = df.reset_index(drop=True)
        print(file)

        df["acc_x"] = df["acc_x"].astype('float64')
        df["acc_y"] = df["acc_y"].astype('float64')
        df["acc_z"] = df["acc_z"].astype('float64')
        # Squared acceleration magnitude; sqrt is applied where needed below.
        df['mag'] = df['acc_x']*df['acc_x'] + df['acc_y']*df['acc_y'] + df['acc_z']*df['acc_z']

        # Cache the trimmed/augmented frame unless it is already on disk.
        OUTPUT_FOLDER_PATH = OUTPUT_PATH + FOLDER
        OUTPUT_FILE_PATH = OUTPUT_PATH + FOLDER + '/' + file
        os.makedirs(OUTPUT_FOLDER_PATH, exist_ok=True)
        if os.path.isfile(OUTPUT_FILE_PATH):
            print(OUTPUT_FILE_PATH + " exist , skip...")
        else:
            df.to_csv(OUTPUT_FILE_PATH, index=False)

        df_count = df.shape[0]
        print(df_count)
        # BUG FIX: the original assigned the shorter file length back to
        # FALL_SIZE, permanently shrinking the window for every later file
        # in the loop.  Use a per-file window length instead.
        n = min(FALL_SIZE, df_count)

        # Positional columns: 2..4 are acc_x/acc_y/acc_z; column 12 is
        # presumably the appended 'mag' (raw CSV has 12 columns) -- TODO confirm.
        X = [df.iloc[i, 2] for i in range(n)]
        Y = [df.iloc[i, 3] for i in range(n)]
        Z = [df.iloc[i, 4] for i in range(n)]
        MAG = [df.iloc[i, 12] for i in range(n)]
        # Tilt angle: asin(acc_y / |acc|), with MAG holding |acc|^2.
        TA = [math.asin(float(Y[i]) / float(math.sqrt(MAG[i]))) for i in range(n)]

        avgX = sum(X)/len(X)
        avgY = sum(Y)/len(Y)
        avgZ = sum(Z)/len(Z)
        medianX = median(X)
        medianY = median(Y)
        medianZ = median(Z)
        # NOTE: statistics.stdev raises for fewer than two samples; every
        # file is assumed to contain at least two usable rows.
        stdX = stdev(X)
        stdY = stdev(Y)
        stdZ = stdev(Z)
        skewX = skew(X)
        skewY = skew(Y)
        skewZ = skew(Z)
        kurtosisX = kurtosis(X)
        kurtosisY = kurtosis(Y)
        kurtosisZ = kurtosis(Z)
        minX = min(X)
        minY = min(Y)
        minZ = min(Z)
        maxX = max(X)
        maxY = max(Y)
        maxZ = max(Z)
        slope = math.sqrt((maxX - minX)**2 + (maxY - minY)**2 + (maxZ - minZ)**2)
        meanTA = sum(TA)/len(TA)
        stdTA = stdev(TA)
        skewTA = skew(TA)
        kurtosisTA = kurtosis(TA)

        # Mean absolute deviation around the per-axis mean.
        absX = sum(abs(v - avgX) for v in X) / len(X)
        absY = sum(abs(v - avgY) for v in Y) / len(Y)
        absZ = sum(abs(v - avgZ) for v in Z) / len(Z)

        # Absolute-value variants: build each |axis| list ONCE (the original
        # rebuilt the same comprehension for every single statistic).
        absX_list = [abs(v) for v in X]
        absY_list = [abs(v) for v in Y]
        absZ_list = [abs(v) for v in Z]
        abs_meanX = sum(absX_list)/len(absX_list)
        abs_meanY = sum(absY_list)/len(absY_list)
        abs_meanZ = sum(absZ_list)/len(absZ_list)
        abs_medianX = median(absX_list)
        abs_medianY = median(absY_list)
        abs_medianZ = median(absZ_list)
        abs_stdX = stdev(absX_list)
        abs_stdY = stdev(absY_list)
        abs_stdZ = stdev(absZ_list)
        abs_skewX = skew(absX_list)
        abs_skewY = skew(absY_list)
        abs_skewZ = skew(absZ_list)
        abs_kurtosisX = kurtosis(absX_list)
        abs_kurtosisY = kurtosis(absY_list)
        abs_kurtosisZ = kurtosis(absZ_list)
        abs_minX = min(absX_list)
        abs_minY = min(absY_list)
        abs_minZ = min(absZ_list)
        abs_maxX = max(absX_list)
        abs_maxY = max(absY_list)
        abs_maxZ = max(absZ_list)
        abs_slope = math.sqrt((abs_maxX - abs_minX)**2 + (abs_maxY - abs_minY)**2 + (abs_maxZ - abs_minZ)**2)

        meanMag = sum(MAG)/len(MAG)
        stdMag = stdev(MAG)
        minMag = min(MAG)
        maxMag = max(MAG)
        DiffMinMaxMag = maxMag - minMag
        ZCR_Mag = 0  # zero-crossing rate placeholder -- never computed in the original
        AvgResAcc = (1/len(MAG))*sum(MAG)  # numerically equal to meanMag; kept for column parity

        # Order must match the header row written by the driver script.
        test = [avgX,avgY,avgZ,medianX,medianY,medianZ,stdX,stdY,stdZ,skewX,skewY,skewZ,kurtosisX,kurtosisY,kurtosisZ,
                                          minX,minY,minZ,maxX,maxY,maxZ,slope,meanTA,stdTA,skewTA,kurtosisTA,absX,
                                          absY,absZ,abs_meanX,abs_meanY,abs_meanZ,abs_medianX,abs_medianY,abs_medianZ,
                                          abs_stdX,abs_stdY,abs_stdZ,abs_skewX,abs_skewY,abs_skewZ,abs_kurtosisX,
                                          abs_kurtosisY,abs_kurtosisZ,abs_minX,abs_minY,abs_minZ,abs_maxX,abs_maxY
                                          ,abs_maxZ,abs_slope,meanMag,stdMag,minMag,maxMag,DiffMinMaxMag,ZCR_Mag,AvgResAcc,label]
        final.append(test)
    return final


# In[59]:


# Driver: regenerate the feature CSV for one activity class.
OUTPUT_PATH = '/home/helong/share/ML/MobiAct_Dataset_v2.0/featured/'
FOLDER = 'WAL'
label = 0
OUTPUT_FILE_PATH = OUTPUT_PATH + FOLDER + '.csv'

# Start from a clean file so the append-mode open below cannot duplicate rows.
if os.path.isfile(OUTPUT_FILE_PATH):
    os.remove(OUTPUT_FILE_PATH)

# Column names; order mirrors the feature vector built by feature().
HEADER = ['AvgX','AvgY','AvgZ','MedianX','MedianY','MedianZ','StdX',
    'StdY','StdZ','SkewX','SkewY','SkewZ','KurtosisX','KurtosisY','KurtosisZ','MinX','MinY',
    'MinZ','MaxX','MaxY','MaxZ','Slope','MeanTA','StdTA','SkewTA','KurtosisTA',
    'AbsX','AbsY','AbsZ','AbsMeanX','AbsMeanY','AbsMeanZ','AbsMedianX','AbsMedianY','AbsMedianZ',
    'AbsStdX','AbsStdY','AbsStdZ','AbsSkewX','AbsSkewY','AbsSkewZ',
    'AbsKurtosisX','AbsKurtosisY','AbsKurtosisZ','AbsMinX','AbsMinY','AbsMinZ',
    'AbsMaxX','AbsMaxY','AbsMaxZ','AbsSlope','MeanMag',
    'StdMag','MinMag','MaxMag','DiffMinMaxMag','ZCR_Mag','AverageResultantAcceleration','label']

with open(OUTPUT_FILE_PATH, 'a') as out_file:
    writer = csv.writer(out_file, delimiter=',', lineterminator='\n')
    writer.writerow(HEADER)
    feature_rows = feature(FOLDER, label)
    row_count = len(feature_rows)
    writer.writerows(feature_rows)

print("total ", row_count, " records process done")


# In[2]:


def get_all_data():
    """Load every featured CSV under PATH into one shuffled DataFrame.

    Only the first 59 columns (58 features + trailing 'label') are kept
    from each file.

    Returns
    -------
    pd.DataFrame
        Row-shuffled concatenation of all per-activity feature files
        (empty DataFrame when no CSVs are present).
    """
    PATH = '/home/helong/share/ML/MobiAct_Dataset_v2.0/featured/'

    frames = []
    for f in os.listdir(PATH):
        if 'csv' not in f:
            continue
        file_path = os.path.join(PATH, f)
        data = pd.read_csv(file_path, index_col=False, low_memory=False)
        frames.append(data.iloc[:, 0:59])  # 58 features + label column

    # pd.concat over a list replaces the per-iteration DataFrame.append,
    # which was quadratic and has been removed in pandas 2.0.
    if not frames:
        return pd.DataFrame()
    all_data = pd.concat(frames, ignore_index=True)

    # BUG FIX: np.random.shuffle(all_data.values) shuffled an ndarray that
    # pandas may return as a COPY, so the frame could remain unshuffled.
    # sample(frac=1) reliably permutes the rows (still nondeterministic,
    # matching the original intent).
    all_data = all_data.sample(frac=1).reset_index(drop=True)
    return all_data


# In[16]:


# Train an SVM fall-detector on the featured data, report metrics, persist it.

all_data = get_all_data()

# Split into the 58 feature columns and the trailing numeric label column.
# Vectorized slicing replaces the original row-by-row iloc loop; a 1-D label
# Series also avoids sklearn's column-vector warning.
_all_data_x = all_data.iloc[:, 0:58]
_all_data_y = all_data.iloc[:, 58]

X_train, X_test, y_train, y_test = train_test_split(
    _all_data_x, _all_data_y, test_size=0.2, random_state=42)

clf = svm.SVC(gamma='scale')
clf.fit(X_train, y_train)

test_count = len(X_test)
print(test_count)

y_predict = clf.predict(X_test)

# ROC AUC computed from hard predictions (not decision scores), as before.
score = roc_auc_score(y_test, y_predict)
print(score)

precision, recall, thresholds = precision_recall_curve(y_test, y_predict)
f1 = f1_score(y_test, y_predict)
# BUG FIX: the original wrote `auc = auc(recall, precision)`, shadowing the
# imported sklearn.metrics.auc function and breaking any re-run of this cell.
pr_auc = auc(recall, precision)
print("precision is ", precision, "recall is ", recall, "thresholds is ", pr_auc and thresholds)
print("f1 is ", f1, "auc is ", pr_auc)

print("done...")

# Persist the model; reload later with: clf_load = load('fall_detect_svm.joblib')
dump(clf, 'fall_detect_svm.joblib')


# In[5]:


# Reload the persisted SVM and sanity-check it against the featured data.

all_data = get_all_data()

feature_rows = []
label_rows = []
n_rows = all_data.shape[0]
n_cols = all_data.shape[1]

# Split each row into its 58 feature values and its trailing label slice.
for idx in range(n_rows):
    feature_rows.append(all_data.iloc[idx, 0:58])
    label_rows.append(all_data.iloc[idx, 58:59])

clf_load = load('fall_detect_svm.joblib')

X_train, X_test, y_train, y_test = train_test_split(
    feature_rows, label_rows, test_size=0.1, random_state=42)

# NOTE(review): this scores the model on the 90% *training-side* split, not
# the held-out X_test -- presumably a deliberate smoke test, but confirm.
y_predict = clf_load.predict(X_train)

score = roc_auc_score(y_train, y_predict)

print(score)


# In[ ]:

 

# posted @ 2019-04-11 16:16 by 调皮的贝叶斯 -- blog-page footer (574 reads, 0 comments); not part of the program