libsvm 之 easy.py（流程化脚本）注释

鉴于该脚本的重要性，有必要对它做一个全面的注释，以便可以灵活地使用libsvm。

#!/usr/bin/env python
# 这种设置python路径的方法更为科学

import sys
import os
from subprocess import *

# Without at least a training file on the command line, print the usage
# string and stop the script.
if len(sys.argv) < 2:
    usage_text = 'Usage: {0} training_file [testing_file]'.format(sys.argv[0])
    print(usage_text)
    raise SystemExit

# Locations of the svm tools, grid.py and gnuplot, chosen per platform.

is_win32 = (sys.platform == 'win32')
if is_win32:
    # Paths used when running on Windows.
    svmscale_exe = r"..\windows\svm-scale.exe"
    svmtrain_exe = r"..\windows\svm-train.exe"
    svmpredict_exe = r"..\windows\svm-predict.exe"
    grid_py = r".\grid.py"
    gnuplot_exe = r"C:\gnuplot\bin\gnuplot.exe"
else:
    # Paths used on every non-Windows platform (e.g. Linux).
    svmscale_exe = "../svm-scale"
    svmtrain_exe = "../svm-train"
    svmpredict_exe = "../svm-predict"
    grid_py = "./grid.py"
    # Adjust this to your installation: it must point at the gnuplot
    # executable itself, not at the folder that contains it.
    gnuplot_exe = "/usr/bin/gnuplot"

# Stop right away if any required tool is missing; the checks run in the
# order listed and the first missing path aborts with its message.
for _tool_path, _missing_msg in (
    (svmscale_exe, "svm-scale executable not found"),
    (svmtrain_exe, "svm-train executable not found"),
    (svmpredict_exe, "svm-predict executable not found"),
    (gnuplot_exe, "gnuplot executable not found"),
    (grid_py, "grid.py not found"),
):
    assert os.path.exists(_tool_path), _missing_msg

# Names of the files produced for the training set (".scale", ".model",
# ".range"). They are built from the training file's base name only, so the
# directory part of the given path is dropped.
train_pathname = sys.argv[1]
assert os.path.exists(train_pathname),"training file not found"
file_name = os.path.basename(train_pathname)
scaled_file, model_file, range_file = (
    file_name + ".scale",
    file_name + ".model",
    file_name + ".range",
)

# When a testing file is also given, derive its ".scale" and ".predict"
# names the same way. Note that file_name is rebound to the test file's
# base name here.
if len(sys.argv) >= 3:
    test_pathname = sys.argv[2]
    assert os.path.exists(test_pathname),"testing file not found"
    file_name = os.path.basename(test_pathname)
    scaled_test_file = file_name + ".scale"
    predict_test_file = file_name + ".predict"

# Pipeline step 1: scale the training set with svm-scale.
# range_file is handed to svm-scale through "-s" (presumably where it stores
# the scaling ranges -- confirm against svm-scale's usage text); the shell
# redirection writes svm-scale's stdout into scaled_file.
print('Scaling training data...')
scale_template = '{0} -s "{1}" "{2}" > "{3}"'
cmd = scale_template.format(svmscale_exe, range_file, train_pathname, scaled_file)
scale_proc = Popen(cmd, shell = True, stdout = PIPE)
scale_proc.communicate()  # block until the shell command has finished

# Pipeline step 2: parameter selection. grid.py is run on the scaled training
# data (cross validation, per its "-svmtrain"/"-gnuplot" invocation below) and
# the LAST line it writes to stdout is parsed as three floats: c, g and the
# cross-validation rate.
cmd = '{0} -svmtrain "{1}" -gnuplot "{2}" "{3}"'.format(grid_py, svmtrain_exe, gnuplot_exe, scaled_file)
print('Cross validation...')
grid_proc = Popen(cmd, shell = True, stdout = PIPE)

# Only the final line matters, so keep overwriting last_line while draining
# the pipe until EOF.
last_line = b''
for line in grid_proc.stdout:
    last_line = line
grid_proc.stdout.close()
grid_proc.wait()  # reap the child so it is not left behind unwaited

# Fail with a readable message (still a ValueError, as before) when grid.py
# printed nothing or its last line is not "c g rate".
fields = last_line.split()
if len(fields) != 3:
    raise ValueError(
        'grid.py did not report "c g rate" on its last output line '
        '(exit status {0}): {1!r}'.format(grid_proc.returncode, last_line))
c,g,rate = map(float,fields)
# Report the best parameters c and g together with the CV rate.
print('Best c={0}, g={1} CV rate={2}'.format(c,g,rate))

# Pipeline step 3: train on the scaled training data with svm-train, passing
# the chosen c and g through "-c" and "-g"; the model goes to model_file.
print('Training...')
train_template = '{0} -c {1} -g {2} "{3}" "{4}"'
cmd = train_template.format(svmtrain_exe,c,g,scaled_file,model_file)
train_proc = Popen(cmd, shell = True, stdout = PIPE)
train_proc.communicate()  # wait for completion; the captured stdout is discarded
print('Output model: {0}'.format(model_file))
if len(sys.argv) >= 3:
    # Pipeline step 4: scale the testing data with svm-scale, this time
    # passing range_file through "-r"; the shell redirection writes the
    # result into scaled_test_file.
    print('Scaling testing data...')
    rescale_template = '{0} -r "{1}" "{2}" > "{3}"'
    cmd = rescale_template.format(svmscale_exe, range_file, test_pathname, scaled_test_file)
    rescale_proc = Popen(cmd, shell = True, stdout = PIPE)
    rescale_proc.communicate()

    # Pipeline step 5: run svm-predict on the scaled testing data with the
    # trained model. stdout is not piped here, so whatever svm-predict
    # prints goes to this script's own stdout.
    print('Testing...')
    predict_template = '{0} "{1}" "{2}" "{3}"'
    cmd = predict_template.format(svmpredict_exe, scaled_test_file, model_file, predict_test_file)
    predict_proc = Popen(cmd, shell = True)
    predict_proc.communicate()

    print('Output prediction: {0}'.format(predict_test_file))

posted @ 2016-07-13 23:44 Life·Intelligence 阅读(2038) 评论(0) 编辑收藏举报

刷新页面返回顶部

（评论功能已被禁用）

阅读排行：
· 全程不用写代码，我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· MongoDB 8.0这个新功能碉堡了，比商业数据库还牛
· .NET10 - 预览版1新功能体验（一）

2025年3月

日

一

二

三

四

五

六

Digital-LI

libsvm 之 easy.py（流程化脚本）注释

搜索

我的标签

积分与排名

阅读排行榜