例如下面这种格式化的文本文件(UCI Statlog German Credit 数据集中的 german.data),文件说明及下载地址:https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/


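(图:german.data 文件内容示例。每行是一个样本,包含 21 个以空格分隔的字段,其中形如 A11 的字段是类别型属性编码,其余为整数,最后一列是类别标签 1 或 2,例如:`A11 6 A34 A43 1169 A65 A75 4 A93 A101 4 A121 67 A143 A152 2 A173 1 A192 A201 1`)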

% Read the German credit data file into the cell array C, one cell per
% column: %s columns hold categorical codes such as 'A12', %d columns hold
% integers (the last %d column is the class label).
%
% The fields in german.data are separated by blanks, so the format string
% contains no literal commas: literal text in a textscan format has to be
% present in the input, otherwise parsing stops at the first mismatch.
fid = fopen('german.data', 'r');
if fid == -1
    % fopen signals failure with -1 instead of raising an error.
    error('Cannot open german.data for reading.');
end
try
    C = textscan(fid, '%s %d %s %s %d %s %s %d %s %s %d %s %d %s %s %d %s %d %s %s %d');
catch readErr
    % Do not leak the file handle when parsing fails.
    fclose(fid);
    rethrow(readErr);
end
fclose(fid);

% Convert the parsed columns in C into a numeric matrix X with one row per
% column of the file and one column per sample.
n1 = numel(C);      % number of columns read from the file
n2 = numel(C{1});   % number of samples (length of the first column)

% A parse that stopped early leaves columns of different lengths; report
% that clearly instead of failing later with a dimension-mismatch error.
colLen = cellfun(@numel, C);
if any(colLen ~= n2)
    error('Parsed columns have unequal lengths %s; check the file format.', ...
          mat2str(colLen));
end

X = zeros(n1, n2);

for i = 1:n1
    if iscell(C{i})
        % Categorical column: each code is the letter 'A', the column
        % index, then the category number (A12 => 2, A103 => 3, A410 => 10).
        % Skip the letter plus the digits of the column index.
        skip = 1 + numel(int2str(i));
        for j = 1:n2
            code = C{i}{j};
            val = sscanf(code(skip+1:end), '%d');
            if numel(val) ~= 1
                error('Cannot parse categorical code ''%s'' (column %d, sample %d).', ...
                      code, i, j);
            end
            X(i, j) = val;
        end
    else
        % Numeric column: copy as-is (int32 values are converted to double).
        X(i, :) = C{i};
    end
end

% Separate the class label (last row of X) from the attributes, build a
% class-stratified train/test split and normalize both sets.
y = X(end, :);
X(end, :) = [];

posX = X(:, y == 1);            % samples of class 1 (700 in the full data set)
negX = X(:, y == 2);            % samples of class 2 (300 in the full data set)

% Number of samples of each class that go into the training set.
nTrainPos = 350;
nTrainNeg = 150;
if size(posX, 2) < nTrainPos || size(negX, 2) < nTrainNeg
    error('Not enough samples for the split: %d of class 1, %d of class 2.', ...
          size(posX, 2), size(negX, 2));
end

trainX = [posX(:, 1:nTrainPos), negX(:, 1:nTrainNeg)];
trainY = [ones(1, nTrainPos), 2*ones(1, nTrainNeg)];

% The remaining samples form the test set. The labels are sized from the
% actual number of remaining columns so that testY always matches testX.
testX = [posX(:, nTrainPos+1:end), negX(:, nTrainNeg+1:end)];
testY = [ones(1, size(posX, 2) - nTrainPos), 2*ones(1, size(negX, 2) - nTrainNeg)];

% Fit the scaling on the training set only, then apply the same settings
% (s1) to the test set.
[trainX, s1] = mapminmax(trainX);
testX = mapminmax('apply', testX, s1);
posted on 2017-03-02 23:04  未雨愁眸  阅读(182)  评论(0)  编辑  收藏  举报