  • 在项目中用到了迭代最小二乘的方法,好几次被专业人士问到这个方法的学术名称或算法叫什么,都答不上来。

RANSAC是“RANdom SAmple Consensus(随机抽样一致)”的缩写。它可以从一组包含“局外点”的观测数据集中,通过迭代方式估计数学模型的参数。它是一种不确定的算法——它有一定的概率得出一个合理的结果;为了提高概率必须提高迭代次数。







三 普通最小二乘是保守派:在现有数据下,如何实现最优。是从一个整体误差最小的角度去考虑,尽量谁也不得罪。RANSAC是改革派:首先假设数据具有某种特性(目的),为了达到目的,适当割舍一些现有的数据。给出最小二乘拟合(红线)、RANSAC(绿线)对于一阶直线、二阶曲线的拟合对比:






Given:
    data – a set of observed data points
    model – a model that can be fitted to data points
    n – the minimum number of data values required to fit the model
    k – the maximum number of iterations allowed in the algorithm
    t – a threshold value for determining when a data point fits a model
    d – the number of close data values required to assert that a model fits well to data

Return:
    bestfit – model parameters which best fit the data (or nul if no good model is found)

iterations = 0
bestfit = nul
besterr = something really large
while iterations < k {
    maybeinliers = n randomly selected values from data
    maybemodel = model parameters fitted to maybeinliers
    alsoinliers = empty set
    for every point in data not in maybeinliers {
        if point fits maybemodel with an error smaller than t
             add point to alsoinliers
    }
    if the number of elements in alsoinliers is > d {
        % this implies that we may have found a good model
        % now test how good it is
        bettermodel = model parameters fitted to all points in maybeinliers and alsoinliers
        thiserr = a measure of how well model fits these points
        if thiserr < besterr {
            bestfit = bettermodel
            besterr = thiserr
        }
    }
    increment iterations
}
return bestfit



# -*- coding: utf-8 -*-
import numpy
import scipy # use numpy if scipy unavailable
import scipy.linalg # use numpy if scipy unavailable
import pylab

## Copyright (c) 2004-2007, Andrew D. Straw. All rights reserved.

def ransac(data,model,n,k,t,d,debug=False,return_all=False):
    """Fit model parameters to data using the RANSAC algorithm.

    Each iteration fits a candidate model to the ``n`` rows selected by
    ``random_partition``, collects the remaining rows whose error under that
    candidate is below ``t`` and, when more than ``d`` such rows are found,
    refits on the combined rows.  The refit with the lowest mean error seen
    over all ``k`` iterations is kept.

    Parameters:
        data - a set of observed data points (2-D array, one point per row)
        model - a model that can be fitted to data points; it must provide
            fit(data) and get_error(data, model)
        n - the minimum number of data values required to fit the model
        k - the maximum number of iterations allowed in the algorithm
        t - a threshold value for determining when a data point fits a model
        d - the number of close data values required to assert that a model
            fits well to data
        debug - when true, print error statistics for every iteration
        return_all - when true, also return the indices of the inlier rows

    Returns:
        bestfit - model parameters which best fit the data.  When
            return_all is true, the tuple (bestfit, {'inliers': idxs}).

    Raises:
        ValueError - if no iteration met the acceptance criterion.
    """
    bestfit = None
    besterr = numpy.inf
    best_inlier_idxs = None
    # A for-loop guarantees exactly k iterations without a hand-maintained
    # counter that could be forgotten.
    for iterations in range(k):
        maybe_idxs, test_idxs = random_partition(n, data.shape[0])
        maybeinliers = data[maybe_idxs,:]
        test_points = data[test_idxs]
        maybemodel = model.fit(maybeinliers)
        test_err = model.get_error(test_points, maybemodel)
        also_idxs = test_idxs[test_err < t] # select indices of rows with accepted points
        alsoinliers = data[also_idxs,:]
        if debug:
            # Single-argument print() calls behave the same on Python 2 and 3.
            print('test_err.min() %s' % (test_err.min(),))
            print('test_err.max() %s' % (test_err.max(),))
            print('numpy.mean(test_err) %s' % (numpy.mean(test_err),))
            print('iteration %d:len(alsoinliers) = %d' % (
                iterations, len(alsoinliers)))
        if len(alsoinliers) > d:
            # Enough support: refit on every supporting row and score the refit.
            betterdata = numpy.concatenate((maybeinliers, alsoinliers))
            bettermodel = model.fit(betterdata)
            better_errs = model.get_error(betterdata, bettermodel)
            thiserr = numpy.mean(better_errs)
            if thiserr < besterr:
                bestfit = bettermodel
                besterr = thiserr
                best_inlier_idxs = numpy.concatenate((maybe_idxs, also_idxs))
    if bestfit is None:
        raise ValueError("did not meet fit acceptance criteria")
    if return_all:
        return bestfit, {'inliers':best_inlier_idxs}
    return bestfit

def random_partition(n,n_data):
    """Return n random row indices and the remaining n_data-n row indices.

    Parameters:
        n - number of indices to place in the first group
        n_data - total number of rows; indices are drawn from range(n_data)

    Returns:
        (idxs1, idxs2) - two index arrays that together contain every index
        in range(n_data) exactly once; idxs1 has n entries.
    """
    all_idxs = numpy.arange(n_data)
    # Without this shuffle the "random" sample is always the first n rows.
    numpy.random.shuffle(all_idxs)
    idxs1 = all_idxs[:n]
    idxs2 = all_idxs[n:]
    return idxs1, idxs2

class LinearLeastSquaresModel:
    """Linear system solved using linear least squares.

    This class serves as an example that fulfills the model interface
    needed by the ransac() function: fit(data) returns model parameters and
    get_error(data, model) returns one error value per data row.
    """

    def __init__(self,input_columns,output_columns,debug=False):
        """Remember which columns of the data array are inputs and outputs.

        Parameters:
            input_columns - iterable of column indices used as inputs (A)
            output_columns - iterable of column indices used as outputs (B)
            debug - stored on the instance; not used by fit/get_error
        """
        self.input_columns = input_columns
        self.output_columns = output_columns
        self.debug = debug

    def fit(self, data):
        """Return the least-squares solution x of A x = B for the given rows."""
        A = numpy.vstack([data[:,i] for i in self.input_columns]).T
        B = numpy.vstack([data[:,i] for i in self.output_columns]).T
        x,resids,rank,s = scipy.linalg.lstsq(A,B)
        return x

    def get_error(self, data, model):
        """Return the summed squared residual of every row under ``model``."""
        A = numpy.vstack([data[:,i] for i in self.input_columns]).T
        B = numpy.vstack([data[:,i] for i in self.output_columns]).T
        B_fit = numpy.dot(A, model)
        err_per_point = numpy.sum((B-B_fit)**2,axis=1) # sum squared error per row
        return err_per_point

def test():
    """Demonstrate ransac() on a noisy line with outliers and plot the result.

    Generates 500 points on a random line through the origin, adds unit
    Gaussian noise to both coordinates, overwrites 100 of the points with
    outliers, then fits the data with plain linear least squares and with
    RANSAC and draws both fits next to the exact line in a pylab figure.
    """
    # generate perfect input data
    n_samples = 500
    n_inputs = 1
    n_outputs = 1
    A_exact = 20*numpy.random.random((n_samples,n_inputs) ) # x coordinates
    perfect_fit = 60*numpy.random.normal(size=(n_inputs,n_outputs) ) # the model (slope)
    B_exact = numpy.dot(A_exact,perfect_fit) # y coordinates
    assert B_exact.shape == (n_samples,n_outputs) # check the shape of the y array

    # add a little gaussian noise (linear least squares alone should handle this well)
    A_noisy = A_exact + numpy.random.normal(size=A_exact.shape ) # noise on x
    B_noisy = B_exact + numpy.random.normal(size=B_exact.shape ) # noise on y

    if 1:
        # add some outliers
        n_outliers = 100 # 100 of the 500 data points become outliers
        all_idxs = numpy.arange( A_noisy.shape[0] )
        numpy.random.shuffle(all_idxs) # random permutation of the row indices
        outlier_idxs = all_idxs[:n_outliers] # the first 100 shuffled indices are outliers
        non_outlier_idxs = all_idxs[n_outliers:] # the remaining rows are left untouched
        A_noisy[outlier_idxs] =  20*numpy.random.random((n_outliers,n_inputs) ) # outlier x
        B_noisy[outlier_idxs] = 50*numpy.random.normal(size=(n_outliers,n_outputs) ) # outlier y

    # setup model

    all_data = numpy.hstack( (A_noisy,B_noisy) ) # one (x, y) pair per row
    # Lists (not range objects) so they are valid numpy index arrays on Python 3.
    input_columns = list(range(n_inputs)) # the first columns of the array
    output_columns = [n_inputs+i for i in range(n_outputs)] # the last columns of the array
    debug = False
    model = LinearLeastSquaresModel(input_columns,output_columns,debug=debug)

    # plain least-squares fit over all rows, outliers included, for comparison
    linear_fit,resids,rank,s = scipy.linalg.lstsq(all_data[:,input_columns],
                                                  all_data[:,output_columns])

    # run RANSAC algorithm
    ransac_fit, ransac_data = ransac(all_data,model,
                                     50, 1000, 7e3, 300, # misc. parameters
                                     debug=debug,return_all=True)
    if 1:
        import pylab

        sort_idxs = numpy.argsort(A_exact[:,0]) # indices that sort A_exact by x
        A_col0_sorted = A_exact[sort_idxs] # maintain as rank-2 array

        if 1:
            pylab.plot( A_noisy[:,0], B_noisy[:,0], 'k.', label='data' )
            pylab.plot( A_noisy[ransac_data['inliers'],0], B_noisy[ransac_data['inliers'],0], 'bx', label='RANSAC data' )
            pylab.plot( A_noisy[non_outlier_idxs,0], B_noisy[non_outlier_idxs,0], 'k.', label='noisy data' )
            pylab.plot( A_noisy[outlier_idxs,0], B_noisy[outlier_idxs,0], 'r.', label='outlier data' )
        # each fitted line is evaluated at the sorted x values so it draws cleanly
        pylab.plot( A_col0_sorted[:,0],
                    numpy.dot(A_col0_sorted,ransac_fit)[:,0],
                    label='RANSAC fit' )
        pylab.plot( A_col0_sorted[:,0],
                    numpy.dot(A_col0_sorted,perfect_fit)[:,0],
                    label='exact system' )
        pylab.plot( A_col0_sorted[:,0],
                    numpy.dot(A_col0_sorted,linear_fit)[:,0],
                    label='linear fit' )
        # without these two calls the labels are never shown and no window opens
        pylab.legend()
        pylab.show()

if __name__=='__main__':
    # run the demo when the file is executed as a script
    test()


- 另外,普通最小二乘(least squares)无法拟合垂直线段,于是引入了另一种方法:总体最小二乘(total least squares)。



- 其他应用:RANSAC 在图像配准以及图像拼接中得到了广泛的应用。


