快速堆栈模糊算法

上一篇快速高斯模糊的原作者也有另一个比较快速的模糊算法Stack Blur,字面意思为堆栈模糊。

源地址为:http://incubator.quasimondo.com/processing/fast_blur_deluxe.php

这个算法在多个平台上都有实现,安卓以及IOS,JS等。

processing源码:http://incubator.quasimondo.com/processing/stackblur.pde

效果图:

转为C语言实现版本。

代码如下:
// Stack Blur v1.0
//
// Author: Mario Klingemann <mario@quasimondo.com>
// http://incubator.quasimondo.com
// created Feburary 29, 2004
// C version updated and performance optimization by tntmonks(http://tntmonks.cnblogs.com)

// This is a compromise between Gaussian Blur and Box blur
// It creates much better looking blurs than Box Blur, but is
// 7x faster than my Gaussian Blur implementation.
//
// I called it Stack Blur because this describes best how this
// filter works internally: it creates a kind of moving stack
// of colors whilst scanning through the image. Thereby it
// just has to add one new block of color to the right side
// of the stack and remove the leftmost color. The remaining
// colors on the topmost layer of the stack are either added on
// or reduced by one, depending on if they are on the right or
// on the left side of the stack. 
//
// If you are using this algorithm in your code please add
// the following line:
// 
// Stack Blur Algorithm by Mario Klingemann <mario@quasimondo.com>

#define MAX(x,y) (x>y?x:y) 
#define MIN(x,y) (x>y?y:x)
void fastStackBlur(unsigned char* pix, unsigned int w, unsigned int h, unsigned int comp, int radius) {
	unsigned int wm = w - 1;
	unsigned int hm = h - 1;
	unsigned int imageSize = w * h;
	unsigned int div = radius + radius + 1;

	unsigned char *	rgb = (unsigned char *)malloc(sizeof(unsigned char) * imageSize * 3);
	unsigned char *	r = rgb;
	unsigned char *	g = rgb + imageSize;
	unsigned char *	b = rgb + imageSize * 2;
	int rsum, gsum, bsum, x, y, i, p, yp, yi, yw;

	unsigned int *vmin = (unsigned int *)malloc(MAX(w, h) * sizeof(unsigned int));

	int divsum = (div + 1) >> 1;
	divsum *= divsum;
	int *dv = (int *)malloc(256 * divsum * sizeof(int));
	for (i = 0; i < 256 * divsum; i++) {
		dv[i] = (i / divsum);
	}

	yw = yi = 0;

	int(*stack)[3] = (int(*)[3])malloc(div * 3 * sizeof(int));
	unsigned int stackpointer;
	unsigned int stackstart;
	int *sir;
	int rbs;
	int r1 = radius + 1;
	int routsum, goutsum, boutsum;
	int rinsum, ginsum, binsum;

	for (y = 0; y < h; y++) {
		rinsum = ginsum = binsum = routsum = goutsum = boutsum = rsum = gsum = bsum = 0;
		for (i = -radius; i <= radius; i++) {
			p = yi + (MIN(wm, MAX(i, 0)));
			sir = stack[i + radius];
			sir[0] = pix[(p*comp)];
			sir[1] = pix[(p*comp) + 1];
			sir[2] = pix[(p*comp) + 2];

			rbs = r1 - abs(i);
			rsum += sir[0] * rbs;
			gsum += sir[1] * rbs;
			bsum += sir[2] * rbs;
			if (i > 0) {
				rinsum += sir[0];
				ginsum += sir[1];
				binsum += sir[2];
			}
			else {
				routsum += sir[0];
				goutsum += sir[1];
				boutsum += sir[2];
			}
		}
		stackpointer = radius;

		for (x = 0; x < w; x++) {

			r[yi] = dv[rsum];
			g[yi] = dv[gsum];
			b[yi] = dv[bsum];

			rsum -= routsum;
			gsum -= goutsum;
			bsum -= boutsum;

			stackstart = stackpointer - radius + div;
			sir = stack[stackstart % div];

			routsum -= sir[0];
			goutsum -= sir[1];
			boutsum -= sir[2];

			if (y == 0) {
				vmin[x] = MIN(x + radius + 1, wm);
			}
			p = yw + vmin[x];


			sir[0] = pix[(p*comp)];
			sir[1] = pix[(p*comp) + 1];
			sir[2] = pix[(p*comp) + 2];
			rinsum += sir[0];
			ginsum += sir[1];
			binsum += sir[2];

			rsum += rinsum;
			gsum += ginsum;
			bsum += binsum;

			stackpointer = (stackpointer + 1) % div;
			sir = stack[(stackpointer) % div];

			routsum += sir[0];
			goutsum += sir[1];
			boutsum += sir[2];

			rinsum -= sir[0];
			ginsum -= sir[1];
			binsum -= sir[2];

			yi++;
		}
		yw += w;
	}
	for (x = 0; x < w; x++) {
		rinsum = ginsum = binsum = routsum = goutsum = boutsum = rsum = gsum = bsum = 0;
		yp = -radius * w;
		for (i = -radius; i <= radius; i++) {
			yi = MAX(0, yp) + x;

			sir = stack[i + radius];

			sir[0] = r[yi];
			sir[1] = g[yi];
			sir[2] = b[yi];

			rbs = r1 - abs(i);

			rsum += r[yi] * rbs;
			gsum += g[yi] * rbs;
			bsum += b[yi] * rbs;

			if (i > 0) {
				rinsum += sir[0];
				ginsum += sir[1];
				binsum += sir[2];
			}
			else {
				routsum += sir[0];
				goutsum += sir[1];
				boutsum += sir[2];
			}

			if (i < hm) {
				yp += w;
			}
		}
		yi = x;
		stackpointer = radius;
		for (y = 0; y < h; y++) {

			pix[(yi*comp)] = dv[rsum];
			pix[(yi*comp) + 1] = dv[gsum];
			pix[(yi*comp) + 2] = dv[bsum];
			rsum -= routsum;
			gsum -= goutsum;
			bsum -= boutsum;

			stackstart = stackpointer - radius + div;
			sir = stack[stackstart % div];

			routsum -= sir[0];
			goutsum -= sir[1];
			boutsum -= sir[2];

			if (x == 0) {
				vmin[y] = MIN(y + r1, hm) * w;
			}
			p = x + vmin[y];

			sir[0] = r[p];
			sir[1] = g[p];
			sir[2] = b[p];

			rinsum += sir[0];
			ginsum += sir[1];
			binsum += sir[2];

			rsum += rinsum;
			gsum += ginsum;
			bsum += binsum;

			stackpointer = (stackpointer + 1) % div;
			sir = stack[stackpointer];

			routsum += sir[0];
			goutsum += sir[1];
			boutsum += sir[2];

			rinsum -= sir[0];
			ginsum -= sir[1];
			binsum -= sir[2];

			yi += w;
		}
	}

	free(rgb);
	free(vmin);
	free(dv);
	free(stack);
}

  在博主机子上测试一张5000x3000的图像,模糊半径为10的情况下,耗时1s.

posted @ 2015-10-22 03:07  cpuimage  阅读(1037)  评论(0编辑  收藏  举报