leptonica 学习笔记2——pixBackgroundNormSimple
1 pixBackgroundNormSimple
函数功能:自适应背景标准化(adaptive background normalization)
位置:adaptmap.c
/*------------------------------------------------------------------*
 *                Adaptive background normalization                 *
 *------------------------------------------------------------------*/
/*!
 *  pixBackgroundNormSimple()
 *
 *      Input:  pixs (8 bpp grayscale or 32 bpp rgb)
 *              pixim (<optional> 1 bpp 'image' mask; can be null)
 *              pixg (<optional> 8 bpp grayscale version; can be null)
 *      Return: pixd (8 bpp or 32 bpp rgb), or null on error
 *
 *  Notes:
 *      (1) Convenience front end for pixBackgroundNorm().  The three
 *          image arguments are forwarded unchanged; the remaining
 *          seven parameters (tile size, foreground threshold, min
 *          count, target background value and smoothing half-widths)
 *          come from the DEFAULT_* constants.
 *      (2) The input image is either grayscale or rgb.
 *      (3) See pixBackgroundNorm() for usage and function.
 */
PIX *
pixBackgroundNormSimple(PIX  *pixs,
                        PIX  *pixim,
                        PIX  *pixg)
{
PIX  *pixd;

    pixd = pixBackgroundNorm(pixs, pixim, pixg,
                             DEFAULT_TILE_WIDTH, DEFAULT_TILE_HEIGHT,
                             DEFAULT_FG_THRESHOLD, DEFAULT_MIN_COUNT,
                             DEFAULT_BG_VAL,
                             DEFAULT_X_SMOOTH_SIZE, DEFAULT_Y_SMOOTH_SIZE);
    return pixd;
}
由代码可知,此函数是 pixBackgroundNorm 的简化接口,其中七个参数已设为默认值。默认参数为:
/*
 * Defaults that pixBackgroundNormSimple() supplies to pixBackgroundNorm().
 *
 * Constraints on the values:
 *   (1) DEFAULT_MIN_COUNT must never exceed the tile area
 *       (DEFAULT_TILE_WIDTH * DEFAULT_TILE_HEIGHT).
 *   (2) DEFAULT_BG_VAL must stay sufficiently below 255 to avoid
 *       accidental saturation, but should otherwise be large so that
 *       the dynamic range is not shrunk.
 *   (3) Apart from the above, results should not be sensitive to
 *       these values.
 */
static const l_int32  DEFAULT_TILE_WIDTH = 10;     /* tile width, pixels */
static const l_int32  DEFAULT_TILE_HEIGHT = 15;    /* tile height, pixels */
static const l_int32  DEFAULT_FG_THRESHOLD = 60;   /* foreground threshold */
static const l_int32  DEFAULT_MIN_COUNT = 40;      /* min bg count per tile */
static const l_int32  DEFAULT_BG_VAL = 200;        /* target background value */
static const l_int32  DEFAULT_X_SMOOTH_SIZE = 2;   /* smoothing half-width */
static const l_int32  DEFAULT_Y_SMOOTH_SIZE = 1;   /* smoothing half-height */
2 pixBackgroundNorm
函数位置:adaptmap.c
/*! * pixBackgroundNorm() * * Input: pixs (8 bpp grayscale or 32 bpp rgb) * pixim (<optional> 1 bpp 'image' mask; can be null) * pixg (<optional> 8 bpp grayscale version; can be null) * sx, sy (tile size in pixels) * thresh (threshold for determining foreground) * mincount (min threshold on counts in a tile) * bgval (target bg val; typ. > 128) * smoothx (half-width of block convolution kernel width) * smoothy (half-width of block convolution kernel height) * Return: pixd (8 bpp or 32 bpp rgb), or null on error * * Notes: * (1) This is a top-level interface for normalizing the image intensity * by mapping the image so that the background is near the input * value 'bgval'. * (2) The input image is either grayscale or rgb. * (3) For each component in the input image, the background value * in each tile is estimated using the values in the tile that * are not part of the foreground, where the foreground is * determined by the input 'thresh' argument. * (4) An optional binary mask can be specified, with the foreground * pixels typically over image regions. The resulting background * map values will be determined by surrounding pixels that are * not under the mask foreground. The origin (0,0) of this mask * is assumed to be aligned with the origin of the input image. * This binary mask must not fully cover pixs, because then there * will be no pixels in the input image available to compute * the background. * (5) An optional grayscale version of the input pixs can be supplied. * The only reason to do this is if the input is RGB and this * grayscale version can be used elsewhere. If the input is RGB * and this is not supplied, it is made internally using only * the green component, and destroyed after use. * (6) The dimensions of the pixel tile (sx, sy) give the amount by * by which the map is reduced in size from the input image. * (7) The threshold is used to binarize the input image, in order to * locate the foreground components. 
If this is set too low, * some actual foreground may be used to determine the maps; * if set too high, there may not be enough background * to determine the map values accurately. Typically, it's * better to err by setting the threshold too high. * (8) A 'mincount' threshold is a minimum count of pixels in a * tile for which a background reading is made, in order for that * pixel in the map to be valid. This number should perhaps be * at least 1/3 the size of the tile. * (9) A 'bgval' target background value for the normalized image. This * should be at least 128. If set too close to 255, some * clipping will occur in the result. * (10) Two factors, 'smoothx' and 'smoothy', are input for smoothing * the map. Each low-pass filter kernel dimension is * is 2 * (smoothing factor) + 1, so a * value of 0 means no smoothing. A value of 1 or 2 is recommended. */ PIX * pixBackgroundNorm(PIX *pixs, PIX *pixim, PIX *pixg, l_int32 sx, l_int32 sy, l_int32 thresh, l_int32 mincount, l_int32 bgval, l_int32 smoothx, l_int32 smoothy) { l_int32 d, allfg; PIX *pixm, *pixmi, *pixd; PIX *pixmr, *pixmg, *pixmb, *pixmri, *pixmgi, *pixmbi; PROCNAME("pixBackgroundNorm"); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); d = pixGetDepth(pixs); if (d != 8 && d != 32) return (PIX *)ERROR_PTR("pixs not 8 or 32 bpp", procName, NULL); if (sx < 4 || sy < 4) return (PIX *)ERROR_PTR("sx and sy must be >= 4", procName, NULL); if (mincount > sx * sy) { L_WARNING("mincount too large for tile size\n", procName); mincount = (sx * sy) / 3; } /* If pixim exists, verify that it is not all foreground. 
*/ if (pixim) { pixInvert(pixim, pixim); pixZero(pixim, &allfg); pixInvert(pixim, pixim); if (allfg) return (PIX *)ERROR_PTR("pixim all foreground", procName, NULL); } pixd = NULL; if (d == 8) { pixm = NULL; pixGetBackgroundGrayMap(pixs, pixim, sx, sy, thresh, mincount, &pixm); if (!pixm) { L_WARNING("map not made; return a copy of the source\n", procName); return pixCopy(NULL, pixs); } pixmi = pixGetInvBackgroundMap(pixm, bgval, smoothx, smoothy); if (!pixmi) ERROR_PTR("pixmi not made", procName, NULL); else pixd = pixApplyInvBackgroundGrayMap(pixs, pixmi, sx, sy); pixDestroy(&pixm); pixDestroy(&pixmi); } else { pixmr = pixmg = pixmb = NULL; pixGetBackgroundRGBMap(pixs, pixim, pixg, sx, sy, thresh, mincount, &pixmr, &pixmg, &pixmb); if (!pixmr || !pixmg || !pixmb) { pixDestroy(&pixmr); pixDestroy(&pixmg); pixDestroy(&pixmb); L_WARNING("map not made; return a copy of the source\n", procName); return pixCopy(NULL, pixs); } pixmri = pixGetInvBackgroundMap(pixmr, bgval, smoothx, smoothy); pixmgi = pixGetInvBackgroundMap(pixmg, bgval, smoothx, smoothy); pixmbi = pixGetInvBackgroundMap(pixmb, bgval, smoothx, smoothy); if (!pixmri || !pixmgi || !pixmbi) ERROR_PTR("not all pixm*i are made", procName, NULL); else pixd = pixApplyInvBackgroundRGBMap(pixs, pixmri, pixmgi, pixmbi, sx, sy); pixDestroy(&pixmr); pixDestroy(&pixmg); pixDestroy(&pixmb); pixDestroy(&pixmri); pixDestroy(&pixmgi); pixDestroy(&pixmbi); } if (!pixd) ERROR_PTR("pixd not made", procName, NULL); pixCopyResolution(pixd, pixs); return pixd; }
3 pixGetBackgroundGrayMap
函数功能:局部背景测量
函数位置:adaptmap.c
/*------------------------------------------------------------------*
 *                 Measurement of local background                  *
 *------------------------------------------------------------------*/
/*!
 *  pixGetBackgroundGrayMap()
 *
 *      Input:  pixs (8 bpp grayscale; not cmapped)
 *              pixim (<optional> 1 bpp 'image' mask; can be null; it
 *                     should not have all foreground pixels)
 *              sx, sy (tile size in pixels)
 *              thresh (threshold for determining foreground)
 *              mincount (min threshold on counts in a tile)
 *              &pixd (<return> 8 bpp grayscale map)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The background is measured in regions that don't have
 *          images.  It is then propagated into the image regions,
 *          and finally smoothed in each image region.
 *      (2) On any error, *ppixd is left null.
 *      (3) If mincount exceeds the tile area (sx * sy), a warning is
 *          issued and it is reset to (sx * sy) / 3.
 */
l_int32
pixGetBackgroundGrayMap(PIX     *pixs,
                        PIX     *pixim,
                        l_int32  sx,
                        l_int32  sy,
                        l_int32  thresh,
                        l_int32  mincount,
                        PIX    **ppixd)
{
l_int32    w, h, wd, hd, wim, him, wpls, wplim, wpld, wplf;
l_int32    xim, yim, delx, nx, ny, i, j, k, m;
l_int32    count, sum, val8;
l_int32    empty, fgpixels;
l_uint32  *datas, *dataim, *datad, *dataf, *lines, *lineim, *lined, *linef;
l_float32  scalex, scaley;
PIX       *pixd, *piximi, *pixb, *pixf, *pixims;

    PROCNAME("pixGetBackgroundGrayMap");

    if (!ppixd)
        return ERROR_INT("&pixd not defined", procName, 1);
    *ppixd = NULL;
    if (!pixs || pixGetDepth(pixs) != 8)
        return ERROR_INT("pixs not defined or not 8 bpp", procName, 1);
    if (pixGetColormap(pixs))
        return ERROR_INT("pixs is colormapped", procName, 1);
    if (pixim && pixGetDepth(pixim) != 1)
        return ERROR_INT("pixim not 1 bpp", procName, 1);
    if (sx < 4 || sy < 4)
        return ERROR_INT("sx and sy must be >= 4", procName, 1);
    if (mincount > sx * sy) {
        L_WARNING("mincount too large for tile size\n", procName);
        mincount = (sx * sy) / 3;
    }

        /* Evaluate the 'image' mask, pixim, and make sure
         * it is not all fg. */
    fgpixels = 0;  /* boolean for existence of fg pixels in the image mask. */
    if (pixim) {
        piximi = pixInvert(NULL, pixim);  /* set non-'image' pixels to 1 */
        pixZero(piximi, &empty);
        pixDestroy(&piximi);
        if (empty)
            return ERROR_INT("pixim all fg; no background", procName, 1);
        pixZero(pixim, &empty);
        if (!empty)  /* there are fg pixels in pixim */
            fgpixels = 1;
    }

        /* Generate the foreground mask, pixf, which is at
         * full resolution.  These pixels will be ignored when
         * computing the background values.  Either step can fail
         * (e.g., for an out-of-range thresh), so pixf must be checked
         * before its raster data is read below. */
    pixb = pixThresholdToBinary(pixs, thresh);
    pixf = pixMorphSequence(pixb, "d7.1 + d1.7", 0);
    pixDestroy(&pixb);
    if (!pixf)
        return ERROR_INT("pixf not made", procName, 1);

    /* ------------- Set up the output map pixd --------------- */
        /* Generate pixd, which is reduced by the factors (sx, sy). */
    w = pixGetWidth(pixs);
    h = pixGetHeight(pixs);
    wd = (w + sx - 1) / sx;
    hd = (h + sy - 1) / sy;
    if ((pixd = pixCreate(wd, hd, 8)) == NULL) {
        pixDestroy(&pixf);
        return ERROR_INT("pixd not made", procName, 1);
    }

        /* Note: we only compute map values in tiles that are complete.
         * In general, tiles at right and bottom edges will not be
         * complete, and we must fill them in later. */
    nx = w / sx;
    ny = h / sy;
    wpls = pixGetWpl(pixs);
    datas = pixGetData(pixs);
    wpld = pixGetWpl(pixd);
    datad = pixGetData(pixd);
    wplf = pixGetWpl(pixf);
    dataf = pixGetData(pixf);
    for (i = 0; i < ny; i++) {
        lines = datas + sy * i * wpls;
        linef = dataf + sy * i * wplf;
        lined = datad + i * wpld;
        for (j = 0; j < nx; j++) {
            delx = j * sx;
            sum = 0;
            count = 0;
                /* Average the source pixels in this tile that are
                 * not under the foreground mask. */
            for (k = 0; k < sy; k++) {
                for (m = 0; m < sx; m++) {
                    if (GET_DATA_BIT(linef + k * wplf, delx + m) == 0) {
                        sum += GET_DATA_BYTE(lines + k * wpls, delx + m);
                        count++;
                    }
                }
            }
                /* Tiles with too few background pixels get no map
                 * value here; they are handled by the hole filling. */
            if (count >= mincount) {
                val8 = sum / count;
                SET_DATA_BYTE(lined, j, val8);
            }
        }
    }
    pixDestroy(&pixf);

        /* If there is an optional mask with fg pixels, erase the previous
         * calculation for the corresponding map pixels, setting the
         * map values to 0.   Then, when all the map holes are filled,
         * these erased pixels will be set by the surrounding map values.
         *
         * The calculation here is relatively efficient: for each pixel
         * in pixd (which corresponds to a tile of mask pixels in pixim)
         * we look only at the pixel in pixim that is at the center
         * of the tile.  If the mask pixel is ON, we reset the map
         * pixel in pixd to 0, so that it can later be filled in. */
    pixims = NULL;
    if (pixim && fgpixels) {
        wim = pixGetWidth(pixim);
        him = pixGetHeight(pixim);
        dataim = pixGetData(pixim);
        wplim = pixGetWpl(pixim);
        for (i = 0; i < ny; i++) {
            yim = i * sy + sy / 2;
            if (yim >= him)
                break;
            lineim = dataim + yim * wplim;
            for (j = 0; j < nx; j++) {
                xim = j * sx + sx / 2;
                if (xim >= wim)
                    break;
                if (GET_DATA_BIT(lineim, xim))
                    pixSetPixel(pixd, j, i, 0);
            }
        }
    }

        /* Fill all the holes in the map. */
    if (pixFillMapHoles(pixd, nx, ny, L_FILL_BLACK)) {
        pixDestroy(&pixd);
        L_WARNING("can't make the map\n", procName);
        return 1;
    }

        /* Finally, for each connected region corresponding to the
         * 'image' mask, reset all pixels to their average value.
         * Each of these components represents an image (or part of one)
         * in the input, and this smooths the background values
         * in each of these regions. */
    if (pixim && fgpixels) {
        scalex = 1. / (l_float32)sx;
        scaley = 1. / (l_float32)sy;
        pixims = pixScaleBySampling(pixim, scalex, scaley);
        pixSmoothConnectedRegions(pixd, pixims, 2);
        pixDestroy(&pixims);
    }

    *ppixd = pixd;
    pixCopyResolution(*ppixd, pixs);
    return 0;
}
4 pixThresholdToBinary
函数功能:使用固定阈值进行简单的(逐像素)二值化
位置:grayquant.c
/*------------------------------------------------------------------*
 *       Simple (pixelwise) binarization with fixed threshold       *
 *------------------------------------------------------------------*/
/*!
 *  pixThresholdToBinary()
 *
 *      Input:  pixs (4 or 8 bpp)
 *              threshold value
 *      Return: pixd (1 bpp), or null on error
 *
 *  Notes:
 *      (1) If the source pixel is less than the threshold value,
 *          the dest will be 1; otherwise, it will be 0
 *      (2) The threshold must be in [0 ... 16] for 4 bpp input and
 *          in [0 ... 256] for 8 bpp input.
 *      (3) A colormap on pixs is removed (to grayscale) on a temporary
 *          image before thresholding.  For 4 bpp colormapped input
 *          the temporary is 8 bpp, so the threshold is scaled by 16.
 */
PIX *
pixThresholdToBinary(PIX     *pixs,
                     l_int32  thresh)
{
l_int32    d, w, h, wplt, wpld;
l_uint32  *datat, *datad;
PIX       *pixt, *pixd;

    PROCNAME("pixThresholdToBinary");

    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    pixGetDimensions(pixs, &w, &h, &d);
    if (d != 4 && d != 8)
        return (PIX *)ERROR_PTR("pixs must be 4 or 8 bpp", procName, NULL);
    if (thresh < 0)
        return (PIX *)ERROR_PTR("thresh must be non-negative", procName, NULL);
    if (d == 4 && thresh > 16)
        return (PIX *)ERROR_PTR("4 bpp thresh not in {0-16}", procName, NULL);
    if (d == 8 && thresh > 256)
        return (PIX *)ERROR_PTR("8 bpp thresh not in {0-256}", procName, NULL);

        /* Remove colormap if it exists.  If there is a colormap,
         * pixt will be 8 bpp regardless of the depth of pixs.
         * The result is checked because its raster data is read
         * by the low-level thresholding routine below. */
    if ((pixt = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE)) == NULL)
        return (PIX *)ERROR_PTR("pixt not made", procName, NULL);

    if ((pixd = pixCreate(w, h, 1)) == NULL) {
        pixDestroy(&pixt);
        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
    }
    pixCopyResolution(pixd, pixs);
    datad = pixGetData(pixd);
    wpld = pixGetWpl(pixd);

    datat = pixGetData(pixt);
    wplt = pixGetWpl(pixt);
    if (pixGetColormap(pixs) && d == 4) {  /* promoted to 8 bpp */
        d = 8;
        thresh *= 16;
    }

    thresholdToBinaryLow(datad, w, h, wpld, datat, d, wplt, thresh);
    pixDestroy(&pixt);
    return pixd;
}
5 pixMorphSequence
函数功能:按顺序执行一系列二值图像的光栅操作(rasterop)形态学运算
位置:morphseq.c
/*-------------------------------------------------------------------------*
 *         Run a sequence of binary rasterop morphological operations      *
 *-------------------------------------------------------------------------*/
/*!
 *  pixMorphSequence()
 *
 *      Input:  pixs
 *              sequence (string specifying sequence)
 *              dispsep (controls debug display of each result in the sequence:
 *                       0: no output
 *                       > 0: gives horizontal separation in pixels between
 *                            successive displays
 *                       < 0: pdf output; abs(dispsep) is used for naming)
 *      Return: pixd, or null on error
 *
 *  Notes:
 *      (1) This does rasterop morphology on binary images.
 *      (2) This runs a pipeline of operations; no branching is allowed.
 *      (3) This only uses brick Sels, which are created on the fly.
 *          In the future this will be generalized to extract Sels from
 *          a Sela by name.
 *      (4) A new image is always produced; the input image is not changed.
 *      (5) This contains an interpreter, allowing sequences to be
 *          generated and run.
 *      (6) The format of the sequence string is defined below.
 *      (7) In addition to morphological operations, rank order reduction
 *          and replicated expansion allow operations to take place
 *          downscaled by a power of 2.
 *      (8) Intermediate results can optionally be displayed.
 *      (9) Thanks to Dar-Shyang Lee, who had the idea for this and
 *          built the first implementation.
 *      (10) The sequence string is formatted as follows:
 *            - An arbitrary number of operations, each separated
 *              by a '+' character.  White space is ignored.
 *            - Each operation begins with a case-independent character
 *              specifying the operation:
 *                 d or D  (dilation)
 *                 e or E  (erosion)
 *                 o or O  (opening)
 *                 c or C  (closing)
 *                 r or R  (rank binary reduction)
 *                 x or X  (replicative binary expansion)
 *                 b or B  (add a border of 0 pixels of this size)
 *            - The args to the morphological operations are bricks of hits,
 *              and are formatted as a.b, where a and b are horizontal and
 *              vertical dimensions, rsp.
 *            - The args to the reduction are a sequence of up to 4 integers,
 *              each from 1 to 4.
 *            - The arg to the expansion is a power of two, in the set
 *              {2, 4, 8, 16}.
 *      (11) An example valid sequence is:
 *               "b32 + o1.3 + C3.1 + r23 + e2.2 + D3.2 + X4"
 *           In this example, the following operation sequence is carried out:
 *             * b32: Add a 32 pixel border around the input image
 *             * o1.3: Opening with vert sel of length 3 (e.g., 1 x 3)
 *             * C3.1: Closing with horiz sel of length 3  (e.g., 3 x 1)
 *             * r23: Two successive 2x2 reductions with rank 2 in the first
 *                    and rank 3 in the second.  The result is a 4x reduced pix.
 *             * e2.2: Erosion with a 2x2 sel (origin will be at x,y: 0,0)
 *             * D3.2: Dilation with a 3x2 sel (origin will be at x,y: 1,0)
 *             * X4: 4x replicative expansion, back to original resolution
 *      (12) The safe closing is used.  However, if you implement a
 *           closing as separable dilations followed by separable erosions,
 *           it will not be safe.  For that situation, you need to add
 *           a sufficiently large border as the first operation in
 *           the sequence.  This will be removed automatically at the
 *           end.  There are two cautions:
 *              - When computing what is sufficient, remember that if
 *                reductions are carried out, the border is also reduced.
 *              - The border is removed at the end, so if a border is
 *                added at the beginning, the result must be at the
 *                same resolution as the input!
 *      (13) The sequence is validated with morphSequenceVerify() before
 *           any operation is run; the parsing below relies on that.
 */
PIX *
pixMorphSequence(PIX         *pixs,
                 const char  *sequence,
                 l_int32      dispsep)
{
char    *rawop, *op, *fname;
char     buf[256];
l_int32  nops, i, j, nred, fact, w, h, x, y, border, pdfout;
l_int32  level[4];
PIX     *pixt1, *pixt2;
PIXA    *pixa;
SARRAY  *sa;

    PROCNAME("pixMorphSequence");

    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (!sequence)
        return (PIX *)ERROR_PTR("sequence not defined", procName, NULL);

        /* Split sequence into individual operations */
    sa = sarrayCreate(0);
    sarraySplitString(sa, sequence, "+");
    nops = sarrayGetCount(sa);
    pdfout = (dispsep < 0) ? 1 : 0;

    if (!morphSequenceVerify(sa)) {
        sarrayDestroy(&sa);
        return (PIX *)ERROR_PTR("sequence not valid", procName, NULL);
    }

        /* Working image.  Made before the debug output is set up so
         * that a failure here has nothing else to clean up. */
    if ((pixt1 = pixCopy(NULL, pixs)) == NULL) {
        sarrayDestroy(&sa);
        return (PIX *)ERROR_PTR("pixt1 not made", procName, NULL);
    }

        /* Parse and operate */
    pixa = NULL;
    fname = NULL;  /* only set and used for pdf output */
    if (pdfout) {
        pixa = pixaCreate(0);
        pixaAddPix(pixa, pixs, L_CLONE);
        snprintf(buf, sizeof(buf), "/tmp/seq_output_%d.pdf", L_ABS(dispsep));
        fname = genPathname(buf, NULL);
    }
    border = 0;
    pixt2 = NULL;
    x = y = 0;
    for (i = 0; i < nops; i++) {
        rawop = sarrayGetString(sa, i, 0);
        op = stringRemoveChars(rawop, " \n\t");
        if (!op) {  /* must not index a null string below */
            if (pdfout) {
                FREE(fname);
                pixaDestroy(&pixa);
            }
            sarrayDestroy(&sa);
            pixDestroy(&pixt1);
            return (PIX *)ERROR_PTR("op not made", procName, NULL);
        }
        switch (op[0])
        {
        case 'd':
        case 'D':
            sscanf(&op[1], "%d.%d", &w, &h);
            pixt2 = pixDilateBrick(NULL, pixt1, w, h);
            pixSwapAndDestroy(&pixt1, &pixt2);
            break;
        case 'e':
        case 'E':
            sscanf(&op[1], "%d.%d", &w, &h);
            pixt2 = pixErodeBrick(NULL, pixt1, w, h);
            pixSwapAndDestroy(&pixt1, &pixt2);
            break;
        case 'o':
        case 'O':
            sscanf(&op[1], "%d.%d", &w, &h);
            pixOpenBrick(pixt1, pixt1, w, h);
            break;
        case 'c':
        case 'C':
            sscanf(&op[1], "%d.%d", &w, &h);
            pixCloseSafeBrick(pixt1, pixt1, w, h);
            break;
        case 'r':
        case 'R':
                /* One digit per reduction level; unused levels are 0 */
            nred = strlen(op) - 1;
            for (j = 0; j < nred; j++)
                level[j] = op[j + 1] - '0';
            for (j = nred; j < 4; j++)
                level[j] = 0;
            pixt2 = pixReduceRankBinaryCascade(pixt1, level[0], level[1],
                                               level[2], level[3]);
            pixSwapAndDestroy(&pixt1, &pixt2);
            break;
        case 'x':
        case 'X':
            sscanf(&op[1], "%d", &fact);
            pixt2 = pixExpandReplicate(pixt1, fact);
            pixSwapAndDestroy(&pixt1, &pixt2);
            break;
        case 'b':
        case 'B':
            sscanf(&op[1], "%d", &border);
            pixt2 = pixAddBorder(pixt1, border, 0);
            pixSwapAndDestroy(&pixt1, &pixt2);
            break;
        default:
            /* All invalid ops are caught in the first pass */
            break;
        }
        FREE(op);

            /* Debug output */
        if (dispsep > 0) {
            pixDisplay(pixt1, x, y);
            x += dispsep;
        }
        if (pdfout)
            pixaAddPix(pixa, pixt1, L_COPY);
    }

        /* Remove any border that was added by a 'b' operation */
    if (border > 0) {
        pixt2 = pixRemoveBorder(pixt1, border);
        pixSwapAndDestroy(&pixt1, &pixt2);
    }

    if (pdfout) {
        pixaConvertToPdf(pixa, 0, 1.0, L_FLATE_ENCODE, 0, fname, fname);
        FREE(fname);
        pixaDestroy(&pixa);
    }

    sarrayDestroy(&sa);
    return pixt1;
}