From e36adfbc096e8360e7fa3b60ad97e729e7bddada Mon Sep 17 00:00:00 2001 From: heckflosse Date: Fri, 12 Apr 2019 15:18:10 +0200 Subject: [PATCH] locallab: speedup for blur, also some cleanups --- rtengine/iplocallab.cc | 262 ++++++++++++----------------------------- 1 file changed, 77 insertions(+), 185 deletions(-) diff --git a/rtengine/iplocallab.cc b/rtengine/iplocallab.cc index 163bb3a03..3dac11cb7 100644 --- a/rtengine/iplocallab.cc +++ b/rtengine/iplocallab.cc @@ -1241,18 +1241,18 @@ void ImProcFunctions::DeNoise_Local(int call, const struct local_params& lp, in // BENCHFUN const float ach = (float)lp.trans / 100.f; + const float factnoise1 = 1.f + (lp.noisecf) / 500.f; + const float factnoise2 = 1.f + (lp.noisecc) / 500.f; + const float factnoise = factnoise1 * factnoise2; - float factnoise1 = 1.f + (lp.noisecf) / 500.f; - float factnoise2 = 1.f + (lp.noisecc) / 500.f; + const int GW = transformed->W; + const int GH = transformed->H; + const float refa = chromaref * cos(hueref); + const float refb = chromaref * sin(hueref); - int GW = transformed->W; - int GH = transformed->H; - float refa = chromaref * cos(hueref); - float refb = chromaref * sin(hueref); + std::unique_ptr origblur(new LabImage(GW, GH)); - LabImage *origblur = new LabImage(GW, GH); - - float radius = 3.f / sk; + const float radius = 3.f / sk; #ifdef _OPENMP #pragma omp parallel #endif @@ -1260,9 +1260,11 @@ void ImProcFunctions::DeNoise_Local(int call, const struct local_params& lp, in gaussianBlur(original->L, origblur->L, GW, GH, radius); gaussianBlur(original->a, origblur->a, GW, GH, radius); gaussianBlur(original->b, origblur->b, GW, GH, radius); - } + const int begx = int (lp.xc - lp.lxL); + const int begy = int (lp.yc - lp.lyT); + #ifdef _OPENMP #pragma omp parallel if (multiThread) #endif @@ -1283,19 +1285,11 @@ void ImProcFunctions::DeNoise_Local(int call, const struct local_params& lp, in const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing if (isZone0) { // outside selection and outside transition zone => no effect, keep original values - for (int x = 0; x < transformed->W; x++) { - transformed->L[y][x] = original->L[y][x]; - transformed->a[y][x] = original->a[y][x]; - transformed->b[y][x] = original->b[y][x]; - } - continue; } for (int x = 0, lox = cx + x; x < transformed->W; x++, lox++) { int zone = 0; - int begx = int (lp.xc - lp.lxL); - int begy = int (lp.yc - lp.lyT); float localFactor = 1.f; @@ -1305,11 +1299,7 @@ void ImProcFunctions::DeNoise_Local(int call, const struct local_params& lp, in calcTransitionrect(lox, loy, ach, lp, zone, localFactor); } - if (zone == 0) { // outside selection and outside transition zone => no effect, keep original values - transformed->L[y][x] = original->L[y][x]; - transformed->a[y][x] = original->a[y][x]; - transformed->b[y][x] = original->b[y][x]; continue; } @@ -1332,15 +1322,7 @@ void ImProcFunctions::DeNoise_Local(int call, const struct local_params& lp, in } switch (zone) { - case 0: { // outside selection and outside transition zone => no effect, keep original values - transformed->L[y][x] = original->L[y][x]; - transformed->a[y][x] = original->a[y][x]; - transformed->b[y][x] = original->b[y][x]; - break; - } - case 1: { // inside transition zone - float factorx = localFactor; float difL, difa, difb; if (call == 2 /*|| call == 1 || call == 3 */) { //simpleprocess @@ -1354,12 +1336,12 @@ void ImProcFunctions::DeNoise_Local(int call, const struct local_params& lp, in } - difL *= factorx * reducdEL; - difa *= factorx * reducdEa; - difb *= factorx * reducdEb; + difL *= localFactor * reducdEL; + difa *= localFactor * reducdEa; + difb *= localFactor * reducdEb; transformed->L[y][x] = CLIP(original->L[y][x] + difL); - transformed->a[y][x] = CLIPC((original->a[y][x] + difa) * factnoise1 * factnoise2); - transformed->b[y][x] = CLIPC((original->b[y][x] + difb) * factnoise1 * factnoise2) ; + transformed->a[y][x] = CLIPC((original->a[y][x] + difa) * factnoise); + transformed->b[y][x] = CLIPC((original->b[y][x] + difb) * factnoise) ; break; } @@ -1382,23 +1364,26 @@ void ImProcFunctions::DeNoise_Local(int call, const struct local_params& lp, in difb *= reducdEb; transformed->L[y][x] = CLIP(original->L[y][x] + difL); - transformed->a[y][x] = CLIPC((original->a[y][x] + difa) * factnoise1 * factnoise2); - transformed->b[y][x] = CLIPC((original->b[y][x] + difb) * factnoise1 * factnoise2); + transformed->a[y][x] = CLIPC((original->a[y][x] + difa) * factnoise); + transformed->b[y][x] = CLIPC((original->b[y][x] + difb) * factnoise); } } } } } - delete origblur; - } -void ImProcFunctions::BlurNoise_Local(LabImage * tmp1, const float hueref, const float chromaref, const float lumaref, const local_params & lp, LabImage * original, LabImage * transformed, int cx, int cy, int sk) +void ImProcFunctions::BlurNoise_Local(LabImage *tmp1, const float hueref, const float chromaref, const float lumaref, const local_params & lp, LabImage * original, LabImage * transformed, int cx, int cy, int sk) { //local BLUR BENCHFUN + const int ystart = std::max(static_cast(lp.yc - lp.lyT) - cy, 0); + const int yend = std::min(static_cast(lp.yc + lp.ly) - cy, original->H); + const int xstart = std::max(static_cast(lp.xc - lp.lxL) - cx, 0); + const int xend = std::min(static_cast(lp.xc + lp.lx) - cx, original->W); + const float ach = lp.trans / 100.f; const int GW = transformed->W; const int GH = transformed->H; @@ -1430,8 +1415,6 @@ void ImProcFunctions::BlurNoise_Local(LabImage * tmp1, const float hueref, const #endif { const int limscope = 80; - const int begy = int (lp.yc - lp.lyT); - const int begx = int (lp.xc - lp.lxL); const float mindE = 4.f + MINSCOPE * lp.sensbn * lp.thr;//best usage ?? with blurnoise const float maxdE = 5.f + MAXSCOPE * lp.sensbn * (1 + 0.1f * lp.thr); const float mindElim = 2.f + MINSCOPE * limscope * lp.thr; @@ -1441,16 +1424,9 @@ void ImProcFunctions::BlurNoise_Local(LabImage * tmp1, const float hueref, const #pragma omp for schedule(dynamic,16) #endif - for (int y = 0; y < transformed->H; y++) { + for (int y = ystart; y < yend; y++) { const int loy = cy + y; - - const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing - - if (isZone0) { // outside selection and outside transition zone => no effect, keep original values - continue; - } - - for (int x = 0, lox = cx + x; x < transformed->W; x++, lox++) { + for (int x = xstart, lox = cx + x; x < xend; x++, lox++) { int zone = 0; float localFactor = 1.f; @@ -1460,7 +1436,6 @@ void ImProcFunctions::BlurNoise_Local(LabImage * tmp1, const float hueref, const calcTransitionrect(lox, loy, ach, lp, zone, localFactor); } - if (zone == 0) { // outside selection and outside transition zone => no effect, keep original values continue; } @@ -1470,55 +1445,20 @@ void ImProcFunctions::BlurNoise_Local(LabImage * tmp1, const float hueref, const float reducdE; calcreducdE(dE, maxdE, mindE, maxdElim, mindElim, lp.iterat, limscope, lp.sensbn , reducdE); - switch (zone) { + const float difL = (tmp1->L[y - ystart][x - xstart] - original->L[y][x]) * localFactor * reducdE; + transformed->L[y][x] = CLIP(original->L[y][x] + difL); - case 1: { // inside transition zone - float difL, difa, difb; - difL = tmp1->L[loy - begy][lox - begx] - original->L[y][x]; - difa = tmp1->a[loy - begy][lox - begx] - original->a[y][x]; - difb = tmp1->b[loy - begy][lox - begx] - original->b[y][x]; - difL *= localFactor * reducdE; - - transformed->L[y][x] = CLIP(original->L[y][x] + difL); - - if (!lp.actsp) { - difa *= localFactor * reducdE; - difb *= localFactor * reducdE; - - transformed->a[y][x] = CLIPC(original->a[y][x] + difa); - transformed->b[y][x] = CLIPC(original->b[y][x] + difb); - } - - break; - } - - case 2: { // inside selection => full effect, no transition - float difL, difa, difb; - - difL = tmp1->L[loy - begy][lox - begx] - original->L[y][x]; - difa = tmp1->a[loy - begy][lox - begx] - original->a[y][x]; - difb = tmp1->b[loy - begy][lox - begx] - original->b[y][x]; - - difL *= reducdE;//(100.f + realstrdE) / 100.f; - - transformed->L[y][x] = CLIP(original->L[y][x] + difL); - - if (!lp.actsp) { - difa *= reducdE; - difb *= reducdE; - - transformed->a[y][x] = CLIPC(original->a[y][x] + difa); ; - transformed->b[y][x] = CLIPC(original->b[y][x] + difb); - } - } + if (!lp.actsp) { + const float difa = (tmp1->a[y - ystart][x - xstart] - original->a[y][x]) * localFactor * reducdE;; + const float difb = (tmp1->b[y - ystart][x - xstart] - original->b[y][x]) * localFactor * reducdE;; + transformed->a[y][x] = CLIPC(original->a[y][x] + difa); + transformed->b[y][x] = CLIPC(original->b[y][x] + difb); } } } } } - - void ImProcFunctions::InverseReti_Local(const struct local_params & lp, const float hueref, const float chromaref, const float lumaref, LabImage * original, LabImage * transformed, const LabImage * const tmp1, int cx, int cy, int chro, int sk) { // BENCHFUN @@ -3910,40 +3850,29 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o //Blur and noise if (((radius >= 1.5 * GAUSS_SKIP && lp.rad > 1.) || lp.stren > 0.1) && lp.blurena) { // radius < GAUSS_SKIP means no gauss, just copy of original image + StopWatch Stop1("blur"); std::unique_ptr tmp1; - std::unique_ptr bufgb; - // LabImage *deltasobelL = nullptr; - int GW = transformed->W; - int GH = transformed->H; - int bfh = int (lp.ly + lp.lyT) + del; //bfw bfh real size of square zone - int bfw = int (lp.lx + lp.lxL) + del; - if (call <= 3 && lp.blurmet != 1) { - bufgb.reset(new LabImage(bfw, bfh, true)); - - int begy = lp.yc - lp.lyT; - int begx = lp.xc - lp.lxL; - int yEn = lp.yc + lp.ly; - int xEn = lp.xc + lp.lx; + if (call <= 3 && lp.blurmet == 0) { + const int ystart = std::max(static_cast(lp.yc - lp.lyT) - cy, 0); + const int yend = std::min(static_cast(lp.yc + lp.ly) - cy, original->H); + const int xstart = std::max(static_cast(lp.xc - lp.lxL) - cx, 0); + const int xend = std::min(static_cast(lp.xc + lp.lx) - cx, original->W); + const int bfh = yend - ystart; + const int bfw = xend - xstart; + tmp1.reset(new LabImage(bfw, bfh)); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) #endif - for (int y = 0; y < transformed->H ; y++) { - const int loy = cy + y; - if (loy >= begy && loy < yEn) { - for (int x = 0; x < transformed->W; x++) { - const int lox = cx + x; - if (lox >= begx && lox < xEn) { - bufgb->L[loy - begy][lox - begx] = original->L[y][x]; - bufgb->a[loy - begy][lox - begx] = original->a[y][x]; - bufgb->b[loy - begy][lox - begx] = original->b[y][x]; - } - } + for (int y = ystart; y < yend ; y++) { + for (int x = xstart; x < xend; x++) { + tmp1->L[y - ystart][x - xstart] = original->L[y][x]; + tmp1->a[y - ystart][x - xstart] = original->a[y][x]; + tmp1->b[y - ystart][x - xstart] = original->b[y][x]; } } - tmp1.reset(new LabImage(bfw, bfh)); #ifdef _OPENMP @@ -3951,13 +3880,15 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o #endif { - gaussianBlur(bufgb->L, tmp1->L, bfw, bfh, radius); - gaussianBlur(bufgb->a, tmp1->a, bfw, bfh, radius); - gaussianBlur(bufgb->b, tmp1->b, bfw, bfh, radius); + gaussianBlur(tmp1->L, tmp1->L, bfw, bfh, radius); + gaussianBlur(tmp1->a, tmp1->a, bfw, bfh, radius); + gaussianBlur(tmp1->b, tmp1->b, bfw, bfh, radius); } } else { + const int GW = transformed->W; + const int GH = transformed->H; tmp1.reset(new LabImage(transformed->W, transformed->H)); #ifdef _OPENMP @@ -3972,11 +3903,9 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o } if (lp.stren > 0.1f) { - if (lp.blurmet <= 1) { - float mean = 0.f;//0 best result - float variance = lp.stren ; //(double) SQR(lp.stren)/sk; - addGaNoise(tmp1.get(), tmp1.get(), mean, variance, sk) ; - } + float mean = 0.f;//0 best result + float variance = lp.stren ; //(double) SQR(lp.stren)/sk; + addGaNoise(tmp1.get(), tmp1.get(), mean, variance, sk) ; } if (lp.blurmet == 0) { //blur and noise (center) @@ -3989,81 +3918,44 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o //local impulse if ((lp.bilat > 0.f) && lp.denoiena) { - int bfh = int (lp.ly + lp.lyT) + del; //bfw bfh real size of square zone - int bfw = int (lp.lx + lp.lxL) + del; + const int bfh = int (lp.ly + lp.lyT) + del; //bfw bfh real size of square zone + const int bfw = int (lp.lx + lp.lxL) + del; - LabImage *bufwv = nullptr; + std::unique_ptr bufwv; if (call == 2) {//simpleprocess - bufwv = new LabImage(bfw, bfh); //buffer for data in zone limit + bufwv.reset(new LabImage(bfw, bfh)); //buffer for data in zone limit - - int begy = lp.yc - lp.lyT; - int begx = lp.xc - lp.lxL; - int yEn = lp.yc + lp.ly; - int xEn = lp.xc + lp.lx; + const int begy = lp.yc - lp.lyT; + const int begx = lp.xc - lp.lxL; + const int yEn = lp.yc + lp.ly; + const int xEn = lp.xc + lp.lx; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) #endif - for (int y = 0; y < transformed->H ; y++) //{ - for (int x = 0; x < transformed->W; x++) { - int lox = cx + x; - int loy = cy + y; - - if (lox >= begx && lox < xEn && loy >= begy && loy < yEn) { - bufwv->L[loy - begy][lox - begx] = original->L[y][x]; - bufwv->a[loy - begy][lox - begx] = original->a[y][x]; - bufwv->b[loy - begy][lox - begx] = original->b[y][x]; - } - + for (int y = rtengine::max(0, begy - cy); y < rtengine::min(transformed->H, yEn - cy); y++) { + const int loy = cy + y; + for (int x = rtengine::max(0, begx - cx); x < rtengine::min(transformed->W, xEn - cx); x++) { + const int lox = cx + x; + bufwv->L[loy - begy][lox - begx] = original->L[y][x]; + bufwv->a[loy - begy][lox - begx] = original->a[y][x]; + bufwv->b[loy - begy][lox - begx] = original->b[y][x]; } + } } else {//dcrop.cc - - int GH = transformed->H; - int GW = transformed->W; - - bufwv = new LabImage(GW, GH); -#ifdef _OPENMP - #pragma omp parallel for schedule(dynamic,16) -#endif - - for (int ir = 0; ir < GH; ir++) - for (int jr = 0; jr < GW; jr++) { - bufwv->L[ir][jr] = original->L[ir][jr]; - bufwv->a[ir][jr] = original->a[ir][jr]; - bufwv->b[ir][jr] = original->b[ir][jr]; - } - - + bufwv.reset(new LabImage(transformed->W, transformed->H)); + bufwv->CopyFrom(original); } //end dcrop - double thr = (float) lp.bilat / 20.0; + const double threshold = lp.bilat / 20.0; if (bfh > 8 && bfw > 8) { - ImProcFunctions::impulse_nr(bufwv, thr); + ImProcFunctions::impulse_nr(bufwv.get(), threshold); } - LabImage tmp1(bufwv->W, bufwv->H); - //copy bufwv to tmp1 to use same algo for Denoise_local and DeNoise_Local_imp - -#ifdef _OPENMP - #pragma omp parallel for schedule(dynamic,16) -#endif - - for (int ir = 0; ir < bufwv->H; ir++) - for (int jr = 0; jr < bufwv->W; jr++) { - tmp1.L[ir][jr] = bufwv->L[ir][jr]; - tmp1.a[ir][jr] = bufwv->a[ir][jr]; - tmp1.b[ir][jr] = bufwv->b[ir][jr]; - } - - - - DeNoise_Local(call, lp, levred, huerefblur, lumarefblur, chromarefblur, original, transformed, tmp1, cx, cy, sk); - - delete bufwv; + DeNoise_Local(call, lp, levred, huerefblur, lumarefblur, chromarefblur, original, transformed, *(bufwv.get()), cx, cy, sk); } //local denoise