Questo è il mio codice di test per l'implementazione di un semplice algoritmo testBM, senza prefiltraggio. Ma ci vogliono circa 400 ms o anche più quando le dimensioni della finestra sono maggiori mentre lo StereoBM di opencv (CPU non GPU) richiede 20 ms. Ho controllato la fonte di StereoBM ma è difficile per me capirlo. C'è qualcuno che sa perché?Perché il mio codice è molto più lento di opencv per un semplice algoritmo StereoBM?
E di seguito è il mio codice.
void testBM(const Mat &left0,
const Mat &right0,
Mat &disparity,
int SAD,
int searchRange)
{
int cols = left0.cols;
int rows = left0.rows;
int total = cols*rows;
const uchar* data_left = left0.ptr<uchar>(0);
const uchar* data_right = right0.ptr<uchar>(0);
uchar* data_dm = new uchar[total];
int dbNum = 2 * SAD + 1;
int dNum = dbNum * dbNum;
//x is col index in the dbNum * dbNum window
//y is row index in this window
//z is (x + y * cols).
//I compute them in advance for avoid computing repeatedly.
Point3i *dLocDif = new Point3i[dNum];
for (int i = 0; i < dNum; i++)
{
dLocDif[i] = Point3i(i%dbNum - SAD, i/dbNum - SAD, 0);
dLocDif[i].z = dLocDif[i].x + dLocDif[i].y * cols;
}
//I compute disparity difference for eache search range to avoid
//computing repeatedly.
uchar* dif_ = new uchar[total*searchRange];
for (int _search = 0; _search < searchRange; _search++)
{
int th = _search * total;
for (int i = 0; i < total; i++)
{
int c = i % cols - _search;
if (c < 0) continue;
dif_[i+th] = (uchar)std::abs(data_left[i] - data_right[i-_search]);
}
}
for (int p = 0; p < total; p++)
{
int min = 50 * dNum;
int dm = -256;
int _col = p % cols;
int _row = p/cols;
int th = 0;
//I search for the smallest difference between left and right image
// using def_.
for (int _search = 0; _search < searchRange; _search++, th += total)
{
if (_col + _search > cols) break;
int temp = 0;
for (int i = 0; i < dNum; i++)
{
int _c = _col + dLocDif[i].x;
if (_c >= cols || _c < 0) continue;
int _r = _row + dLocDif[i].y;
if (_r >= rows || _r < 0) continue;
temp += dif_[th + p + dLocDif[i].z];
if (temp > min)
{
break;
}
}
if (temp < min)
{
dm = _search;
min = temp;
}
}
data_dm[p] = dm;
}
disparity = Mat(rows, cols, CV_8UC1, data_dm);
}
Ecco il codice sorgente essenziale di StereoBM in opencv. Sono un po 'confuso dopo l'inizializzazione. Qualcuno potrebbe spiegarci brevemente?
static void
findStereoCorrespondenceBM(const Mat& left, const Mat& right,
Mat& disp, Mat& cost, const CvStereoBMState& state,
uchar* buf, int _dy0, int _dy1)
{
const int ALIGN = 16;
int x, y, d;
int wsz = state.SADWindowSize, wsz2 = wsz/2;
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
int ndisp = state.numberOfDisparities;
int mindisp = state.minDisparity;
int lofs = MAX(ndisp - 1 + mindisp, 0);
int rofs = -MIN(ndisp - 1 + mindisp, 0);
int width = left.cols, height = left.rows;
int width1 = width - rofs - ndisp + 1;
int ftzero = state.preFilterCap;
int textureThreshold = state.textureThreshold;
int uniquenessRatio = state.uniquenessRatio;
short FILTERED = (short)((mindisp - 1) << DISPARITY_SHIFT);
int *sad, *hsad0, *hsad, *hsad_sub, *htext;
uchar *cbuf0, *cbuf;
const uchar* lptr0 = left.data + lofs;
const uchar* rptr0 = right.data + rofs;
const uchar *lptr, *lptr_sub, *rptr;
short* dptr = (short*)disp.data;
int sstep = (int)left.step;
int dstep = (int)(disp.step/sizeof(dptr[0]));
int cstep = (height+dy0+dy1)*ndisp;
int costbuf = 0;
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
const int TABSZ = 256;
uchar tab[TABSZ];
sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);
for(x = 0; x < TABSZ; x++)
tab[x] = (uchar)std::abs(x - ftzero);
// initialize buffers
memset(hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]));
memset(htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]));
for(x = -wsz2-1; x < wsz2; x++)
{
hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
lptr = lptr0 + std::min(std::max(x, -lofs), width-lofs-1) - dy0*sstep;
rptr = rptr0 + std::min(std::max(x, -rofs), width-rofs-1) - dy0*sstep;
for(y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep)
{
int lval = lptr[0];
for(d = 0; d < ndisp; d++)
{
int diff = std::abs(lval - rptr[d]);
cbuf[d] = (uchar)diff;
hsad[d] = (int)(hsad[d] + diff);
}
htext[y] += tab[lval];
}
}
// initialize the left and right borders of the disparity map
for(y = 0; y < height; y++)
{
for(x = 0; x < lofs; x++)
dptr[y*dstep + x] = FILTERED;
for(x = lofs + width1; x < width; x++)
dptr[y*dstep + x] = FILTERED;
}
dptr += lofs;
for(x = 0; x < width1; x++, dptr++)
{
int* costptr = cost.data ? (int*)cost.data + lofs + x : &costbuf;
int x0 = x - wsz2 - 1, x1 = x + wsz2;
const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
hsad = hsad0 - dy0*ndisp;
lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width-1-lofs) - dy0*sstep;
lptr = lptr0 + MIN(MAX(x1, -lofs), width-1-lofs) - dy0*sstep;
rptr = rptr0 + MIN(MAX(x1, -rofs), width-1-rofs) - dy0*sstep;
for(y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp,
hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep)
{
int lval = lptr[0];
for(d = 0; d < ndisp; d++)
{
int diff = std::abs(lval - rptr[d]);
cbuf[d] = (uchar)diff;
hsad[d] = hsad[d] + diff - cbuf_sub[d];
}
htext[y] += tab[lval] - tab[lptr_sub[0]];
}
// fill borders
for(y = dy1; y <= wsz2; y++)
htext[height+y] = htext[height+dy1-1];
for(y = -wsz2-1; y < -dy0; y++)
htext[y] = htext[-dy0];
// initialize sums
for(d = 0; d < ndisp; d++)
sad[d] = (int)(hsad0[d-ndisp*dy0]*(wsz2 + 2 - dy0));
hsad = hsad0 + (1 - dy0)*ndisp;
for(y = 1 - dy0; y < wsz2; y++, hsad += ndisp)
for(d = 0; d < ndisp; d++)
sad[d] = (int)(sad[d] + hsad[d]);
int tsum = 0;
for(y = -wsz2-1; y < wsz2; y++)
tsum += htext[y];
// finally, start the real processing
for(y = 0; y < height; y++)
{
int minsad = INT_MAX, mind = -1;
hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
for(d = 0; d < ndisp; d++)
{
int currsad = sad[d] + hsad[d] - hsad_sub[d];
sad[d] = currsad;
if(currsad < minsad)
{
minsad = currsad;
mind = d;
}
}
tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
if(tsum < textureThreshold)
{
dptr[y*dstep] = FILTERED;
continue;
}
if(uniquenessRatio > 0)
{
int thresh = minsad + (minsad * uniquenessRatio/100);
for(d = 0; d < ndisp; d++)
{
if(sad[d] <= thresh && (d < mind-1 || d > mind+1))
break;
}
if(d < ndisp)
{
dptr[y*dstep] = FILTERED;
continue;
}
}
{
sad[-1] = sad[1];
sad[ndisp] = sad[ndisp-2];
int p = sad[mind+1], n = sad[mind-1];
d = p + n - 2*sad[mind] + std::abs(p - n);
dptr[y*dstep] = (short)(((ndisp - mind - 1 + mindisp)*256 + (d != 0 ? (p-n)*256/d : 0) + 15) >> 4);
costptr[y*coststep] = sad[mind];
}
}
}
}
Quindi vuol utilizzare la maggior parte del tempo nel ciclo 'p' o nella chiamata al Mat (...)? – Surt
nel ciclo 'p'. – Seven
Ho controllato il dispendio di tempo di ogni passaggio. Prima del ciclo 'p', ci vogliono circa 70 ms e il ciclo 'p' usa 320 ms. – Seven