SledgeHammer didn't help, so I had to do it myself. Here it is again: an improved, much faster algorithm than the previous one, with no SetPixel or GetPixel calls :)

void AlphaBlend(CDC* pDC, int xDest, int yDest, int nDestWidth, int nDestHeight,
CDC* pSrcDC, int xSrc, int ySrc, BYTE uAlphaValue)
{
ASSERT(pDC);
BOOL bProceed = TRUE;
CDC memDC;
if(!memDC.CreateCompatibleDC(pDC))
{
bProceed = FALSE;
}
CBitmap memBmp;
if(bProceed)
{
if(!memBmp.CreateCompatibleBitmap(pDC, nDestWidth, nDestHeight))
{
bProceed = FALSE;
}
}
if(bProceed)
{
CBitmap* pOldBmp = memDC.SelectObject(&memBmp);
memDC.BitBlt(0, 0, nDestWidth, nDestHeight, pDC, 0, 0, SRCCOPY);
int nColorSize = sizeof(COLORREF);
DWORD dwSize = nDestWidth * nDestHeight * nColorSize;
BYTE* pBitmapBits = new BYTE [dwSize];
if(!pBitmapBits ||
memBmp.GetBitmapBits(dwSize, pBitmapBits) <= 0)
{
memDC.SelectObject(pOldBmp);
if(pBitmapBits)
{
delete[] pBitmapBits;
}
return;
}
COLORREF* pBits = (COLORREF*)pBitmapBits;
CBitmap tempBmp;
tempBmp.CreateCompatibleBitmap(pDC, nDestWidth, nDestHeight);
CBitmap* pSrcBitmap = pSrcDC->SelectObject(&tempBmp);
BYTE* pSrcBitmapBits = new BYTE [dwSize];
if(!pSrcBitmapBits
|| pSrcBitmap->GetBitmapBits(dwSize, pSrcBitmapBits) <= 0)
{
pSrcDC->SelectObject(pSrcBitmap);
if(pSrcBitmapBits)
{
delete[] pSrcBitmapBits;
}
return;
}
COLORREF* pSrcBits = (COLORREF*)pSrcBitmapBits;
pSrcDC->SelectObject(pSrcBitmap);
CDC* pDestDC = &memDC;
ASSERT(pDestDC);
ASSERT(pSrcDC);
BYTE r1, r2, rDest;
BYTE g1, g2, gDest;
BYTE b1, b2, bDest;
BYTE av = uAlphaValue;
BYTE rem = 255 - av;
COLORREF clrPixelDest;
COLORREF clrPixelSrc;
for(int dy = yDest, sy = ySrc; dy < nDestHeight; dy++, sy++)
{
for(int dx = xDest, sx = xSrc; dx < nDestWidth; dx++, sx++)
{
clrPixelDest = pBits[dy * nDestWidth + dx];
b1 = GetRValue(clrPixelDest);
g1 = GetGValue(clrPixelDest);
r1 = GetBValue(clrPixelDest);
clrPixelSrc = pSrcBits[sy * nDestWidth + sx];
b2 = GetRValue(clrPixelSrc);
g2 = GetGValue(clrPixelSrc);
r2 = GetBValue(clrPixelSrc);
rDest = (r1*rem + r2*av) / 255;
gDest = (g1*rem + g2*av) / 255;
bDest = (b1*rem + b2*av) / 255;
pBits[dy * nDestWidth + dx] = RGB(bDest, gDest, rDest);
}
}
memBmp.SetBitmapBits(dwSize, pBits);
pDC->BitBlt(0, 0, nDestWidth, nDestHeight, &memDC, 0, 0, SRCCOPY);
memDC.SelectObject(pOldBmp);
delete[] pBitmapBits;
delete[] pSrcBitmapBits;
}