Question
I have written some routines to sharpen a grayscale image using the 3x3 kernel:
-1 -1 -1
-1 9 -1
-1 -1 -1
The following code works well for non-FFT (spatial-domain) convolution, but not for FFT-based (frequency-domain) convolution.
The output image comes out blurred.
I have several problems:
(1) This routine is not able to generate the desired result. It also freezes the application.
public static Bitmap ApplyWithPadding(Bitmap image, Bitmap mask)
{
    if (image.PixelFormat == PixelFormat.Format8bppIndexed)
    {
        Bitmap imageClone = (Bitmap)image.Clone();
        Bitmap maskClone = (Bitmap)mask.Clone();

        Complex[,] cPaddedLena = ImageDataConverter.ToComplex(imageClone);
        Complex[,] cPaddedMask = ImageDataConverter.ToComplex(maskClone);
        Complex[,] cConvolved = Convolution.Convolve(cPaddedLena, cPaddedMask);

        return ImageDataConverter.ToBitmap(cConvolved);
    }
    else
    {
        throw new Exception("not a grayscale");
    }
}
(2) This routine gives a good result, but it is as slow as hell.
public static Bitmap Apply(Bitmap sourceBitmap)
{
    Sharpen filter = new Sharpen();

    BitmapData sourceData = sourceBitmap.LockBits(
        new Rectangle(0, 0, sourceBitmap.Width, sourceBitmap.Height),
        ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);

    byte[] pixelBuffer = new byte[sourceData.Stride * sourceData.Height];
    byte[] resultBuffer = new byte[sourceData.Stride * sourceData.Height];

    Marshal.Copy(sourceData.Scan0, pixelBuffer, 0, pixelBuffer.Length);
    sourceBitmap.UnlockBits(sourceData);

    double blue = 0.0;
    double green = 0.0;
    double red = 0.0;

    int filterWidth = filter.FilterMatrix.GetLength(1);
    int filterHeight = filter.FilterMatrix.GetLength(0);
    int filterOffset = (filterWidth - 1) / 2;
    int calcOffset = 0;
    int byteOffset = 0;

    for (int offsetY = filterOffset; offsetY < sourceBitmap.Height - filterOffset; offsetY++)
    {
        for (int offsetX = filterOffset; offsetX < sourceBitmap.Width - filterOffset; offsetX++)
        {
            blue = 0;
            green = 0;
            red = 0;

            byteOffset = offsetY * sourceData.Stride + offsetX * 4;

            for (int filterY = -filterOffset; filterY <= filterOffset; filterY++)
            {
                for (int filterX = -filterOffset; filterX <= filterOffset; filterX++)
                {
                    calcOffset = byteOffset + (filterX * 4) + (filterY * sourceData.Stride);

                    blue += (double)(pixelBuffer[calcOffset]) *
                            filter.FilterMatrix[filterY + filterOffset, filterX + filterOffset];
                    green += (double)(pixelBuffer[calcOffset + 1]) *
                             filter.FilterMatrix[filterY + filterOffset, filterX + filterOffset];
                    red += (double)(pixelBuffer[calcOffset + 2]) *
                           filter.FilterMatrix[filterY + filterOffset, filterX + filterOffset];
                }
            }

            blue = filter.Factor * blue + filter.Bias;
            green = filter.Factor * green + filter.Bias;
            red = filter.Factor * red + filter.Bias;

            if (blue > 255) { blue = 255; }
            else if (blue < 0) { blue = 0; }

            if (green > 255) { green = 255; }
            else if (green < 0) { green = 0; }

            if (red > 255) { red = 255; }
            else if (red < 0) { red = 0; }

            resultBuffer[byteOffset] = (byte)(blue);
            resultBuffer[byteOffset + 1] = (byte)(green);
            resultBuffer[byteOffset + 2] = (byte)(red);
            resultBuffer[byteOffset + 3] = 255;
        }
    }

    Bitmap resultBitmap = new Bitmap(sourceBitmap.Width, sourceBitmap.Height);
    BitmapData resultData = resultBitmap.LockBits(
        new Rectangle(0, 0, resultBitmap.Width, resultBitmap.Height),
        ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);

    Marshal.Copy(resultBuffer, 0, resultData.Scan0, resultBuffer.Length);
    resultBitmap.UnlockBits(resultData);

    return resultBitmap;
}
(3) The following is my GUI code. SharpenFilter.ApplyWithPadding() works properly if I use an image as the mask, but doesn't work if I use, say, a 3x3 kernel.
string path = @"E:\lena.png";
string path2 = @"E:\mask.png";
Bitmap _inputImage;
Bitmap _maskImage;
private void LoadImages_Click(object sender, EventArgs e)
{
_inputImage = Grayscale.ToGrayscale(Bitmap.FromFile(path) as Bitmap);
/*
_maskImage = Grayscale.ToGrayscale(Bitmap.FromFile(path2) as Bitmap);
*/
SharpenFilter filter = new SharpenFilter();
double[,] mask = new double[,] { { -1, -1, -1, },
{ -1, 9, -1, },
{ -1, -1, -1, }, };
_maskImage = ImageDataConverter.ToBitmap(mask);
inputImagePictureBox.Image = _inputImage;
maskPictureBox.Image = _maskImage;
}
Bitmap _paddedImage;
Bitmap _paddedMask;
private void padButton_Click(object sender, EventArgs e)
{
Bitmap lena = Grayscale.ToGrayscale(_inputImage);
Bitmap mask = Grayscale.ToGrayscale(_maskImage);
////Not working...
//int maxWidth = (int)Math.Max(lena.Width, mask.Width);
//int maxHeight = (int)Math.Max(lena.Height, mask.Height);
////This is working correctly in case if I use a png image as a mask.
int maxWidth = (int)Tools.ToNextPow2(Convert.ToUInt32(lena.Width + mask.Width));
int maxHeight = (int)Tools.ToNextPow2(Convert.ToUInt32(lena.Height + mask.Height));
_paddedImage = ImagePadder.Pad(lena, maxWidth, maxHeight);
_paddedMask = ImagePadder.Pad(mask, maxWidth, maxHeight);
paddedImagePictureBox.Image = _paddedImage;
paddedMaskPictureBox.Image = _paddedMask;
}
private void filterButton_Click(object sender, EventArgs e)
{
// Not working properly.
// Freezes the application.
Bitmap sharp = SharpenFilter.ApplyWithPadding(_paddedImage, _paddedMask);
////Works well. But, very slow.
//Bitmap sharp = SharpenFilter.Apply(_paddedImage);
filteredPictureBox.Image = sharp as Bitmap;
}
Output:
Source code:
- You can download the entire solution from this link.
Answer 1:
The main issue appears to be with the interpretation of the kernel as an image consisting of unsigned byte values. As a result, the -1 values are converted to 255, effectively computing a convolution with the kernel
255 255 255
255 9 255
255 255 255
This can be immediately observed from the white area in the "Convolution Kernel" image. The resulting kernel is thus that of a low-pass filter, producing a corresponding blurring effect.
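To make the wrap-around concrete, here is a tiny standalone C# snippet (my own illustration, not part of the original project) showing how -1 ends up stored as 255 and how the signed value can be recovered:

// Two's-complement wrap-around: storing -1 in an unsigned byte yields 255.
byte wrapped = unchecked((byte)(-1));                     // 255
// Mapping bytes >= 128 back down by 256 recovers the signed value.
int restored = wrapped >= 128 ? wrapped - 256 : wrapped;  // -1
Console.WriteLine($"{wrapped} -> {restored}");            // prints "255 -> -1"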
Probably the best way to handle this would be to read the kernel as a matrix of signed values instead of as an image.
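For example, a minimal sketch of that approach (the helper name ToComplexKernel and its padding parameters are my own; it assumes the same Complex type and the [x, y] indexing convention used elsewhere in the project):

public static Complex[,] ToComplexKernel(double[,] kernel, int paddedWidth, int paddedHeight)
{
    // The zero-initialized array doubles as the zero padding around the small kernel.
    Complex[,] result = new Complex[paddedWidth, paddedHeight];
    for (int y = 0; y < kernel.GetLength(0); y++)
    {
        for (int x = 0; x < kernel.GetLength(1); x++)
        {
            // Signed kernel values (e.g. -1) are preserved instead of wrapping to 255.
            result[x, y] = new Complex(kernel[y, x], 0.0);
        }
    }
    return result;
}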
If you still prefer to handle the kernel as an image, you would need to convert the image back to signed values. The simplest way I can think of achieving this would be to create a modified version of ImageDataConverter.ToInteger(Bitmap) where you map the bytes to signed values:
public static Complex[,] Unwrap(Bitmap bitmap)
{
    int Width = bitmap.Width;
    int Height = bitmap.Height;
    Complex[,] array2D = new Complex[bitmap.Width, bitmap.Height];
    ...
            else // If there is only one channel:
            {
                iii = (int)(*address);
                if (iii >= 128)
                {
                    iii -= 256;
                }
            }
            Complex tempComp = new Complex((double)iii, 0.0);
            array2D[x, y] = tempComp;
You would then be able to convert your kernel image in SharpenFilter.ApplyWithPadding with:
Complex[,] cPaddedMask = ImageDataConverter.Unwrap(maskClone);
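Put together, the core of ApplyWithPadding would then read roughly as follows (a sketch; only the mask conversion changes):

Complex[,] cPaddedLena = ImageDataConverter.ToComplex(imageClone);
Complex[,] cPaddedMask = ImageDataConverter.Unwrap(maskClone); // signed mapping for the kernel
Complex[,] cConvolved = Convolution.Convolve(cPaddedLena, cPaddedMask);
return ImageDataConverter.ToBitmap(cConvolved);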
This should then give you the following result:
While this improves the sharpness of the image, you should immediately notice that the image is much darker than the original. This is due to the Convolution.Rescale function, which dynamically rescales the image according to its minimum and maximum values. This can be convenient to show the image with maximum dynamic range, but it can result in a different overall scaling than a standard convolution. To achieve the standard scaling (based on the scaling of your FFT implementation), you could use the following implementation:
// Rescale values between 0 and 255.
private static void Rescale(Complex[,] convolve)
{
    int imageWidth = convolve.GetLength(0);
    int imageHeight = convolve.GetLength(1);

    double scale = imageWidth * imageHeight;

    for (int j = 0; j < imageHeight; j++)
    {
        for (int i = 0; i < imageWidth; i++)
        {
            double re = Math.Max(0, Math.Min(convolve[i, j].Real * scale, 255.0));
            double im = Math.Max(0, Math.Min(convolve[i, j].Imaginary * scale, 255.0));
            convolve[i, j] = new Complex(re, im);
        }
    }
}
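Note that the imageWidth * imageHeight factor assumes an FFT pair in which neither the forward nor the inverse transform is normalized; if your inverse FFT already divides by the number of samples, this extra factor should be dropped.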
This should then give you an image with a more appropriate brightness level:
Finally, for a filtering operation one would typically expect the result to match the original image size (unlike a full convolution, which includes the tails). Cropping the result in SharpenFilter.ApplyWithPadding with:
...
// -3 terms are due to the kernel size
// +5 vertical offset term is due to the vertical reflection & offset in SetPixel
Rectangle rect = new Rectangle((cPaddedLena.GetLength(0) / 2 - 3) / 2,
                               (cPaddedLena.GetLength(1) / 2 - 3) / 2 + 5,
                               cPaddedLena.GetLength(0) / 2,
                               cPaddedLena.GetLength(1) / 2);
return ImageDataConverter.ToBitmap(cConvolved).Clone(rect, PixelFormat.Format8bppIndexed);
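Note that the -3 and +5 offsets are tied to this particular 3x3 kernel and to the vertical reflection and offset introduced by ToBitmap/SetPixel in this project, so they would need to be adjusted for other kernel or image sizes.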
should give you:
For easier visual comparison, here is the original image again:
Source: https://stackoverflow.com/questions/39114265/fft-convolution-3x3-kernel