convert float to integer

问题

How should the float dctmtx coefficients be scaled, and by which factor, to obtain the following integer values?

float dctmtx:

( (0.3536    0.3536    0.3536    0.3536    0.3536    0.3536    0.3536    0.3536),
    (0.4904    0.4157    0.2778    0.0975   -0.0975   -0.2778   -0.4157   -0.4904),
    (0.4619    0.1913   -0.1913   -0.4619   -0.4619   -0.1913    0.1913    0.4619),
    (0.4157   -0.0975   -0.4904   -0.2778    0.2778    0.4904    0.0975   -0.4157),
    (0.3536   -0.3536   -0.3536    0.3536    0.3536   -0.3536   -0.3536    0.3536),
    (0.2778   -0.4904    0.0975    0.4157   -0.4157   -0.0975    0.4904   -0.2778),
    (0.1913   -0.4619    0.4619   -0.1913   -0.1913    0.4619   -0.4619    0.1913),
    (0.0975   -0.2778    0.4157   -0.4904    0.4904   -0.4157    0.2778   -0.0975)
)

integer dctmtx:

(( 125,   122,   115,    103,    88,     69,     47,      24  ),
        ( 125,   103,    47,    -24,   -88,   -122,   -115,     -69  ),
        ( 125,    69,   -47,   -122,   -88,     24,    115,     103  ),
        ( 125,    24,  -115,    -69,    88,    103,    -47,    -122  ),
        ( 125,   -24,  -115,     69,    88,   -103,    -47,     122  ),
        ( 125,   -69,   -47,    122,   -88,    -24,    115,    -103  ),
        ( 125,  -103,    47,     24,   -88,    122,   -115,      69  ),
        ( 125,  -122,   115,   -103,    88,    -69,     47,     -24  )
    );

回答1:

If you read up on the discrete cosine transform, you will find that the basic coefficient is

cos(pi*i*(2*j+1)/16),  i,j=0..7

Then the first table consists of these values scaled by 0.5, except for the first row/column, which is scaled by 0.25*sqrt(2)=1/sqrt(8); this is the correct way to obtain an orthogonal matrix. The square sum of the unscaled cosine values in the first column is 8, and in each of the others it is 4.

The second table is the result of multiplying the cosine values uniformly by 125 and rounding toward zero, i.e. truncating (for example, 122.5982 becomes 122 and 103.9337 becomes 103). Here one has to take care to properly rescale the vector when using the transpose matrix to compute the inverse transform.

First table reproduced, except for the first column:

> [[ Cos(pi*i*(2*j+1)/16)/2 : i in [0..7] ]: j in [0..7] ];        
[
    [ 0.5, 0.49039264, 0.46193977, 0.41573481, 0.35355339, 0.27778512, 0.19134172, 0.09754516 ],
    [ 0.5, 0.41573481, 0.19134172, -0.09754516, -0.35355339, -0.49039264, -0.46193977, -0.27778512 ],
    [ 0.5, 0.27778512, -0.19134172, -0.49039264, -0.35355339, 0.09754516, 0.46193977, 0.41573481 ],
    [ 0.5, 0.09754516, -0.46193977, -0.27778512, 0.35355339, 0.41573481, -0.19134172, -0.49039264 ],
    [ 0.5, -0.09754516, -0.46193977, 0.27778512, 0.35355339, -0.41573481, -0.19134172, 0.49039264 ],
    [ 0.5, -0.27778512, -0.19134172, 0.49039264, -0.35355339, -0.09754516, 0.46193977, -0.41573481 ],
    [ 0.5, -0.41573481, 0.19134172, 0.09754516, -0.35355339, 0.49039264, -0.46193977, 0.27778512 ],
    [ 0.5, -0.49039264, 0.46193977, -0.41573481, 0.35355339, -0.27778512, 0.19134172, -0.09754516 ]
]

Second table, before integer rounding

> [[ Cos( pi*i*(2*j+1)/16 ) *125 : i in [0..7] ]: j in [0..7] ];       
[
    [ 125, 122.5982, 115.4849, 103.9337, 88.3883, 69.4463, 47.8354, 24.3863 ],
    [ 125, 103.9337, 47.8354, -24.3863, -88.3883, -122.5982, -115.4849, -69.4463 ],
    [ 125, 69.4463, -47.8354, -122.5982, -88.3883, 24.3863, 115.4849, 103.9337 ],
    [ 125, 24.3863, -115.4849, -69.4463, 88.3883, 103.9337, -47.8354, -122.5982 ],
    [ 125, -24.3863, -115.4849, 69.4463, 88.3883, -103.9337, -47.8354, 122.5982 ],
    [ 125, -69.4463, -47.8354, 122.5982, -88.3883, -24.3863, 115.4849, -103.9337 ],
    [ 125, -103.9337, 47.8354, 24.3863, -88.3883, 122.5982, -115.4849, 69.4463 ],
    [ 125, -122.5982, 115.4849, -103.9337, 88.3883, -69.4463, 47.8354, -24.3863 ]
]

回答2:

Besides one of the two matrices being rotated (transposed), the two don't appear to have a direct linear relationship:

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
/*
 * Print an 8x8 integer table derived from the four-decimal float DCT table.
 *
 * The float table is read transposed: every printed line holds one column
 * of dctmtx. Entries taken from row 0 of dctmtx are multiplied by 354, all
 * other entries by 248, and each product is rounded with lroundf().
 */
int main (int argc, char *argv[])
{
    float dctmtx[8][8] = {
        { 0.3536,   0.3536,   0.3536,   0.3536,   0.3536,   0.3536,   0.3536,   0.3536 },
        { 0.4904,   0.4157,   0.2778,   0.0975,  -0.0975,  -0.2778,  -0.4157,  -0.4904 },
        { 0.4619,   0.1913,  -0.1913,  -0.4619,  -0.4619,  -0.1913,   0.1913,   0.4619 },
        { 0.4157,  -0.0975,  -0.4904,  -0.2778,   0.2778,   0.4904,   0.0975,  -0.4157 },
        { 0.3536,  -0.3536,  -0.3536,   0.3536,   0.3536,  -0.3536,  -0.3536,   0.3536 },
        { 0.2778,  -0.4904,   0.0975,   0.4157,  -0.4157,  -0.0975,   0.4904,  -0.2778 },
        { 0.1913,  -0.4619,   0.4619,  -0.1913,  -0.1913,   0.4619,  -0.4619,   0.1913 },
        { 0.0975,  -0.2778,   0.4157,  -0.4904,   0.4904,  -0.4157,   0.2778,  -0.0975 }
    };
    int col;
    int row;

    for (col = 0; col < 8; col++) {
        for (row = 0; row < 8; row++) {
            /* Row 0 of the float table gets its own scale factor. */
            int factor = (row == 0) ? 354 : 248;
            float scaled = dctmtx[row][col] * factor;
            int rounded = lroundf(scaled);

            printf("%4d ", rounded);
        }
        printf("\n");
    }
}

The first coefficient in each row appears to be scaled to a different accuracy than the remaining ones:

%% convftoi
 125  122  115  103   88   69   47   24
 125  103   47  -24  -88 -122 -115  -69
 125   69  -47 -122  -88   24  115  103
 125   24 -115  -69   88  103  -47 -122
 125  -24 -115   69   88 -103  -47  122
 125  -69  -47  122  -88  -24  115 -103
 125 -103   47   24  -88  122 -115   69
 125 -122  115 -103   88  -69   47  -24

This output was obtained after a little finessing to find scaling factors that did give a match.

addendum

After LutzL's answer I derived the float coefficient matrix algorithmically:

#include <stdlib.h>
#include <stdio.h>
#include <math.h>

#define PI 3.14159265359

/*
 * Compute an 8x8 table of DCT coefficients from the cosine formula, round
 * each value to four decimals, and print three tables:
 *   1. the rounded float coefficients,
 *   2. integers obtained by truncating coefficient * 250 (entries in
 *      column 0 of calcmtx are additionally multiplied by sqrt(2)),
 *   3. integers obtained by rounding coefficient * 354 (column 0 of
 *      calcmtx) or coefficient * 248 (all other columns) with lroundf().
 */
int main (int argc, char *argv[])
{
    float calcmtx[8][8];
    int u;
    int x;
    int r;
    int c;

    printf("float coefficients calculated\n");
    /* u multiplies the angle, x enters as (2*x+1); the value is stored at calcmtx[x][u]. */
    for (u = 0; u < 8; u++) {
        for (x = 0; x < 8; x++) {
            /* The u == 0 terms are divided by 2*sqrt(2), all others by 2. */
            float coeff = (u == 0) ? cos(PI*u*(2*x+1)/16)/(sqrt(2)*2)
                                   : cos(PI*u*(2*x+1)/16)/2;

            /* Keep four decimal places. */
            calcmtx[x][u] = floorf(coeff*10000 + 0.5)/10000;
        }
    }

    /* Printed with u as the line index, i.e. calcmtx is shown transposed. */
    for (u = 0; u < 8; u++) {
        for (x = 0; x < 8; x++) {
            printf("% 2.4f ", calcmtx[x][u]);
        }
        printf("\n");
    }
    printf("\n") ;

    printf("integer coefficients derived\n");
    for (r = 0; r < 8; r++) {
        for (c = 0; c < 8; c++) {
            /* Column 0 gets the extra sqrt(2); the cast truncates toward zero. */
            float boost = (c == 0) ? sqrt(2) : 1;
            int truncated = (int) (calcmtx[r][c] * 250 * boost);

            printf("%4d ", truncated);
        }
        printf("\n");
    }
    printf("\n") ;

    printf("approximated integer coefficients\n");
    for (r = 0; r < 8; r++) {
        for (c = 0; c < 8; c++) {
            int factor = (c == 0) ? 354 : 248;
            float scaled = calcmtx[r][c] * factor;
            int rounded = lroundf(scaled);

            printf("%4d ", rounded);
        }
        printf("\n");
    }
}

And we see that the integer matrix first coefficient is multiplied by the square root of two:

%% gencoeffi
float coefficients calculated
 0.3536  0.3536  0.3536  0.3536  0.3536  0.3536  0.3536  0.3536
 0.4904  0.4157  0.2778  0.0975 -0.0975 -0.2778 -0.4157 -0.4904
 0.4619  0.1913 -0.1913 -0.4619 -0.4619 -0.1913  0.1913  0.4619
 0.4157 -0.0975 -0.4904 -0.2778  0.2778  0.4904  0.0975 -0.4157
 0.3536 -0.3536 -0.3536  0.3536  0.3536 -0.3536 -0.3536  0.3536
 0.2778 -0.4904  0.0975  0.4157 -0.4157 -0.0975  0.4904 -0.2778
 0.1913 -0.4619  0.4619 -0.1913 -0.1913  0.4619 -0.4619  0.1913
 0.0975 -0.2778  0.4157 -0.4904  0.4904 -0.4157  0.2778 -0.0975

integer coefficients derived
 125  122  115  103   88   69   47   24
 125  103   47  -24  -88 -122 -115  -69
 125   69  -47 -122  -88   24  115  103
 125   24 -115  -69   88  103  -47 -122
 125  -24 -115   69   88 -103  -47  122
 125  -69  -47  122  -88  -24  115 -103
 125 -103   47   24  -88  122 -115   69
 125 -122  115 -103   88  -69   47  -24

approximated integer coefficients
 125  122  115  103   88   69   47   24
 125  103   47  -24  -88 -122 -115  -69
 125   69  -47 -122  -88   24  115  103
 125   24 -115  -69   88  103  -47 -122
 125  -24 -115   69   88 -103  -47  122
 125  -69  -47  122  -88  -24  115 -103
 125 -103   47   24  -88  122 -115   69
 125 -122  115 -103   88  -69   47  -24

This matches the approximation when the float accuracy is limited.

来源：https://stackoverflow.com/questions/23843140/convert-float-to-integer

标签

math

image-processing

vhdl

dct