How to get CRC64 distributed calculation (use its linearity property)?

前端 未结 2 1436
走了就别回头了
走了就别回头了 2021-02-02 01:35

I need a hash over pretty large files which are stored on a distributed FS. I'm able to process parts of a file with much better performance than the whole file, so I'd like to be ab…

2条回答
  •  花落未央
    2021-02-02 01:51

    OK, my contribution to this. Ported to Java.

    • I could not gain anything from processing 8-byte blocks without resorting to unsafe operations, so I removed the block calculation.
    • I stayed with the ECMA polynomial - the ISO one looks too transparent to me.
    • Of course, in the final version I will move the test code into JUnit.

    So here is the code:

    package com.test;
    
    import java.util.Arrays;
    
    /**
     * CRC-64 implementation with the ability to combine checksums calculated over
     * different blocks of data.
     *
     * <p>The checksum is table-driven and processes one byte at a time. The lookup
     * table is built with right shifts, i.e. the polynomial constant is used in its
     * bit-reflected form. The running value is inverted before and after every
     * {@link #update(byte[], int)} call, so feeding the data in several consecutive
     * {@code update} calls yields the same result as one call over the whole data.
     *
     * <p>Instances are mutable and carry no synchronization.
     */
    public class CRC64 {

        /** Generator polynomial (ECMA-182), as used by the right-shifting table builder. */
        private static final long POLY = 0xc96c5795d7870f42L;

        /** Dimension of the GF(2) vectors, i.e. the width of the CRC in bits. */
        private static final int GF2_DIM = 64;

        /** Number of bytes in the serialized form produced by {@link #getBytes()}. */
        private static final int BYTES = 8;

        /* CRC64 calculation table: table[n] is the CRC register after shifting byte n through. */
        private static final long[] table;

        /* Current CRC value. */
        private long value;

        static {
            table = new long[256];

            for (int n = 0; n < 256; n++) {
                long crc = n;
                for (int k = 0; k < 8; k++) {
                    if ((crc & 1) == 1) {
                        crc = (crc >>> 1) ^ POLY;
                    } else {
                        crc = (crc >>> 1);
                    }
                }
                table[n] = crc;
            }
        }

        /** Creates a checksum with the initial value 0 (the CRC of empty input). */
        public CRC64() {
            this.value = 0;
        }

        /**
         * Creates a checksum that starts from a previously obtained CRC value.
         *
         * @param value CRC value as returned by {@link #getValue()}
         */
        public CRC64(long value) {
            this.value = value;
        }

        /**
         * Creates a checksum over the first {@code len} bytes of {@code b}.
         *
         * @param b   data to checksum
         * @param len number of leading bytes of {@code b} to process
         * @throws ArrayIndexOutOfBoundsException if {@code len} exceeds {@code b.length}
         */
        public CRC64(byte [] b, int len) {
            this.value = 0;
            update(b, len);
        }

        /**
         * Construct new CRC64 instance from byte array.
         *
         * <p>This is the inverse of {@link #getBytes()}: the first 8 bytes of
         * {@code b} are read as a big-endian 64-bit value. Any bytes after the
         * eighth are ignored.
         *
         * @param b array holding at least 8 bytes
         * @return checksum whose value is the decoded 64-bit number
         * @throws IllegalArgumentException if {@code b} holds fewer than 8 bytes
         */
        public static CRC64 fromBytes(byte [] b) {
            if (b.length < BYTES) {
                throw new IllegalArgumentException(
                    "CRC64 needs " + BYTES + " bytes but got " + b.length);
            }
            long l = 0;
            // All 8 bytes must be consumed; reading fewer would drop the low-order
            // part of the value written by getBytes().
            for (int i = 0; i < BYTES; i++) {
                l <<= 8;
                l ^= (long) b[i] & 0xFF;
            }
            return new CRC64(l);
        }

        /**
         * Get 8 byte representation of current CRC64 value.
         *
         * @return new 8-element array, most significant byte first
         */
        public byte[] getBytes() {
            byte [] b = new byte[BYTES];
            for (int i = 0; i < BYTES; i++) {
                b[BYTES - 1 - i] = (byte) (this.value >>> (i * 8));
            }
            return b;
        }

        /**
         * Get long representation of current CRC64 value.
         *
         * @return current CRC value
         */
        public long getValue() {
            return this.value;
        }

        /**
         * Update CRC64 with new byte block.
         *
         * <p>A non-positive {@code len} leaves the checksum unchanged.
         *
         * @param b   data to feed into the checksum
         * @param len number of leading bytes of {@code b} to process
         * @throws ArrayIndexOutOfBoundsException if {@code len} exceeds {@code b.length};
         *         the checksum is left untouched in that case
         */
        public void update(byte [] b, int len) {
            if (len <= 0) {
                return;
            }
            // Validate before touching the state so that a bad length cannot leave
            // the checksum half-updated (and still inverted).
            if (len > b.length) {
                throw new ArrayIndexOutOfBoundsException(
                    "len " + len + " exceeds array length " + b.length);
            }

            long crc = ~this.value;
            for (int idx = 0; idx < len; idx++) {
                crc = table[((int) (crc ^ b[idx])) & 0xff] ^ (crc >>> 8);
            }
            this.value = ~crc;
        }

        /**
         * Multiplies a GF(2) matrix by a GF(2) vector.
         *
         * @param mat matrix given as {@code GF2_DIM} column words
         * @param vec vector; bit {@code i} selects column {@code i}
         * @return XOR of the columns selected by the set bits of {@code vec}
         */
        private static long gf2MatrixTimes(long [] mat, long vec)
        {
            long sum = 0;
            int idx = 0;
            while (vec != 0) {
                if ((vec & 1) == 1) {
                    sum ^= mat[idx];
                }
                vec >>>= 1;
                idx++;
            }
            return sum;
        }

        /**
         * Stores the square of {@code mat} into {@code square}.
         *
         * @param square destination, must not be the same array as {@code mat}
         * @param mat    matrix to square
         */
        private static void gf2MatrixSquare(long [] square, long [] mat)
        {
            for (int n = 0; n < GF2_DIM; n++) {
                square[n] = gf2MatrixTimes(mat, mat[n]);
            }
        }

        /**
         * Return the CRC-64 of two sequential blocks, where summ1 is the CRC-64 of the
         * first block, summ2 is the CRC-64 of the second block, and len2 is the length
         * of the second block.
         *
         * @param summ1 checksum of the first block
         * @param summ2 checksum of the second block
         * @param len2  length of the second block in bytes
         * @return new checksum of the concatenation; the arguments are not modified
         */
        static public CRC64 combine(CRC64 summ1, CRC64 summ2, long len2)
        {
            // degenerate case.
            if (len2 == 0) {
                return new CRC64(summ1.getValue());
            }

            int n;
            long row;
            long [] even = new long[GF2_DIM]; // even-power-of-two zeros operator
            long [] odd  = new long[GF2_DIM];  // odd-power-of-two zeros operator

            // put operator for one zero bit in odd
            odd[0] = POLY;      // CRC-64 polynomial

            row = 1;
            for (n = 1; n < GF2_DIM; n++) {
                odd[n] = row;
                row <<= 1;
            }

            // put operator for two zero bits in even
            gf2MatrixSquare(even, odd);

            // put operator for four zero bits in odd
            gf2MatrixSquare(odd, even);

            // apply len2 zeros to crc1 (first square will put the operator for one
            // zero byte, eight zero bits, in even)
            long crc1 = summ1.getValue();
            long crc2 = summ2.getValue();
            do {
                // apply zeros operator for this bit of len2
                gf2MatrixSquare(even, odd);
                if ((len2 & 1) == 1) {
                    crc1 = gf2MatrixTimes(even, crc1);
                }
                len2 >>>= 1;

                // if no more bits set, then done
                if (len2 == 0) {
                    break;
                }

                // another iteration of the loop with odd and even swapped
                gf2MatrixSquare(odd, even);
                if ((len2 & 1) == 1) {
                    crc1 = gf2MatrixTimes(odd, crc1);
                }
                len2 >>>= 1;

                // if no more bits set, then done
            } while (len2 != 0);

            // return combined crc.
            crc1 ^= crc2;
            return new CRC64(crc1);
        }

        /**
         * Fails with a descriptive message when a computed CRC differs from the expected one.
         */
        private static void requireEqual(long actual, long expected) {
            if (actual != expected) {
                throw new IllegalStateException("mismatch: " + String.format("%016x", actual)
                    + " should be " + String.format("%016x", expected));
            }
        }

        /**
         * Self-test: checks the direct calculation over the first {@code len} bytes of
         * {@code b}, then splits that range in two halves and checks that
         * {@link #combine} of the halves gives the same value.
         */
        private static void test(byte [] b, int len, long crcValue) {

            /* Test CRC64 default calculation. */
            requireEqual(new CRC64(b, len).getValue(), crcValue);

            /* test combine() */
            int firstLen = (len + 1) >>> 1;
            int secondLen = len >>> 1;
            CRC64 crc1 = new CRC64(b, firstLen);
            // Slice up to len (not b.length) so only the tested range is involved.
            CRC64 crc2 = new CRC64(Arrays.copyOfRange(b, firstLen, len), secondLen);
            requireEqual(CRC64.combine(crc1, crc2, secondLen).getValue(), crcValue);
        }

        /** Runs the built-in self-tests; throws if any checksum does not match. */
        public static void main(String [] args) throws Exception {

            // The test vectors are pure ASCII; name the charset explicitly so the
            // result does not depend on the platform default.
            final java.nio.charset.Charset ascii = java.nio.charset.StandardCharsets.US_ASCII;

            final byte[] TEST1 = "123456789".getBytes(ascii);
            final int    TESTLEN1 = 9;
            final long   TESTCRC1 = 0x995dc9bbdf1939faL; // ECMA.
            test(TEST1, TESTLEN1, TESTCRC1);

            final byte[] TEST2 = "This is a test of the emergency broadcast system.".getBytes(ascii);
            final int    TESTLEN2 = 49;
            final long   TESTCRC2 = 0x27db187fc15bbc72L; // ECMA.
            test(TEST2, TESTLEN2, TESTCRC2);

            final byte[] TEST3 = "IHATEMATH".getBytes(ascii);
            final int    TESTLEN3 = 9;
            final long   TESTCRC3 = 0x3920e0f66b6ee0c8L; // ECMA.
            test(TEST3, TESTLEN3, TESTCRC3);
        }
    }
    

提交回复
热议问题