Modulus of Product of two integers

徘徊边缘 提交于 2019-12-07 19:56:08

问题


I have to find c,

c = (a*b) mod m

a,b,c,m are 32-bit integers. But (a*b) can be more than 32 bits. I am trying to figure out a way to compute c, without using long or any data type > 32 bit.

Any ideas?

What if m is a prime number, can things be simplified?

Note: based on a few comments,

c = ((a mod m) * (b mod m)) mod m, but in my case even this multiplication will overflow


回答1:


I happen to have this 256 by 128 bit division code with a random-based test and I'm sure you can reduce it trivially to 128 by 64 bit division and then to 64 by 32:

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

typedef unsigned long long uint64;

#define C_ASSERT(expr) extern char CAssertExtern[(expr)?1:-1]
C_ASSERT(sizeof(uint64) * CHAR_BIT == 64);

void mul64full(uint64 prod[2], uint64 a, uint64 b)
{
  uint64 p0, p1, p2, p3;

  p0 = (a & 0xFFFFFFFF) *
       (b & 0xFFFFFFFF);

  p1 = (a & 0xFFFFFFFF) *
       (b >> 32);

  p2 = (b & 0xFFFFFFFF) *
       (a >> 32);

  p3 = (a >> 32) *
       (b >> 32);

  prod[0] = p0;
  prod[1] = p3;

  if ((p1 << 32) > 0xFFFFFFFFFFFFFFFFULL - prod[0])
    prod[1]++;
  prod[0] += p1 << 32;
  prod[1] += p1 >> 32;

  if ((p2 << 32) > 0xFFFFFFFFFFFFFFFFULL - prod[0])
    prod[1]++;
  prod[0] += p2 << 32;
  prod[1] += p2 >> 32;
}

void mul128short(uint64 prod[2], const uint64 a[2], const uint64 b[2])
{
  uint64 p0[2], p1[2], p2[2];

  mul64full(p0, a[0], b[0]);
  mul64full(p1, a[0], b[1]);
  mul64full(p2, a[1], b[0]);

  prod[0] = p0[0];
  prod[1] = p0[1] + p1[0] + p2[0];
}

int lessThan128(const uint64 a[2], const uint64 b[2])
{
  return (a[1] < b[1]) || ((a[1] == b[1]) && (a[0] < b[0]));
}

int div256x128(uint64 qr[4], const uint64 dividend[4], const uint64 divisor[2])
{
  int i;

  if (!lessThan128(dividend + 2, divisor))
    return 0; // overflow

  qr[0] = dividend[0];
  qr[1] = dividend[1];
  qr[2] = dividend[2];
  qr[3] = dividend[3];

  for (i = 0; i < 128; i++)
  {
    if (qr[3] >= 0x8000000000000000ULL)
    {
      qr[3] = (qr[3] << 1) | (qr[2] >> 63);
      qr[2] = (qr[2] << 1) | (qr[1] >> 63);
      qr[1] = (qr[1] << 1) | (qr[0] >> 63);
      qr[0] <<= 1;

      if (qr[2] < divisor[0])
        qr[3]--;
      qr[2] -= divisor[0];
      qr[3] -= divisor[1];

      qr[0] |= 1;
    }
    else
    {
      qr[3] = (qr[3] << 1) | (qr[2] >> 63);
      qr[2] = (qr[2] << 1) | (qr[1] >> 63);
      qr[1] = (qr[1] << 1) | (qr[0] >> 63);
      qr[0] <<= 1;

      if (!lessThan128(qr + 2, divisor))
      {
        if (qr[2] < divisor[0])
          qr[3]--;
        qr[2] -= divisor[0];
        qr[3] -= divisor[1];

        qr[0] |= 1;
      }
    }
  }

  return 1;
}

void randfill(void* p, size_t s)
{
  unsigned char* pc = p;
  while (s--)
    *pc++ = rand();
}

int main(void)
{
  int i;

  srand(time(NULL));

  for (i = 0; i < 10000; i++)
  {
    uint64 qr[4], dividend[4], divisor[2];

    // divisor, 64 bits only
    randfill(&divisor[0], sizeof(divisor[0]));
    divisor[1] = 0;

    if (divisor[0] != 0)
    {
      uint64 dividend2[2];

      // dividend, 128 bits only
      randfill(&dividend[0], sizeof(dividend[0]) * 2);
      dividend[3] = dividend[2] = 0;

      // divide
      div256x128(qr, dividend, divisor);

      // test via multiplication and addition
      mul128short(dividend2, qr, divisor);
      if (dividend2[0] + qr[2] < dividend2[0])
        dividend2[1]++;
      dividend2[0] += qr[2];
      dividend2[1] += qr[3];

      printf("0x%016llX%016llX / 0x%016llX = 0x%016llX%016llX:0x%016llX%016llX\n",
             dividend[1], dividend[0], divisor[0], qr[1], qr[0], qr[3], qr[2]);

      if (dividend[0] != dividend2[0] ||
          dividend[1] != dividend2[1])
      {
        printf("0x%016llX%016llX - restored dividend - ERROR\n",
               dividend2[1], dividend2[0]);
        exit(-1);
      }
    }
  }

  return 0;
}

Output reduced to 100 iterations (ideone):

0x72238CB3A1A9F5656A1E0F3567E6CE0E / 0xC7F98F34BBA0FAD9 = 0x0000000000000000921DBA985B7AB177:0x000000000000000060D9C93D34382A2F
0xA05F7D3BF45F2CBFF745F1E97EF75A4A / 0x3CBA50635A5A27A9 = 0x0000000000000002A40EC5297059E69F:0x0000000000000000290F7F44A795E253
0xD48C5015950A24AC8C2C5242F098F28C / 0x3D98659426410873 = 0x0000000000000003736258D9691352D7:0x000000000000000005E859D02FBD03F7
0x985CA6ECCF7A9A8D8A029AFDC4029830 / 0xDC572869B7C9EB7C = 0x0000000000000000B10519E30A95B186:0x0000000000000000454594E80D549948
0x13609B86D699ECD9D5EA40A50EE97DA5 / 0x32B4B9B6DF2D66CA = 0x000000000000000061D4B3346F3C56FA:0x00000000000000000C0857D82EB34061
0x91D63DB80153785C458EDFA05C2BE6A5 / 0x4BB93F8121E3DA35 = 0x0000000000000001ED086E53A490EBA9:0x00000000000000000154DC2121A232A8
0xD5E77579A2E69A028A6F1BE52462A6A3 / 0x40C3C80BAF686F29 = 0x00000000000000034D8356E4FA79EAE5:0x00000000000000002A4ACFAD31FABCF6
0xE7289EE59D2EC9B80A671267274F9F1C / 0xA037AC77A5D63AED = 0x0000000000000001715A12BD82C8D05A:0x00000000000000002762B55566F657CA
0xDB5EB523534D10EC26C14D0A21165EAA / 0x70877083B2890E38 = 0x0000000000000001F30F41B77230648F:0x00000000000000003B161DAC42798D62
0xF22BCF06050EB9FAEDA39C433315D3AF / 0x634AA12A63798C7E = 0x00000000000000027061B6CFBDC47CAC:0x00000000000000002CA1B7EEE6E66707
0x6A5F7E6F71DBD32EA8BE5AF85B105730 / 0xACDEA42E034679E0 = 0x00000000000000009D86B0889208F92C:0x0000000000000000A900572AAF6884B0
0x86F664584EA6FE564BEEFE1B41207713 / 0x1D319A3D3E6F6D38 = 0x00000000000000049F7C4B1AC30CB248:0x00000000000000000BDEBA5D7138CF53
0x93D55D3D896F3E6E2E1EF71A01C680C4 / 0x88115750A7F8D137 = 0x00000000000000011622DE3AEA8D6745:0x00000000000000005F3FEF0A1E3DFBF1
0xAE986E3F6A4F484F4E81CF89F9BE32A9 / 0x173AD8E0B4E17B5B = 0x00000000000000078417C63DC58B3E77:0x0000000000000000141D123D47A4D15C
0x5DB95A0D6BD93EE1DF7FAF3668745E88 / 0xC07C0C65E93707AC = 0x00000000000000007CA699C204812C1C:0x0000000000000000BFFBD2C9E371F7B8
0x91FC43AF1DC400E75C8C89D4CB0CC766 / 0x23905F76E2C708A2 = 0x00000000000000041AD8BD6E95B77C6B:0x00000000000000000167C96223DFB3B0
0xD33CD0ECE044630C7857441234B4B3BD / 0xD2ECB5308FFCD581 = 0x000000000000000100613A50243CC975:0x0000000000000000BF01143AB448D6C8
0xCA7D0ABE05B379F37FC540C2F2540DC2 / 0xC511ED4380F000D0 = 0x00000000000000010709E7B22B442D2E:0x0000000000000000A454F0746FCF5862
0x75418582C2C04200CEEEF30B29E21EE6 / 0x622EE6915AAAC16E = 0x000000000000000131BACD4A53406179:0x0000000000000000527DDDC9966203E8
0x82EE158220268F14FDADF6174BC831B9 / 0x6D87F8FFBD7AF5FF = 0x00000000000000013203A1EE685F58F4:0x0000000000000000619862918C6512AD
0xBABC73A5BFC6AFA87BE777C27AF0CA7A / 0x334F8534DA44AE57 = 0x0000000000000003A3AAEC85949B00F6:0x00000000000000001720D365E24442E0
0xEC80AC4404C4CD424ADD78D0AA294A76 / 0x4F06C8F784DAE202 = 0x0000000000000002FE219872DF7C4B95:0x00000000000000001C7F230AFF95294C
0x74AC5B32627EB992D390475D4141954A / 0xBCBB67BA01AB465B = 0x00000000000000009E420C0580D7DBC7:0x000000000000000050FED88BD9810B8D
0xD0FA893E26F8F7C9B7B6346DF979053F / 0x785AF91D6D797129 = 0x0000000000000001BC813A285AA730EE:0x000000000000000029780222319B2121
0x5DED4A9336945FC99AE5C45B6D6A6250 / 0xFCE9DFD374327842 = 0x00000000000000005F12BA510F26C8DF:0x0000000000000000952062F60FB410D2
0xBD6604F3CC1E2F71B1C50F61C92682F5 / 0xB1507D6F7F83E641 = 0x0000000000000001117252CA7EC0A65F:0x0000000000000000076726C74125EAD6
0x8DEAD81C39130DD74AE755543605D7C7 / 0x8553E144F6F3FA63 = 0x0000000000000001107E30EAE9EE45D4:0x00000000000000002C281D730E73CECB
0xB62F71DEE58A89915484BA8DFF66ACBA / 0x70B8C00DFB590908 = 0x00000000000000019DC1EDE4447FF324:0x00000000000000001ED8A84F8856CF9A
0x1164808010FCC683FD5FD742EF1E82E2 / 0x2329D91B9BEF427F = 0x00000000000000007E9F79392CD015F6:0x000000000000000001894BD59B9031D8
0x1F65989C6839C52549A6A367837A8D68 / 0xDE9E265FE8F6EE0A = 0x0000000000000000241ADBD0FB260F42:0x0000000000000000C9B279DBD86298D4
0x01B09EDC67F8390074BF720CE0D4E681 / 0xEBEF93E1DE2F615E = 0x000000000000000001D569310FE86723:0x000000000000000036910723B0FDC4A7
0x43150143A042D09461FCAEE011BF4D2F / 0xE01ECE82FF69E963 = 0x00000000000000004C9FC0D605116CEE:0x000000000000000076790CFC803F8F25
0xA8BC8C145B90657A7FA62D4FC60E8144 / 0x0F175AAB16AA0D3A = 0x000000000000000B2E5C8870D0E697EA:0x0000000000000000076D41A7BEB53440
0xDBB6986137F134E82B2567E7164F8D6B / 0x39E35EFFA049FE5D = 0x0000000000000003CBA4704C8552B329:0x00000000000000000652776CE2D0C986
0xC95308E127E27A40CC2AB361F1D003F1 / 0x30BAF3D3113646FD = 0x000000000000000421A383F06B1DD66E:0x0000000000000000039BD74B637D053B
0xC3D5AF830984D0A89300A5A2D0EBAFFD / 0xDA78B7DC9D00443C = 0x0000000000000000E57983384327721C:0x0000000000000000C4E3AD916D5D816D
0x72F659CA625824C08839118A5F99D382 / 0xFCD33EC00AEBB829 = 0x00000000000000007467EB48186A84B8:0x0000000000000000C6A5E47AE23E520A
0x3015B4708D7B5E031CC52F99C95C5B09 / 0x52705129AD7C7A29 = 0x00000000000000009551C5B192977B2F:0x00000000000000001D95ED27B0A13A82
0x0E1F7B0B03B5DC69EC7F0EE22D9D9103 / 0xF01589C7A2936ED8 = 0x00000000000000000F0F29462804FE9E:0x0000000000000000D04177AE1344D7B3
0xEC3D468251C6746F99D7DE96E6C90D1F / 0x267AB14E499C9AD9 = 0x000000000000000623AF464370A3B267:0x00000000000000000E79D9A7DCF0DDD0
0x324766C3AE8FE518A91C0B89F691D8A8 / 0x473C0E6E2DEFF322 = 0x0000000000000000B4B0B8770B975D14:0x0000000000000000278C077555718000
0x1EEA010641E4FBB7EE6ADF4622A338B4 / 0x7649C1545D840EDB = 0x000000000000000042E78BDEEFA2D348:0x00000000000000001A6FB41E21AA8A1C
0x26CF636EE1F98F9AD6EB622275186021 / 0xBBDC13E0AACC79E6 = 0x000000000000000034E32521D0410AB4:0x00000000000000002FA92BE28D29AE69
0x7A7BE4E0A4F87E8283652261AA454ECA / 0x9B8251B56467B45E = 0x0000000000000000C9A2458FDD775882:0x0000000000000000311B005509E9670E
0x194D9A97CA3475698A2F1BBF94996EDF / 0x35072BD7E15B7473 = 0x00000000000000007A27854605B3DE17:0x0000000000000000012503425AFD3E8A
0xD63E9B6DB46C52F4D8B986F9847ECEAD / 0x9D72392AF8BE4618 = 0x00000000000000015C59F8C79AC088BA:0x00000000000000005D586814B303213D
0xF42EA2FF56E9795D65FB8FB85E1C7F34 / 0x24C1EE0CA7A07210 = 0x0000000000000006A49EF0579168219C:0x000000000000000017B3212D2322ED74
0x924A5235E557A62DF989AEC565EDD758 / 0x4637E63561A89AF4 = 0x0000000000000002155744F31100089E:0x00000000000000002F3BD6DFA70694C0
0xF5F894C7FE0B1939A62B614DE52A67B0 / 0xC9BF16D5CBE833CC = 0x0000000000000001381E0FEF42EC23E6:0x00000000000000006DFA7B799B66FA68
0xBC54C083AE952260AFF7F9FF2E39E958 / 0x6E0125A26FDA4F3A = 0x0000000000000001B647A71AF0D7D071:0x000000000000000053A8E5E784C7D0BE
0xA5371345881C4C88EE129F95A49D7002 / 0x2E96B3F3A1BB5FD9 = 0x00000000000000038BD711431BFF2BE4:0x0000000000000000163D12E3C47B9FBE
0xED919D64A38AC50EE6289DE3FA073006 / 0x6675A78DB953CC35 = 0x000000000000000251939CAFFFB1C284:0x00000000000000001BC552CAEE6CBAB2
0x1F1FF71139CF7455D56C25CE06AF2779 / 0xF6D3ACC12E75ADE9 = 0x0000000000000000204816C1394B681E:0x00000000000000001D4C796EF1FB1E2B
0xE3FB1B8E26AE6958A8B9312FC35D8302 / 0x2F0E544621C3A9BC = 0x0000000000000004D84A89285F567516:0x00000000000000000E22D0A2A6D200DA
0x410856E95F9CB830D95BADD72B9F83E5 / 0x7EC03BC1A01CCA8F = 0x00000000000000008358CE856D657B0A:0x00000000000000005C76947347C1E54F
0xE9F5F4B91B990C446D6DC188EF014D2E / 0x603063D14D68B6BF = 0x00000000000000026EAB5A279B04DF98:0x000000000000000021D3917341A86AC6
0x3F672EFBF9C03A71D13924A3D9F33F07 / 0xA6D751E7964991AC = 0x0000000000000000614908842CC78070:0x000000000000000030AE8F43843983C7
0xF8C19187F058634D7F6F0E77C93626E1 / 0xA0DD50F4F45B003A = 0x00000000000000018BDEEE39534EDFD1:0x00000000000000009899A7EA260C7187
0x7FD9CC315A5C23E293EDBCB24CDE6258 / 0xEA0758B0C7F182CC = 0x00000000000000008BDA913F184CADE0:0x0000000000000000CA19EEBB2F9813D8
0xCA048AE8719D2CBF504D5DF863569FB3 / 0x8EAE31C2B66F312F = 0x00000000000000016A76D09445883B90:0x00000000000000000C53FDE6587D2043
0xF1A862325714D55FB17FC0FEF112CD2E / 0x5C5EFD2DDF2461AD = 0x00000000000000029DBCDFF6149E9460:0x000000000000000059AC2B766E30284E
0x82370123858263879151B962F55B65C8 / 0x7C4067CF7662458E = 0x00000000000000010C494E7DE774E5A7:0x00000000000000006BF1E0862CB00026
0xAE4AD227B9816E578C12F2C496B15DC7 / 0x3BFB82AD09DA4BD7 = 0x0000000000000002E7DD4BD12E902D85:0x00000000000000001A49F830CE032C14
0x9A58A643CB945107FE9FA93764AEB5B6 / 0xA5DB6BCE5834CC35 = 0x0000000000000000EE3BAA824701F858:0x000000000000000093DBD3146D802B7E
0x3CFF433501A48CCA40DD1589383A1E6A / 0xE1EA9EAC3C53D914 = 0x0000000000000000451E9FB6BE32F4B4:0x00000000000000002A779739A4766C5A
0xE05CE3161C060193CDC77463E58AC539 / 0x4E716039D7089394 = 0x0000000000000002DC36829F6DEA09FE:0x00000000000000002CFDECD854902461
0x8C172FF9496884E683FA2149ACB0E973 / 0xD8E1E044A5E1006F = 0x0000000000000000A55B999B02B5B8B5:0x00000000000000001E4D953B7FD0D2F8
0xDAAC01F32907D2E05A22F6E749150705 / 0x7026050046A81D30 = 0x0000000000000001F328DBBBE708E51C:0x00000000000000001E77E203F315E5C5
0xB32B29D3D107DDE9BEC8E2B858BBB357 / 0x750B3A447F231586 = 0x000000000000000187E150D9948EB8D0:0x0000000000000000580C63326C6DE677
0xDF6E9EF5B0D613F77E584427E339EC9E / 0xBAC98934EFDD32FB = 0x000000000000000132392EB5CDD7AB7A:0x000000000000000085ABCAA502F4F800
0x70E4427966EA353F07FC52694289DF0E / 0x9A02DB9F4FB0757B = 0x0000000000000000BBA681F9A127CD04:0x00000000000000003B17F3B474F78A22
0x587602196F06C706C43D26B6A324DA04 / 0xD52A8E594B20BB56 = 0x00000000000000006A3C831F9C1DBA1A:0x00000000000000004B9CAAC798F75748
0xDDC4B4D70076B04AD38C6FCFB745415E / 0xF920A3A3B0BE6137 = 0x0000000000000000E3E2DACC13655973:0x00000000000000004D3FDAADB24076A9
0xBFB7F075E4640514ACC0D34EC7B3AB23 / 0x03EDC2CC944CCA7B = 0x0000000000000030CC7EFAC33315FE5B:0x000000000000000002DDC91DC26AA76A
0xB7850EA2D84ECF8A871CDE6319F1A14D / 0x5D5583E278BF2EA1 = 0x0000000000000001F75D58F861AAD829:0x0000000000000000104585A47C115184
0x19D30C8F4CF0B1E8D7BF478A62F20780 / 0x7A4852D9899BE914 = 0x0000000000000000361050BBA6A2E574:0x000000000000000038FF75771A258670
0xD67CC4EEA505A61BA2B4142239CCCF60 / 0x4334772F7DF221DB = 0x00000000000000033108E073FA69488D:0x00000000000000002E9320CD011791C1
0x8699D56AF72156D5E789058746D11016 / 0x531EF57805226C75 = 0x00000000000000019E8CF4E75D4C2521:0x000000000000000046269F1EEFF82C01
0x8C7B3EB794B4B10C6EE0FC588DDD6214 / 0xD840A863692B9E7B = 0x0000000000000000A64D52F5F26EDEFF:0x000000000000000093CF8DD99921DB8F
0xDCBFDAD051F9D4F96CF696581E56B0BB / 0xED841C71FE839C94 = 0x0000000000000000EDEDB656CBF05C2E:0x00000000000000002AC3B820EFAB5E23
0xCD3D3DD4D1473D7929E7C96EFA445188 / 0x46CEECB24270748B = 0x0000000000000002E6055D0FB8CCCA0B:0x000000000000000032CCDCD39CB5A18F
0xD09684576D9D8EFFA349AD1A03DF2E51 / 0x9CBEDD11E11E3711 = 0x000000000000000154AB87F3F19CF5EA:0x00000000000000002C5BA1ECC43193C7
0x4EB7FC4F14B3A2FAAFC3CB5E260D8015 / 0x2C62DE1A95A1DDC1 = 0x0000000000000001C603BFD8AC6E84DE:0x0000000000000000074354C5F869AEB7
0x4820304E706D821246759231492F522E / 0x9AB151D8E1BB548A = 0x0000000000000000775C480C24C7798D:0x0000000000000000579F64114AC6882C
0xAFB7699C70F40A3FABDBED7D413B2B68 / 0x5A7014CF215CD3D2 = 0x0000000000000001F165630D7BC529B0:0x0000000000000000279134DF8CE2E908
0xA40B7764609C77E35B3CB7F2E247DE12 / 0x26823F543A1F24EC = 0x0000000000000004428AEC3F73053FC3:0x000000000000000022C3FA3735DCAA4E
0x4B3D5C68E220B1F03E69112C438FECEF / 0x55CDCB691E140081 = 0x0000000000000000E07B315ED33F95DF:0x000000000000000008ABEE64F9196790
0x07145917D5EF05A9AC76BDBC20F0F1B7 / 0xD2DCB75038ABD9D4 = 0x000000000000000008984E14ED4515F5:0x0000000000000000576FBDC4D17715D3
0x79D36ED027F8136BD7237A20519D6900 / 0x65BE279172E9340A = 0x00000000000000013288389B33B93E78:0x00000000000000005686822B60789850
0x10B816A5E0F32BC0A28D56A228987B97 / 0x2691631C8A4FC373 = 0x00000000000000006EF9B0E3683A6778:0x000000000000000002EA49293B8398AF
0xBF6D72FFCAE5A828BC10AD25EA1CC2CE / 0x91730B1EF937B029 = 0x000000000000000150ECA60E96E99786:0x000000000000000085FFE8CA42BD5E58
0xA9B05D80F44DD3CF63B70D95259A8A07 / 0xA0525777429F609D = 0x00000000000000010EF523D97E6167AF:0x000000000000000083B23B3A994B53B4
0xCEBC9EFF59E7F2C4C2583ABAB8E86341 / 0xA5C3E00804739505 = 0x00000000000000013F46555CC330626E:0x00000000000000007B23CE99D042711B
0x9D1340D851E89E9730B634EF10700E08 / 0x6C9B7366F2C20971 = 0x0000000000000001723EA5F3B133DFF1:0x000000000000000043DA931B7F08BBA7
0xCAC50533954FFFA63F8F983623FDC3BC / 0x0B01B39932F6FB33 = 0x00000000000000126C26F2199D49C968:0x000000000000000004C39A7E9BE1AC04
0x0A8D31644EA2CC187FCF54C2C4530F2A / 0x21192FEE7C1CADA3 = 0x0000000000000000519C6B55F4BD6EEB:0x0000000000000000054E1CB7F60CA089
0xE26C37CAED68752AA4221B7A01024A13 / 0x862E65E264A0DD30 = 0x0000000000000001AFFC08C0C0FC6210:0x00000000000000005D17FA9067081713
0x4B6BAD20C68B054C8A0302777DD3129B / 0x6F72BE3F67300E03 = 0x0000000000000000AD3E5477E997550C:0x0000000000000000237CF0BDB4266B77
0x82FCD93671D634FA5860E8BDF176FFB2 / 0x0ECFA40BED38D323 = 0x0000000000000008D80913755D8AD21C:0x00000000000000000A9E9182DA2F31DE
0xF9292AFFD1C91713D8A1142692458287 / 0x584AB435FF4987DF = 0x0000000000000002D26F9202488D82E9:0x000000000000000055D41BE953869A90

If you were to use x86 assembly, it would be much simpler:

#include <stdio.h>

unsigned mulmod(unsigned a, unsigned b, unsigned m)
{
  unsigned result = 0;

  __asm
  {
    mov  eax, a
    mov  ebx, b
    mul  ebx // edx:eax = 64-bit product of a*b
    push eax // store away low 32 bits of dividend

    mov  ecx, m
    mov  eax, edx
    xor  edx, edx
    div  ecx // get high 32 bits of quotient
    xchg eax, [esp] // store them on stack, get low 32 bits of dividend
    div  ecx // get low 32 bits of quotient and the remainder
    mov  result, edx
  }

  return result;
}

int main(void)
{
  unsigned a = 4294967231, b = 4294967279, m = 4294967291;
  printf("Assembly:\n(%u * %u) mod %u = %u\n", a, b, m, mulmod(a, b, m));
  printf("Validation:\n(%u * %u) mod %u = %u\n", a, b, m, (unsigned)((unsigned long long)a * b % m));
  return 0;
}

Output (compiled with Open Watcom C/C++ 1.9):

Assembly:
(4294967231 * 4294967279) mod 4294967291 = 720
Validation:
(4294967231 * 4294967279) mod 4294967291 = 720



回答2:


Look here: Modular arithmetics and NTT (finite field DFT) optimizations

  • its my still unanswered question
  • find modmul member function
  • all mod functions are optimized and tested ...
  • if you have problems with asm then convert it co C
  • in rems is info about what line do what so it should be easy
  • even for non asm user

Above modmul implementation uses double precision

  • but still all variables are in single precision
  • so if you cannot use that for some reason
  • then you have to stick with loop of modadd which is slow
  • or binary modmul ... multiplicate a by only bits of b ... much faster
  • this leads to loop through all bits with some modadds and bitshift and bittest functions

For single precision only I use something like this 'template' code:

_mod_type modadd(_mod_type a,_mod_type b,_mod_type c)
    {
    _mod_type d,cy;
    a=mod(a,c);
    b=mod(b,c);
    d=a+b;
    cy=(shr(a)+shr(b)+shr((a&1)+(b&1)))&_mod_mask;
    if (cy) d-=c;
    if (_mod_type(d)>=_mod_type(c)) d-=c;
    return d;
    }

_mod_type modmul(_mod_type a,_mod_type b,_mod_type c)
    {   // b is not modded !
    int i;
    _mod_type d;
    a=mod(a,c);
    for (d=0,i=0;i<_mod_bits;i++)
        {
        if (_mod_type(a&1)) d=modadd(d,b,c);
        a=shr(a);
        b=modadd(b,b,c);
        }
    return d;
    }
  • _mod_type is you variable type (for example 32 bit unsigned int or DWORD)
  • _mod_bits is number of bits constant (32)
  • _mod_mask is msb bit set constant (0x80000000)
  • mod(a,b) is modulo a%b
  • shr(a) is bit shift a>>1
  • modadd(a,b,c) is (a+b)%c
  • modmul(a,b,c) is (a*b)%c

Hope it helps...



来源:https://stackoverflow.com/questions/16115682/modulus-of-product-of-two-integers

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!