If Unicode uses 17-bit code points, how are surrogate pairs calculated from code points?
Unicode code points are scalar values which range from 0x000000 to 0x10FFFF. Thus they are 21-bit integers, not 17-bit.
Surrogate pairs are a mechanism of the UTF-16 encoding form. UTF-16 represents the 21-bit scalar values as one or two 16-bit code units.
This is explained in detail, with sample code, in the Unicode Consortium's FAQ, "UTF-8, UTF-16, UTF-32 & BOM". That FAQ refers to the section of the Unicode Standard which has even more detail.
If it is code you are after, here is how a single codepoint is encoded in UTF-16 and UTF-8 respectively.
A single codepoint to UTF-16 codeunits:
// Encode one code point `cp` as UTF-16, writing 16-bit code units through `out`.
// No validation is done here: `cp` is assumed to already be a valid code point.
if (cp >= 0x10000u)
{
    // Above the 16-bit range: subtract 0x10000 and split the remainder into
    // two 10-bit halves, one per surrogate.
    const auto offset = cp - 0x10000u;
    *out++ = static_cast<uint16_t>(0xd800u + ((offset >> 10) & 0x3ffu)); // high surrogate: upper 10 bits
    *out++ = static_cast<uint16_t>(0xdc00u + (offset & 0x3ffu));         // low surrogate: lower 10 bits
}
else
{
    // Fits in a single 16-bit code unit; emitted unchanged.
    *out++ = static_cast<uint16_t>(cp);
}
A single codepoint to UTF-8 codeunits:
// Encode one code point `cp` as UTF-8, writing 8-bit code units through `out`.
// The lead byte carries a length marker plus the topmost payload bits; every
// following byte is 10xxxxxx and carries the next 6 payload bits.
// No validation is done here: `cp` is assumed to already be a valid code point.
if (cp < 0x80u)
{
    // 1 byte: 0xxxxxxx
    *out++ = static_cast<uint8_t>(cp);
}
else if (cp < 0x800u)
{
    // 2 bytes: 110xxxxx 10xxxxxx
    *out++ = static_cast<uint8_t>(0xc0u | ((cp >> 6) & 0x1fu));
    *out++ = static_cast<uint8_t>(0x80u | (cp & 0x3fu));
}
else if (cp < 0x10000u)
{
    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    *out++ = static_cast<uint8_t>(0xe0u | ((cp >> 12) & 0x0fu));
    *out++ = static_cast<uint8_t>(0x80u | ((cp >> 6) & 0x3fu));
    *out++ = static_cast<uint8_t>(0x80u | (cp & 0x3fu));
}
else
{
    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    *out++ = static_cast<uint8_t>(0xf0u | ((cp >> 18) & 0x07u));
    *out++ = static_cast<uint8_t>(0x80u | ((cp >> 12) & 0x3fu));
    *out++ = static_cast<uint8_t>(0x80u | ((cp >> 6) & 0x3fu));
    *out++ = static_cast<uint8_t>(0x80u | (cp & 0x3fu));
}