问题
Bellow is a code that compares hashlib.sha256()
to my sha256_test()
function which is written in raw python in terms of hash rate performance.
from time import time_ns as time
import hashlib
def pad512(bytes_):
L = len(bytes_)*8
K = 512 - ((L + 1) % 512)
padding = (1 << K) | L
return bytes_ + padding.to_bytes((K + 1)//8, 'big')
def mpars (M):
chunks = []
while M:
chunks.append(M[:64])
M = M[64:]
return chunks
def sha256_transform(H, Kt, W):
a, b, c, d, e, f, g, h = H
# Step 1: Looping
for t in range(0, 64):
T1 = h + g1(e) + Ch(e, f, g) + Kt[t] + W[t]
T2 = (g0(a) + Maj(a, b, c))
h = g
g = f
f = e
e = (d + T1) & 0xffffffff
d = c
c = b
b = a
a = (T1 + T2) & 0xffffffff
# Step 2: Updating Hashes
H[0] = (a + H[0]) & 0xffffffff
H[1] = (b + H[1]) & 0xffffffff
H[2] = (c + H[2]) & 0xffffffff
H[3] = (d + H[3]) & 0xffffffff
H[4] = (e + H[4]) & 0xffffffff
H[5] = (f + H[5]) & 0xffffffff
H[6] = (g + H[6]) & 0xffffffff
H[7] = (h + H[7]) & 0xffffffff
return H
Ch = lambda x, y, z: (z ^ (x & (y ^ z)))
## """The x input chooses if the output is from y or z.
## Ch(x,y,z)=(x∧y)⊕(¬x∧z)"""
Maj = lambda x, y, z: (((x | y) & z) | (x & y))
## """The result is set according to the majority of the 3 inputs.
## Maj(x, y,z) = (x ∧ y) ⊕ (x ∧ z) ⊕ ( y ∧ z)"""
ROTR = lambda x, y: (((x & 0xffffffff) >> (y & 31)) | (x << (32 - (y & 31)))) & 0xffffffff
SHR = lambda x, n: (x & 0xffffffff) >> n
s0 = lambda x: (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
s1 = lambda x: (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
g0 = lambda x: (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
g1 = lambda x: (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
def sha256_test (bytes_):
#Parameters
initHash = [
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
]
Kt = [
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
]
padM = pad512(bytes_)
chunks = mpars(padM)
# Preparing Initaial Hashes
H = initHash
# Starting the Main Loop
for chunk in chunks:
W = []
# Step 1: Preparing Wt
for t in range(0, 16):
W.append((((((chunk[4*t] << 8) | chunk[4*t+1]) << 8) | chunk[4*t+2]) << 8) | chunk[4*t+3])
for t in range(16, 64):
W.append((s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16]) & 0xffffffff)
# Step 2: transform the hash
H = sha256_transform(H, Kt, W)
# Step 3: Give Out the digest
Hash = b''
for j in H:
Hash += (j.to_bytes(4, byteorder='big'))
return Hash
if __name__ == "__main__":
k = 10000
M = bytes.fromhex('00000000000000000001d2c45d09a2b4596323f926dcb240838fa3b47717bff6') #block #548867
start = time()
for i in range(0, k):
o1 = sha256_test(sha256_test(M))
end = time()
endtns1 = (end-start)/k
endts1 = endtns1 * 1e-9
print('@sha256_TESTs() Each iteration takes: {} (ns) and {} (sec).'.format(endtns1, endts1))
print('@sha256_TESTs() Calculated Hash power: {} (h/s)'.format(int(2/endts1)))
start = time()
for i in range(0, k):
o2 = hashlib.sha256(hashlib.sha256(M).digest()).digest()
end = time()
endtns2 = (end-start)/k
endts2 = endtns2 * 1e-9
print('@hashlib.sha256() Each iteration takes: {} (ns) and {} (sec).'.format(endtns2, endts2))
print('@hashlib.sha256() Calculated Hash power: {} (Kh/s)'.format(int(2/endts2/1024)))
print('Outputs Match : ', o1 == o2)
print('hashlib is ~{} times faster'.format(int(endtns1/endtns2)))
When calculating the hash rate, is 1 Kilo Hash considered to be 1000
hashes or 1024
hashes?!
If I am correct about calculating the hash rate I conclude that my PC can generate a hash rate of ~900 (h/s)
using my own sha256_test()
function while hashlib.sha256()
outperforms this with ~300 Kh/s
.
Firstly, I would like to know the mechanism behind hashlib's outstanding performance. When I read the code in hashlib.py
, there isn't much code inside it and I can't understand how the hash values are computed. Is that possible to see the code behind hashlib.sha256()
?
Secondly, Is there any possibility to improve my code so that it can get close to a performance of 300 (Kh/s)
? I have read about Cython, I am just not sure how much is it capable of improving such an algorithm.
Thirdly, Is that technically possible to be faster than hashlib
in python?
回答1:
Well looking at hashlib.py won't help you too much to be honest, but it may give you a hint. What you have done is pure python code, whereas hashlib relies on a C implementation and that would run in circles around pure python with ease. Namely you need to look at this. So if you want to even get close to these numbers, you'd need to look into cython, C, C++ or Rust.
来源:https://stackoverflow.com/questions/53374359/why-is-hashlib-so-faster-than-other-codes-for-sha256-and-how-can-i-get-my-code-c