问题
I change this code to work in persian. My code returns:
Key: Text is too short to analyze Text:
and doesn't return key and decrypt text. Is it anything wrong with uppercase
or ascii
in persian text? What can I do?
# -*- coding: utf-8 -*-
from string import uppercase
from operator import itemgetter
def vigenere_decrypt(target_freqs, input):
nchars = len(uppercase)
ordA = ord(u"ا")
sorted_targets = sorted(target_freqs)
def frequency(input):
result = [[c, 0.0] for c in uppercase]
for c in input:
result[c - ordA][1] += 1
return result
def correlation(input):
result = 0.0
freq = frequency(input)
freq.sort(key=itemgetter(1))
for i, f in enumerate(freq):
result += f[1] * sorted_targets[i]
return result
cleaned = [ord(c) for c in input.upper() if c.isupper()]
best_len = 0
best_corr = -100.0
# Assume that if there are less than 20 characters
# per column, the key's too long to guess
for i in xrange(2, len(cleaned) // 20):
pieces = [[] for _ in xrange(i)]
for j, c in enumerate(cleaned):
pieces[j % i].append(c)
# The correlation seems to increase for smaller
# pieces/longer keys, so weigh against them a little
corr = -0.5 * i + sum(correlation(p) for p in pieces)
if corr > best_corr:
best_len = i
best_corr = corr
if best_len == 0:
return ("Text is too short to analyze", "")
pieces = [[] for _ in xrange(best_len)]
for i, c in enumerate(cleaned):
pieces[i % best_len].append(c)
freqs = [frequency(p) for p in pieces]
key = ""
for fr in freqs:
fr.sort(key=itemgetter(1), reverse=True)
m = 0
max_corr = 0.0
for j in xrange(nchars):
corr = 0.0
c = ordA + j
for frc in fr:
d = (ord(frc[0]) - c + nchars) % nchars
corr += frc[1] * target_freqs[d]
if corr > max_corr:
m = j
max_corr = corr
key += chr(m + ordA)
r = (chr((c - ord(key[i % best_len]) + nchars) % nchars + ordA)
for i, c in enumerate(cleaned))
return (key, "".join(r))
def main():
encoded = " پهيتش غعهدد ذصلدي هزفضر کنهرظ ضذکاح يصتمد "
english_frequences = [
14, 4.2, 0.7, 5.2, 0.1, 1.2, 0.4,
1, 1.4, 7.5, 0.1, 8.5, 2.1, 0.1,
3.3, 2.6, 0.7, 0.3, 0.6, 0.2, 1.5,
0.2, 1.6, 1.2, 3, 1.7, 2.7, 5.7, 7.1, 6, 5.7, 9.1]
(key, decoded) = vigenere_decrypt(english_frequences, encoded)
print "Key:", key
print "\nText:", decoded
main()
来源:https://stackoverflow.com/questions/34829728/python-uppercase-and-ascii-in-persian-text