python uppercase and ascii in persian text

非 Y 不嫁゛ 提交于 2020-01-05 20:00:54

问题


I change this code to work in persian. My code returns: Key: Text is too short to analyze Text: and doesn't return key and decrypt text. Is it anything wrong with uppercase or ascii in persian text? What can I do?

# -*- coding: utf-8 -*-
from string import uppercase
from operator import itemgetter

def vigenere_decrypt(target_freqs, input):
    nchars = len(uppercase)

    ordA = ord(u"ا")
    sorted_targets = sorted(target_freqs)

    def frequency(input):
        result = [[c, 0.0] for c in uppercase]
        for c in input:
            result[c - ordA][1] += 1
        return result

    def correlation(input):
        result = 0.0
        freq = frequency(input)
        freq.sort(key=itemgetter(1))

        for i, f in enumerate(freq):
            result += f[1] * sorted_targets[i]
        return result

    cleaned = [ord(c) for c in input.upper() if c.isupper()]
    best_len = 0
    best_corr = -100.0

    # Assume that if there are less than 20 characters
    # per column, the key's too long to guess
    for i in xrange(2, len(cleaned) // 20):
        pieces = [[] for _ in xrange(i)]
        for j, c in enumerate(cleaned):
            pieces[j % i].append(c)

        # The correlation seems to increase for smaller
        # pieces/longer keys, so weigh against them a little
        corr = -0.5 * i + sum(correlation(p) for p in pieces)

        if corr > best_corr:
            best_len = i
            best_corr = corr

    if best_len == 0:
        return ("Text is too short to analyze", "")

    pieces = [[] for _ in xrange(best_len)]
    for i, c in enumerate(cleaned):
        pieces[i % best_len].append(c)

    freqs = [frequency(p) for p in pieces]

    key = ""
    for fr in freqs:
        fr.sort(key=itemgetter(1), reverse=True)

        m = 0
        max_corr = 0.0
        for j in xrange(nchars):
            corr = 0.0
            c = ordA + j
            for frc in fr:
                d = (ord(frc[0]) - c + nchars) % nchars
                corr += frc[1] * target_freqs[d]

            if corr > max_corr:
                m = j
                max_corr = corr

        key += chr(m + ordA)

    r = (chr((c - ord(key[i % best_len]) + nchars) % nchars + ordA)
         for i, c in enumerate(cleaned))
    return (key, "".join(r))


def main():
    encoded = " پهيتش غعهدد ذصلدي هزفضر کنهرظ ضذکاح يصتمد "

    english_frequences = [
        14, 4.2, 0.7, 5.2, 0.1, 1.2, 0.4,
        1, 1.4, 7.5, 0.1, 8.5, 2.1, 0.1,
        3.3, 2.6, 0.7, 0.3, 0.6, 0.2, 1.5,
        0.2, 1.6, 1.2, 3, 1.7, 2.7, 5.7, 7.1, 6, 5.7, 9.1]

    (key, decoded) = vigenere_decrypt(english_frequences, encoded)
    print "Key:", key
    print "\nText:", decoded

main()

来源:https://stackoverflow.com/questions/34829728/python-uppercase-and-ascii-in-persian-text

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!