66}
# The uppercase English alphabet; position in this string is the letter's index (A=0 ... Z=25).
LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Index of coincidence of the language as a whole (0.0665 for English).
PARAMETER = 0.0665
# Upper bound handed to friedman_method as max_keylength (that parameter defaults to None).
MAX_KEYLENGTH = 10
910
1011
1112def index_of_coincidence (frequencies : dict , length : int ) -> float :
@@ -82,17 +83,77 @@ def friedman_method(ciphertext: str, max_keylength: int=None) -> int:
8283 return li [1 ]
8384
8485
def get_frequencies() -> tuple:
    """
    Return the values of the global LETTER_FREQUENCIES_DICT as a tuple ordered from 'A' to 'Z'.

    Every value is divided by 100 before it is returned (presumably the dictionary stores
    percentages and this converts them to fractions -- confirm against its definition).
    :return: a tuple of 26 floats, one per letter
    """
    first = ord('A')
    raw_values = [LETTER_FREQUENCIES_DICT[chr(code)] for code in range(first, first + 26)]
    return tuple(value / 100 for value in raw_values)
90+
91+
def find_key(ciphertext: str, key_length: int) -> str:
    """
    Finds the key of a text which has been encrypted with the Vigenere algorithm, using statistical analysis.

    The function needs an estimation of the length of the key. For every position of the key it takes the
    letters of the text that were encrypted with that position (every key_length-th letter) and computes their
    relative frequencies. It then multiplies these frequencies, element by element, with the reference
    frequencies returned by get_frequencies() and sums the products; this is repeated for each of the 26 cyclic
    shifts of the observed frequencies. The shift that produces the largest sum corresponds to that letter of
    the key (on ties the smallest shift wins).
    :param ciphertext: the text to analyse; it must consist only of the uppercase letters A-Z
    :param key_length: a supposed length of the key
    :return: the key as a string of uppercase letters, or "" when key_length is not positive
    :raises ValueError: if the ciphertext contains a character outside A-Z, or if key_length is larger than
        the length of the ciphertext (some key positions would have no letters to analyse)
    """
    if key_length <= 0:
        return ""

    a = ord('A')
    alphabet_length = 26  # the length of the english alphabet

    # Reject foreign characters explicitly: code points slightly below 'A' (digits, for example) would
    # otherwise turn into negative list indices and silently corrupt the counts.
    if any(not 'A' <= symbol <= 'Z' for symbol in ciphertext):
        raise ValueError("ciphertext must contain only the uppercase letters A-Z")
    # With more key positions than letters at least one column is empty and its frequencies are undefined.
    if key_length > len(ciphertext):
        raise ValueError("key_length must not exceed the length of the ciphertext")

    # The reference frequencies do not depend on the key position, so fetch them once.
    real_freq = get_frequencies()

    key = []
    # for every letter of the key
    for k in range(key_length):
        # the letters encrypted with the k-th letter of the key
        column = ciphertext[k::key_length]

        # relative frequencies of the letters in the column: 'A' in the first position and so on
        counts = [0] * alphabet_length
        for symbol in column:
            counts[ord(symbol) - a] += 1
        freq = [count / len(column) for count in counts]

        # correlation of the reference frequencies with the observed ones shifted left by `shift` positions
        scores = [
            sum(freq[(j + shift) % alphabet_length] * real_freq[j] for j in range(alphabet_length))
            for shift in range(alphabet_length)
        ]
        # list.index returns the first occurrence, so the smallest shift wins on ties
        key.append(scores.index(max(scores)))

    return "".join(chr(num + a) for num in key)  # return the key as a string
133+
134+
def find_key_from_vigenere_cipher(ciphertext: str) -> str:
    """
    Estimate the key of a Vigenere-encrypted text.

    The text is upper-cased and every symbol that is not in LETTERS is dropped. The length of the key is then
    estimated with friedman_method (bounded by MAX_KEYLENGTH) and the key itself is recovered with find_key.
    The cleaned text and the estimated key length are printed along the way.
    :param ciphertext: the encrypted text; it may contain non-letter symbols and lowercase letters
    :return: the key as a string, or "" when the estimated key length is not positive
    """
    clean_ciphertext = "".join(symbol for symbol in ciphertext.upper() if symbol in LETTERS)
    print(clean_ciphertext)

    key_length = friedman_method(clean_ciphertext, max_keylength=MAX_KEYLENGTH)
    print(f"The length of the key is {key_length} ")
    if key_length > 0:
        return find_key(clean_ciphertext, key_length)

    # a non-positive length means the estimation failed, so there is no key to look for
    print("Something went wrong while calculating the length of the key.")
    return ""
95152
96153
if __name__ == '__main__':
    # print(index_of_coincidence(LETTER_FREQUENCIES_DICT, 1000))
    # Read the encrypted text from out.txt and print the key estimated for it.
    with open("out.txt") as source:
        encrypted_text = source.read()
    print(find_key_from_vigenere_cipher(encrypted_text))
0 commit comments