/*
* Vigenere.java
* by Eric Farmer
*/
/**
* This class contains methods for encrypting and decrypting using the Vigenere
* cipher. All computation is done with arrays of character codes in the range
* 0 to 25 ('a' to 'z'); convenience methods convert between this and string
* format.
*
* @author Eric Farmer
* @version 2002-04-30
*/
public class Vigenere {
/* Contains only static methods. */
private Vigenere() {}
/**
* Converts the alphabetic characters in the given string to an array of
* character codes in the range 0 to 25. All non-alphabetic characters are
* ignored.
*
* @param s the string to convert
*
* @return the array of character codes
*/
public static char[] stringToLetters(String s) {
StringBuffer buffer = new StringBuffer(s.toLowerCase());
int length = 0;
for (int i = 0; i < buffer.length(); i++) {
char ch = buffer.charAt(i);
if (Character.isLetter(ch)) {
buffer.setCharAt(length++, ch);
}
}
buffer.setLength(length);
char[] c = buffer.toString().toCharArray();
for (int i = 0; i < c.length; i++) {
c[i] -= 'a';
}
return c;
}
/**
* Converts the given array of character codes, in the range 0 to 25, to a
* string of corresponding lowercase alphabetic characters.
*
* @param c the array of character codes
*
* @return the string of alphabetic characters
*/
public static String lettersToString(char[] c) {
for (int i = 0; i < c.length; i++) {
c[i] += 'a';
}
String s = String.copyValueOf(c);
for (int i = 0; i < c.length; i++) {
c[i] -= 'a';
}
return s;
}
/**
* Encrypts the given plaintext with the given key, both consisting of
* character codes in the range 0 to 25.
*
* @param plainText the text to be encrypted
* @param key the encryption key
*
* @return the encrypted text
*/
public static char[] encrypt(char[] plainText, char[] key) {
char[] cipherText = new char[plainText.length];
for (int i = 0; i < plainText.length; i++) {
cipherText[i] = (char)((plainText[i] + key[i%key.length])%26);
}
return cipherText;
}
/**
* Decrypts the given ciphertext with the given key, both consisting of
* character codes in the range 0 to 25.
*
* @param cipherText the text to be decrypted
* @param key the decryption key
*
* @return the decrypted text
*/
public static char[] decrypt(char[] cipherText, char[] key) {
char[] plainText = new char[cipherText.length];
for (int i = 0; i < cipherText.length; i++) {
plainText[i] = (char)((cipherText[i] + 26 - key[i%key.length])%26);
}
return plainText;
}
/**
* Estimates the keyword length for the given ciphertext, based on the
* index of coincidence test. The keyword length which maximizes the
* minimum index of coincidence over all parts of the ciphertext partition
* is returned.
*
* @param cipherText the ciphertext
*
* @return the estimated keyword length
*/
public static int guessKeyLength(char[] cipherText) {
int bestKeyLength = 2;
double maxMinIndex = Double.NEGATIVE_INFINITY;
int maxKeyLength = (int)Math.sqrt(cipherText.length);
/* Try "all" possible keyword lengths. */
for (int keyLength = 2; keyLength <= maxKeyLength; keyLength++) {
double minIndex = Double.POSITIVE_INFINITY;
/* Compute each index of coincidence, and find the minimum. */
for (int offset = 0; offset < keyLength; offset++) {
double index = indexOfCoincidence(cipherText, offset,
keyLength);
if (index < minIndex) {
minIndex = index;
}
}
/* Select the keyword length that maximizes the minimum. */
if (minIndex > maxMinIndex) {
maxMinIndex = minIndex;
bestKeyLength = keyLength;
}
}
return bestKeyLength;
}
/**
* Estimates the keyword for the given ciphertext, given the keyword
* length, based on a combination of monoalphabetic frequency analysis and
* mutual indices of coincidence.
*
* @param cipherText the ciphertext
* @param keyLength the keyword length
*
* @return the estimated keyword
*/
public static char[] guessKey(char[] cipherText, int keyLength) {
char[] key = new char[keyLength];
boolean[] found = new boolean[keyLength];
int lastFound = 0;
/*
* Build the keyword one letter at a time. Based on simple frequency
* analysis in each (monoalphabetic) block, estimate each keyletter by
* matching the most frequent ciphertext letter with plaintext 'e'.
* Select the best of these matches by "confidence," measured by the
* discrepancy between the most frequent and second most frequent
* letter.
*/
int maxDiffFreq = -1;
for (int offset = 0; offset < keyLength; offset++) {
int[] bins = frequencies(cipherText, offset, keyLength);
int bestE = 0;
/* Estimate keyletter based on plaintext 'e'. */
for (int c = 1; c < 26; c++) {
if (bins[c] > bins[bestE]) {
bestE = c;
}
}
int maxFrequency = bins[bestE];
/* Select the keyletter we are most confident in. */
sort(bins);
int diff = maxFrequency - bins[24];
if (diff > maxDiffFreq) {
maxDiffFreq = diff;
lastFound = offset;
key[offset] = (char)((bestE + 22)%26);
}
}
found[lastFound] = true;
/*
* Continue the incremental building of the keyword using mutual
* indices of coincidence. Pair the most recently found keyletter with
* each other remaining keyletter, "solving" using the mutual index of
* coincidence test. Select the best of these solutions again by
* "confidence," or discrepancy between the two highest indices.
*/
for (int i = 1; i < keyLength; i++) {
int bestOffset = 0;
double maxDiffIndex = Double.NEGATIVE_INFINITY;
for (int offset = 0; offset < keyLength; offset++) {
/* If this keyletter isn't already known, estimate it. */
if (!found[offset]) {
double[] indices = mutualIndicesOfCoincidence(cipherText,
offset, lastFound, keyLength);
int bestShift = 0;
/* Estimate keyletter based on mutual index. */
for (int shift = 1; shift < 26; shift++) {
if (indices[shift] > indices[bestShift]) {
bestShift = shift;
}
}
double maxIndex = indices[bestShift];
/* Select the keyletter we are most confident in. */
sort(indices);
double diff = maxIndex - indices[24];
if (diff > maxDiffIndex) {
maxDiffIndex = diff;
bestOffset = offset;
key[offset] = (char)((key[lastFound] + bestShift)%26);
}
}
}
lastFound = bestOffset;
found[lastFound] = true;
}
/*
* At this point, we have a very good guess of the keyword. The weak
* point seems to be not the relationships between keyletters (derived
* by the mutual indices of coincidence), but the choice from the 26
* cyclic shifts of the keyword based on frequency analysis. We refine
* our guess by selecting the shift which maximizes the average
* frequency of guessed plaintext 'e' over all parts of the ciphertext
* partition.
*/
int[][] bins = new int[keyLength][];
for (int offset = 0; offset < keyLength; offset++) {
bins[offset] = frequencies(cipherText, offset, keyLength);
}
int bestShift = 0,
maxFreq = -1;
for (int shift = 0; shift < 26; shift++) {
int totalFreq = 0;
for (int offset = 0; offset < keyLength; offset++) {
totalFreq += bins[offset][(4 + key[offset] + shift)%26];
}
if (totalFreq > maxFreq) {
maxFreq = totalFreq;
bestShift = shift;
}
}
for (int offset = 0; offset < keyLength; offset++) {
key[offset] = (char)((key[offset] + bestShift)%26);
}
return key;
}
/**
* Returns the frequencies of the letters in the given subtext of the given
* ciphertext. Element c (in the range 0 to 25) in the array that is
* returned indicates the number of occurrences of letter c in the subtext.
* The subtext is specified by the keyword length and an offset; e.g., the
* entire ciphertext is specified by offset 0 and key length 1.
*
* @param cipherText the entire ciphertext
* @param offset the offset from the start of the ciphertext
* @param keyLength the keyword length
*
* @return the frequencies of the 26 ciphertext letters
*/
public static int[] frequencies(char[] cipherText, int offset,
int keyLength) {
int[] bins = new int[26];
for (int i = offset; i < cipherText.length; i+= keyLength) {
bins[cipherText[i]]++;
}
return bins;
}
/**
* Returns the index of coincidence of the given subtext of the given
* ciphertext.
*
* @see #frequencies
*
* @param cipherText the entire ciphertext
* @param offset the offset from the start of the ciphertext
* @param keyLength the keyword length
*
* @return the index of coincidence
*/
public static double indexOfCoincidence(char[] cipherText, int offset,
int keyLength) {
double index = 0;
int[] bins = frequencies(cipherText, offset, keyLength);
int length = 0;
for (int c = 0; c < 26; c++) {
index += bins[c]*(bins[c] - 1);
length += bins[c];
}
return index/(length*(length - 1));
}
/**
* Returns the mutual indices of coincidence for the given subtexts of the
* given ciphertext, one for each of 26 possible shifts. Element c in the
* array that is returned indicates the mutual index of coincidence for the
* first subtext and the second subtext shifted by c.
*
* @see #frequencies
*
* @param cipherText the entire ciphertext
* @param offset1 the first offset from the start of the ciphertext
* @param offset2 the second offset from the start of the ciphertext
* @param keyLength the keyword length
*
* @return the mutual indices of coincidence
*/
public static double[] mutualIndicesOfCoincidence(char[] cipherText,
int offset1, int offset2,
int keyLength) {
double[] indices = new double[26];
/* Compute frequencies in each (monoalphabetic) block only once. */
int[] bins1 = frequencies(cipherText, offset1, keyLength);
int[] bins2 = frequencies(cipherText, offset2, keyLength);
int length1 = 0, length2 = 0;
for (int c = 0; c < 26; c++) {
length1 += bins1[c];
length2 += bins2[c];
}
/* Compute index for each possible shift. */
for (int shift = 0; shift < 26; shift++) {
for (int c = 0; c < 26; c++) {
indices[shift] += bins1[(c + shift)%26]*bins2[c];
}
indices[shift] /= length1*length2;
}
return indices;
}
/* Sorting is not built in to Java 1.1 */
private static void sort(int[] a) {
for (int i = 0; i < a.length - 1; i++) {
for (int j = i + 1; j < a.length; j++) {
if (a[i] > a[j]) {
int temp = a[i];
a[i] = a[j];
a[j] = temp;
}
}
}
}
private static void sort(double[] a) {
for (int i = 0; i < a.length - 1; i++) {
for (int j = i + 1; j < a.length; j++) {
if (a[i] > a[j]) {
double temp = a[i];
a[i] = a[j];
a[j] = temp;
}
}
}
}
}