// Adam Blank // 1/7/2015 // The frequency analysis class attempts to solve transposition ciphers // using a frequency analysis. import java.util.Arrays; import java.util.Scanner; import java.util.Collections; import java.util.List; import java.util.ArrayList; import java.io.File; import java.io.FileNotFoundException; public class FrequencyAnalysis { /* Characters ordered from most to least frequently used */ public static final List ENGLISH_FREQUENCY_ORDER = Arrays.asList( 'e','t','a','o','i','n','s','h','r','d','l','u','c', 'm','f','w','y','p','v','b','g','k','j','q','x','z' ); /* The amount two doubles can be apart and still be considered equal. */ private static final double DELTA = 0.00001; /* The threshold used to determine if the percentages are far enough apart to * assign a letter or not. */ private static final double EASY_THRESHOLD = 0.01; /* The number of runs of passes through the text on the dictionary the analysis will perform. */ private static final int NUMBER_DICTIONARY_RUNS = 10; /* The ciphertext of the cryptogram. */ private String cipherText; /* The decoded text so far. Represents undecoded letters with UPPERCASE * and decoded letters as LOWERCASE. */ private String decodedText; /* A dictionary of words that we use to complete the cryptogram. */ private List dictionary; /* The frequency ordering in the ciphertext. */ private List frequencyOrdering; /* A LetterInventory containing the letters in the cryptogram. */ private LetterInventory cryptogramInventory; /* All of the CIPHERTEXT letters that are currently unassigned. We maintain the invariant that these letters are in CIPHERTEXT frequency order. */ private List unassignedCipherTextLetters; /* All of the PLAINTEXT letters that are currently unassigned. We maintain the invariant that these letters are in ENGLISH frequency order. */ private List unassignedPlainTextLetters; /* post: constructs a frequency analysis on the ciphertext given */ public FrequencyAnalysis(String ciphertext) throws FileNotFoundException { readCiphertext(ciphertext); calculateFrequencyOrdering(); readDictionary(); } /* post: Attempts to use the most definitive frequencies to fill in letters in the cryptogram. This function will overwrite any previous decoding work that was done. */ public void decipherEasyLetters() { /* Begin with the cipherText we were given. Since we internally represent un-decoded letters with upper case, we also convert everything to uppercase as well. */ this.decodedText = this.cipherText.toUpperCase(); /* Since we are starting over, we must initialize the unassigned letters variables to contain all of the letters. */ this.unassignedCipherTextLetters = new ArrayList(ENGLISH_FREQUENCY_ORDER); this.unassignedPlainTextLetters = new ArrayList(this.frequencyOrdering); /* Run through all of the letters, and try to assign them based on their frequencies, as compared to English frequencies. */ for (int i = 0; i < ENGLISH_FREQUENCY_ORDER.size(); i++) { char englishLetter = ENGLISH_FREQUENCY_ORDER.get(i); char cipherLetter = frequencyOrdering.get(i); double frequency = cryptogramInventory.getLetterPercentage(cipherLetter); double previousFrequency = i == 0 ? 1 : cryptogramInventory.getLetterPercentage(frequencyOrdering.get(i-1)); /* If the current frequency is far enough from the previous one, then we assume the letter is in English frequency order. Go ahead and update the output. */ if (previousFrequency - frequency > EASY_THRESHOLD) { decodedText = decodedText.replace( Character.toUpperCase(cipherLetter), englishLetter); unassignedCipherTextLetters.remove(new Character(cipherLetter)); unassignedPlainTextLetters.remove(new Character(englishLetter)); } } } /* post: Attempts to use existing partial translations with a dictionary to translate the ciphertext */ public void decipherWithDictionary() { for (int x = 0; x < NUMBER_DICTIONARY_RUNS; x++) { /* Get all of the words with one letter missing. */ String[] words = decodedText.replaceAll("[,.?!\"-]", "").split(" "); List newlyAssigned = new ArrayList(); for (char toReplace : unassignedCipherTextLetters) { toReplace = Character.toUpperCase(toReplace); List possibleChoices = new ArrayList(unassignedPlainTextLetters); /* Run through the words with ONLY toReplace untranslated in them. * Find all the possible valid translations, and if we end up with a * single choice, then we assume that it is the correct translation * for that character. */ for (int word = 0; word < words.length; word++) { /* Only worry about this word if it has the character we're concerned with. */ if (words[word].matches("[a-z-]*(" + toReplace + "+[a-z-]*)+")) { List workingChoices = new ArrayList(); for (char choice : possibleChoices) { if (dictionary.contains(words[word].replace(toReplace, choice))) { workingChoices.add(choice); } } /* Ignore the case where none of the given possibilities work-- this case likely means a word is missing from the dictionary. */ if (workingChoices.size() > 0) { possibleChoices.retainAll(workingChoices); } } } /* If we have narrowed down the possibilities for the translation to a single choice, assign that translation, and decode it in the text. */ if (possibleChoices.size() == 1) { char choice = possibleChoices.get(0); newlyAssigned.add(new Character(Character.toLowerCase(toReplace))); unassignedPlainTextLetters.remove(new Character(choice)); decodedText = decodedText.replace(toReplace, choice); } } unassignedCipherTextLetters.removeAll(newlyAssigned); } } /* post: returns a string representation of result of the frequency analysis on the ciphertext; indicates that a letter is untranslated with a '*' */ public String toString() { return decodedText.replaceAll("[A-Z]", "*"); } /* post: frequencyOrdering is populated with a list of all lowercase letters ordered * from most to least frequent in the ciphertext; cryptogramInventory is populated with * a letter inventory corresponding to the ciphertext */ private void calculateFrequencyOrdering() { LetterInventory cryptogramInventory = new LetterInventory(this.cipherText); this.cryptogramInventory = cryptogramInventory; this.frequencyOrdering = this.getFrequencyOrder(); /* Reverse the frequencyOrdering, because getFrequencyOrder() returns a list * from least-to-most frequent, and we want the opposite. */ Collections.reverse(this.frequencyOrdering); } /* pre: throws a FileNotFoundException if file does not exist * post: cipherText is populated with the contents of file, and * decodedText is initialized to an all uppercase version of the ciphertext */ private void readCiphertext(String file) throws FileNotFoundException { Scanner input = null; input = new Scanner(new File(file)); StringBuilder originalText = new StringBuilder(); while (input.hasNext()) { originalText.append(input.nextLine()); } this.cipherText = originalText.toString().toLowerCase(); this.decodedText = this.cipherText.toUpperCase(); } /* post: populates dictionary with the English words listed in the provided dictionary text file */ private void readDictionary() { Scanner input = null; try { input = new Scanner(new File("dictionary.txt")); } catch (FileNotFoundException e) { System.out.println("You must copy dictionary.txt to this directory" + " before running the frequency analysis."); System.exit(1); } List dictionary = new ArrayList(); while (input.hasNext()) { dictionary.add(input.next()); } this.dictionary = dictionary; } /* post: returns a list of the characters in the ciphertext in frequency order from least-to-most frequent */ private List getFrequencyOrder() { List order = new ArrayList(); List sortedFrequencies = new ArrayList(); for (char c = 'a'; c < ('z' + 1); c++) { sortedFrequencies.add(this.cryptogramInventory.getLetterPercentage(c)); } /* Sort the list of frequencies */ Collections.sort(sortedFrequencies); /* goes through the list of sorted counts */ for (int i = 0; i < sortedFrequencies.size(); i++) { /* go through all of the possible letters in the inventory, and append * each letter to the order (but only if it's not already in the list) */ for (char c = 'a'; c < ('z' + 1); c++) { if (!order.contains(c) && Math.abs(this.cryptogramInventory.getLetterPercentage(c) - sortedFrequencies.get(i)) < DELTA) { order.add(c); } } } return order; } public static void main(String[] args) { FrequencyAnalysis freq = null; try { freq = new FrequencyAnalysis("cryptogram.txt"); } catch (FileNotFoundException e) { System.out.println("You must copy cryptogram.txt to this directory" + " before running the frequency analysis."); System.exit(1); } /* Do the frequency analyses. */ freq.decipherEasyLetters(); freq.decipherWithDictionary(); /* See the result! */ System.out.println(freq); } }