The previous solution I coded for the problem of finding words written in periodic element symbols didn’t sit well with me. It worked, but it was sub-optimal and was too rigid in how it tried to find matches. A more complete and elegant solution is below and will discover *all* possible variants of elements for a given word. At each position in the word, it uses both 1- and 2-letter elements and branches out. The initial recursion will generate many more incomplete/partial matches, but then the finalizing pass will recompose and flatten all paths through the graph and then cleanup after itself. This makes me happy.
When running this code, the results are:
Number of words found: 9092 // a 22% increase
Longest word: Internationalisation
I, N, Te, Rn, At, I, O, N, Al, I, S, At, I, O, N
In, Te, Rn, At, I, O, N, Al, I, S, At, I, O, N
I, N, Te, Rn, At, I, O, Na, Li, S, At, I, O, N
Longest word with unique variants: Bureaucratisation
B, U, Re, Au, Cr, At, I, S, At, I, O, N
B, U, Re, Au, C, Ra, Ti, S, At, I, O, N
Longest word with most variants: Consciousnesses
C, O, N, S, C, I, O, U, S, Ne, S, Se, S
C, O, N, S, C, I, O, U, S, Ne, S, S, Es
C, O, N, S, C, I, O, U, S, N, Es, Se, S
C, O, N, S, C, I, O, U, S, N, Es, S, Es
C, O, N, Sc, I, O, U, S, Ne, S, Se, S
C, O, N, Sc, I, O, U, S, Ne, S, S, Es
C, O, N, Sc, I, O, U, S, N, Es, Se, S
C, O, N, Sc, I, O, U, S, N, Es, S, Es
C, O, N, S, C, I, O, U, SN, Es, Se, S
C, O, N, S, C, I, O, U, SN, Es, S, Es
Co, N, S, C, I, O, U, S, Ne, S, Se, S
Co, N, S, C, I, O, U, S, Ne, S, S, Es
Co, N, S, C, I, O, U, S, N, Es, Se, S
Co, N, S, C, I, O, U, S, N, Es, S, Es
Longest word with most unique variants: Carbines
C, Ar, B, I, Ne, S
C, Ar, B, I, N, Es
Ca, Rb, I, Ne, S
Ca, Rb, I, N, Es
C, Ar, Bi, Ne, S
C, Ar, Bi, N, Es
C, Ar, B, In, Es
Elements that were never used in any word:
Bk, Cd, Cf, Cm, Fm, Gd, Hg, Md, Mg, Sg, Sr, Zn, Zr
import java.io.*; import java.util.*; public class PeriodicWordFinder { private TreeSet<String> words; private TreeSet<String> elements; private ArrayList<PeriodicWord> periodicWords; public PeriodicWordFinder() { try { words = read("wordlist.txt"); elements = new TreeSet<>((s1, s2) -> { int lenthComparison = Integer.compare(s1.length(), s2.length()); if (lenthComparison == 0) return s1.compareTo(s2); else return lenthComparison; }); elements.addAll(read("elements.txt")); periodicWords = new ArrayList<>(); System.out.println("words.size() = " + words.size()); System.out.println("elements.size() = " + elements.size()); } catch (Exception e) { e.printStackTrace(); } } private TreeSet<String> read(String fileName) throws Exception { TreeSet<String> retval = new TreeSet<>(); InputStream stream = getClass().getClassLoader().getResourceAsStream(fileName); BufferedReader in = new BufferedReader(new InputStreamReader(stream)); String line; while ((line = in.readLine()) != null) { line = line.toUpperCase().trim(); //no elements contain Q or J if (line.contains("Q") || line.contains("J")) { continue; } retval.add(line); } in.close(); return retval; } private void findWords() { for (String word : words) { PeriodicWord pWord = new PeriodicWord(word); if (periodicWord(1, pWord)) { periodicWords.add(pWord); } } //the new variant searcher algo will find partial/incomplete matches //this needs to be filtered out by the finalizeVariants method finalizeVariants(); for (PeriodicWord pWord : periodicWords) { System.out.println("found: " + pWord.getInitialWord()); } } private boolean periodicWord(long variantId, PeriodicWord word) { if (word.getWord(variantId).length() == 0) return true; /*COSEU -- 11 C -- 111 O -- 1111 S -- 11111 Eu -- 111 O -- 1112 Se -- 11121 U -- 112 Os -- 1121 Eu -- 12 Co -- 121 S -- 1211 Eu -- 122 Se -- 1221 U */ long ndx = 1; boolean matchFound = false; for (String element : elements) { if (word.getWord(variantId).startsWith(element)) { matchFound = true; long newVariantId = variantId * 10 + ndx++; String alreadyPresent = word.setElement(newVariantId, element); //if (alreadyPresent != null) { // System.err.println("Should not already be element " + alreadyPresent + // " at newVariantId " + variantId + " for word" + word.getInitialWord()); //} word.setWord(newVariantId, word.getWord(variantId).substring(element.length())); if (!periodicWord(newVariantId, word)) { //we found a partial match //it will be dealt with in finalize } } } return matchFound; } private void finalizeVariants() { for (Iterator<PeriodicWord> it = periodicWords.iterator(); it.hasNext(); ) { PeriodicWord pWord = it.next(); TreeMap<Long, String> elementVariants = pWord.getElementVariants(); boolean allFound = false; while (!allFound && !elementVariants.isEmpty()) { //start from the end of the collection and work your way backwards, //collapsing the paths in the graph String lastElement = elementVariants.lastEntry().getValue().toUpperCase(); if (pWord.getInitialWord().toUpperCase().endsWith(lastElement)) { //build a word List<String> elementVariant = buildVariant(elementVariants); //add last sanity check, check that you don't return a partial word String elVarStr = String.join("", elementVariant).toUpperCase(); if (pWord.getInitialWord().equals(elVarStr)) { pWord.addElementWordVariant(elementVariant); } //remove tail of collection and continue elementVariants.remove(elementVariants.lastKey()); } else { //all words found, stop loop allFound = true; } } //if only partial words were mapped - no full words - remove as invalid if (pWord.elementWordVariants.isEmpty()) { it.remove(); } } } private List<String> buildVariant(TreeMap<Long, String> elementVariants) { /*COSEU -- 11 C -- 111 O -- 1111 S -- 11111 Eu -- 111 O -- 1112 Se -- 11121 U -- 112 Os -- 1121 Eu -- 12 Co -- 121 S -- 1211 Eu -- 122 Se -- 1221 U */ List<String> retval = new ArrayList<>(); boolean done = false; long key = elementVariants.lastKey(); while (!done) { String val = elementVariants.get(key); if (val != null) { retval.add(0, val); } else { done = true; } key = key / 10; } return retval; } public List<PeriodicWord> getPeriodicWords() { return periodicWords; } public Set<String> getElements() { return elements; } public static void main(String[] args) { PeriodicWordFinder ew = new PeriodicWordFinder(); ew.findWords(); //now we have fun List<PeriodicWord> pWords = ew.getPeriodicWords(); System.out.println("Number of words found: " + pWords.size()); //longest word pWords.sort((o1, o2) -> Integer.compare(o1.getInitialWord().length(), o2.getInitialWord().length())); System.out.println("Longest word: " + pWords.get(pWords.size() - 1)); //longest word with only unique elements pWords.sort((o1, o2) -> { String s1 = String.join("", o1.getVariantWithMostElements(true)); String s2 = String.join("", o2.getVariantWithMostElements(true)); int lenthComparison = Integer.compare(s1.length(), s2.length()); if (lenthComparison == 0) return s1.compareTo(s2); else return lenthComparison; }); System.out.println("Longest word with unique variants: " + pWords.get(pWords.size() - 1)); //longest word with with most variants pWords.sort((o1, o2) -> { int numberComparison = Integer.compare(o1.getNumberOfVariants(false), o2.getNumberOfVariants(false)); if (numberComparison == 0) { int lenthComparison = Integer.compare(o1.getInitialWord().length(), o2.getInitialWord().length()); if (lenthComparison == 0) return o1.getInitialWord().compareTo(o2.getInitialWord()); else return lenthComparison; } else { return numberComparison; } }); System.out.println("Longest word with most variants: " + pWords.get(pWords.size() - 1)); //longest word with most unique variants pWords.sort((o1, o2) -> { int numberComparison = Integer.compare(o1.getNumberOfVariants(true), o2.getNumberOfVariants(true)); if (numberComparison == 0) { int lenthComparison = Integer.compare(o1.getInitialWord().length(), o2.getInitialWord().length()); if (lenthComparison == 0) return o1.getInitialWord().compareTo(o2.getInitialWord()); else return lenthComparison; } else { return numberComparison; } }); System.out.println("Longest word with most unique variants: " + pWords.get(pWords.size() - 1)); //is there any element that was not used? TreeSet<String> elements = new TreeSet<>(ew.getElements()); for(PeriodicWord pWord : pWords){ for (List<String> variantElements : pWord.elementWordVariants ){ elements.removeAll(variantElements); if (elements.isEmpty()){ break; } } } System.out.println("Elements not used: " + elements); } private class PeriodicWord { private TreeMap<Long, String> wordVariants; //will get mangled during recursion private String initialWord; //keep track of initial word private TreeMap<Long, String> elementVariants; //will get updated during recursion private List<List<String>> elementWordVariants; public int getNumberOfVariants(boolean onlyUnique) { List<List<String>> sorted = new ArrayList<>(); sorted.addAll(elementWordVariants); if (onlyUnique) { sorted.removeIf(this::containsDuplicateElements); } return sorted.size(); } public List<String> getVariantWithMostElements(boolean onlyUnique) { if (elementWordVariants.size() == 1) { return elementWordVariants.get(0); } else { List<List<String>> sorted = new ArrayList<>(); sorted.addAll(elementWordVariants); if (onlyUnique) { sorted.removeIf(this::containsDuplicateElements); } sorted.sort((o1, o2) -> Integer.compare(o1.size(), o2.size())); if (sorted.size() > 0) { return sorted.get(sorted.size() - 1); } else { return new ArrayList<>(); } } } private boolean containsDuplicateElements(List<String> elementVariant) { return new HashSet<>(elementVariant).size() != elementVariant.size(); } public PeriodicWord(String word) { initialWord = word; elementVariants = new TreeMap<>(); wordVariants = new TreeMap<>(); wordVariants.put(1l, word); elementWordVariants = new ArrayList<>(); } public void setWord(long variantId, String word) { wordVariants.put(variantId, word); } public TreeMap<Long, String> getElementVariants() { return elementVariants; } public String getWord(long variantId) { return wordVariants.get(variantId); } public String setElement(long variantId, String element) { return elementVariants.put(variantId, element); } public String getInitialWord() { return initialWord; } public void addElementWordVariant(List<String> elementWordVariant) { elementWordVariants.add(elementWordVariant); } @Override public String toString() { return "PeriodicWord{" + "initialWord='" + initialWord + '\'' + ", elementWordVariants=" + elementWordVariants + '}'; } } }