/*Copyright (C) 2019-2022 Crawford Currie http://c-dot.co.uk*/
/**
* Letter node in a Dictionary. Each node has multiple links and helpers
* that trade off space for performance during word searches.
* Each LetterNode participates in two linked lists; the `next` pointer
* points to the next alternative to this letter when composing a word,
* while the `child` pointer points to the first in a chain of possible
* child nodes. For example, in a dictionary where we have the two words
* `NO`, `SO` and `SAD`:
* ```
* N -next-> S -next->
* | |
* child child
* | |
* `-> O(e) `-> A -next-> O(e)
* |
* child
* |
* D(e)
* ```
* where (e) indicates a node that is a valid end-of-word.
* The example is a tree; however the DAWG compressor will
* collapse letter sequences that are common to several words,
* to create a more optimal DAG.
*
* Once a Dictionary has been created and fully populated, it can be
* processed using `buildLists` to generate lists that support rapid
* navigation through the DAG without needing to traverse the node
* chains.
*/
class LetterNode {
/**
* Bit mask for end-of-word marker
* @member {number}
*/
static END_OF_WORD_BIT_MASK = 0x1;
/**
* Bit mask for end-of-list marker
* @member {number}
*/
static END_OF_LIST_BIT_MASK = 0x2;
/**
* Shift to create space for masks
* @member {number}
*/
static CHILD_INDEX_SHIFT = 2;
/**
* Mask for child index, exclusing above bit masks
* @member {number}
*/
static CHILD_INDEX_BIT_MASK = 0x3FFFFFFF;
/**
* Pointer to the next (alternative) node in this peer chain.
* During loading this will be a number that will be converted
* to a pointer.
* @member {(number|LetterNode)?}
*/
next;
/**
* Pointer to the head of the child chain
* @member {LetterNode?}
*/
child;
/**
* Is this the end of a valid word?
* @member {boolean}
*/
isEndOfWord = false;
/**
* List of nodes that link forward to this node. Set up
* by {@linkcode LetterNode#buildLists|buildLists}.
* @member {LetterNode[]?}
*/
preNodes;
/**
* List of letters that are in the nodes listed in `preNodes`.
* Set up by {@linkcode LetterNode#buildLists|buildLiss}.
* @member {string[]?}
*/
preLetters;
/**
* List of nodes that are linked to from this node. Set up
* by {@linkcode LetterNode#buildLists|buildLists}.
* @member {LetterNode[]?}
*/
postNodes;
/**
* List of letters that are in the nodes listed in `postNode`.
* Set up by {@linkcode LetterNode#buildLists|buildLists}.
* @member {LetterNode[]?}
*/
postLetters;
constructor(letter) {
/**
* The letter at this node
* @member {string}
*/
this.letter = letter;
}
/**
* @callback LetterNode~wordCallback
* @param {string} word found
* @param {LetterNode} node node where word was terminated
*/
/**
* Enumerate each word in the dictionary. Calls cb on each word.
* @param {string} s the word constructed so far
* @param {LetterNode~wordCallback} cb the callback
*/
eachWord(s, cb) {
let node = this;
while (node) {
if (node.isEndOfWord)
cb(s + node.letter, node);
if (node.child)
node.child.eachWord(s + node.letter, cb);
node = node.next;
}
}
/**
* Enumerate each LONG word in the dictionary. A long word is one
* that has no child nodes i.e. cannot be extended by adding more
* letters to create a new word. Calls cb on each word.
* Caution this is NOT the same as dawg/TrieNode.eachWord.
* @param {string} s the word constructed so far
* @param {wordCallback} cb the callback
*/
eachLongWord(s, cb) {
let node = this;
while (node) {
if (node.child)
node.child.eachLongWord(s + node.letter, cb);
else if (node.isEndOfWord)
cb(s + node.letter, node);
node = node.next;
}
}
/**
* @callback LetterNode~nodeCallback
* @param {LetterNode} node node
* @return {boolean} true to continue the iteration, false to stop it.
*/
/**
* Enumerate each node in the dictionary.
* Calls cb on each node, stops if cb returns false.
* @param {LetterNode~nodeCallback} cb the callback
*/
eachNode(cb) {
let node = this;
while (node) {
if (!cb(node))
return false;
if (node.child && !node.child.eachNode(cb))
return false;
node = node.next;
}
return true;
}
/**
* Add a letter sequence to this node. This is used to add
* whitelist nodes to a DAG.
* @param {string} word word being added
* @return {boolean} true if the word was added, false if it
* was already there
*/
add(word) {
//console.debug("Adding", word);
let node = this, added = false;
while (node) {
if (node.letter === word.charAt(0)) {
//console.debug("\tMatched", node.letter);
if (word.length === 1) {
if (!node.isEndOfWord) {
added = true;
node.isEndOfWord = true;
}
return added;
}
word = word.substring(1);
if (!node.child || node.child.letter > word.charAt(0)) {
//console.debug("\tAdding child", word.charAt(0));
const t = node.child;
node.child = new LetterNode(word.charAt(0));
added = true;
node.child.next = t;
}
//console.debug("\t->child", node.child.letter);
node = node.child;
} else if (!node.next || node.next.letter > word.charAt(0)) {
//console.debug("\tInserting", word.charAt(0), "before", node.next ? node.next.letter: "end");
const t = node.next;
node.next = new LetterNode(word.charAt(0));
added = true;
node.next.next = t;
} else {
node = node.next;
//console.debug("\t->next", node.letter);
}
}
/* istanbul ignore next */
return assert.fail(`Unreachable '${word}`);
}
/**
* Build forward and backward lists to allow us to navigate
* in both directions - forward through words, and backwards too.
* This has to be done from the root of the DAG, and has to be
* re-done if the DAG is modified..
*/
buildLists(nodeBefore) {
let node = this;
while (node) {
node.preNodes = [];
node.preLetters = [];
node.postNodes = [];
node.postLetters = [];
if (nodeBefore) {
node.preNodes.push(nodeBefore);
node.preLetters.push(nodeBefore.letter);
nodeBefore.postNodes.push(node);
nodeBefore.postLetters.push(node.letter);
}
if (node.child)
node.child.buildLists(node);
node = node.next;
}
}
/**
* Find the letter node at the end of the subtree that matches
* the last character in chars, even if it"s not isEndOfWord
* @param {string} chars a string of characters that may
* be the root of a word
* @param {number} index index into chars
* @return {LetterNode} node found, or undefined
*/
match(chars, index) {
let node = this;
while (node) {
if (node.letter === chars[index]) {
if (index === chars.length - 1)
return node;
if (node.child)
return node.child.match(chars, index + 1);
}
node = node.next;
}
return null;
}
/**
* Find all words below this node that match the character sequence
* passed from a given point in the sequence.
* " " acts as a wildcard.
* @param {string} chars the character sequence to match
* @param {number} index index into chars
* @param {string} word word asembled so far
* @param {string[]} list list of words found
*/
hangmen(chars, index, word, list) {
let node = this;
const ci = chars[index];
while (node) {
if (ci === " " || node.letter === ci) {
if (node.isEndOfWord && index === chars.length - 1)
list.push(word + node.letter);
if (index < chars.length - 1 && node.child)
node.child.hangmen(
chars, index + 1, word + node.letter, list);
}
node = node.next;
}
}
/**
* Find words that can be made from a sorted set of letters.
* @param {string[]} chars the available set of characters
* @param {string} realWord the string built so far in this recursion
* @param {string} blankedWord the string built using spaces for blanks
* if they are used
* @param {string[]} foundWords list of words found
*/
findWordsThatUse(chars, realWord, blankedWord, foundWords) {
let node = this;
while (node) {
// is this character available from chars?
// Only use blank if no other choice
let i = chars.indexOf(node.letter);
if (i < 0) // not there, try blank
i = chars.indexOf(" ");
if (i >= 0) {
const match = chars[i];
//console.debug("Found", match, "in chars");
// The char is available from chars.
// Is this then a word?
if (node.isEndOfWord) {
//console.debug("Found", realWord + node.letter);
// A word is found
foundWords[realWord + node.letter]
= blankedWord + match;
}
if (chars.length > 1) {
// Cut the matched letter out of chars and recurse
// over our child node chain
chars.splice(i, 1);
let child = node.child;
while (child) {
child.findWordsThatUse(
chars,
realWord + node.letter,
blankedWord + match,
foundWords);
child = child.next;
}
chars.splice(i, 0, match);
}
}
node = node.next;
}
}
/**
* Decode node information encoded in an integer in a
* serialised Dictionary.
* @param {number} i index of node in node list
* @param {number} numb encoded node
* @return {LetterNode} this
*/
decode(i, numb) {
if ((numb & LetterNode.END_OF_WORD_BIT_MASK) !== 0)
this.isEndOfWord = true;
if ((numb & LetterNode.END_OF_LIST_BIT_MASK) === 0)
this.next = i + 1;
if (((numb >> LetterNode.CHILD_INDEX_SHIFT)
& LetterNode.CHILD_INDEX_BIT_MASK) > 0)
this.child = ((numb >> LetterNode.CHILD_INDEX_SHIFT)
& LetterNode.CHILD_INDEX_BIT_MASK);
return this;
}
}
export { LetterNode };