TrieNode.js

/*Copyright (C) 2019-2022 Crawford Currie http://c-dot.co.uk*/

import { LetterNode } from "./LetterNode.js";

let nodeIds = 0;

/**
 * A Trie/DAWG node.
 * Represents a letter in a set of words.  It has pointers to a
 * child list representing the next letters that can follow this
 * letter, and a next pointer to the next alternative to this
 * letter in the child list of it's parent node.  Note this is
 * only used while generating a DAWG from a lexicon. TrieNodes are
 * serialised using the above structure but are then rebuilt using
 * {@linkcode LetterNode}s at the sharp end.
 */
class TrieNode {
  /**
   * The letter at this node
   * @member {string}
   */
  letter = undefined;

  /**
   * Unique ID for this node, purely used for debugging
   * @member {number}
   */
  id = -1;

  /**
   * Pointer to the next alternative to this letter
   * @member {TrieNode}
   */
  next = null;

  /**
   * Pointer to the first next letter if this letter is matched
   * @member {TrieNode}
   */
  child = null;

  /**
   * Marker for a valid end-of-word node
   * @member {boolean}
   */
  isEndOfWord = false;

  /**
   * Marker for the first child under a parent node. This is
   * used when the node is arrived at other than through the
   * parent
   * @member {boolean}
   */
  isFirstChild = false;

  /**
   * Will be set if the node is to be pruned after DAWG generation
   * @member {boolean}
   */
  isPruned = false;

  /**
   * Maximum number of nodes under this node (iei remaining length
   * of the longest word this node partiipates in)
   * @member {number}
   */
  maxChildDepth = 0;

  /**
   * Number of child nodes under this node - used for optimisation
   * @member {number}
   */
  numberOfChildren = 0;

  /**
   * Index of the node in the encoded DAWG, assigned when the
   * encoding is generated
   * @member {number}
   */
  index = -1;

  /**
   * @param {string} letter codepoint
   * @param {TrieNode} next next node pointer
   * @param {boolean} isWordEnding true if this is an end-of-word node
   * @param {number} starterDepth The maximum depth below this
   * node before the end-of-word is reached, for the first word
   * added
   * @param {boolean} isFirstChild is the first child of the parent node
   */
  constructor(letter, next, isWordEnding, starterDepth, isFirstChild) {
    this.letter = letter;
    this.next = next;
    this.isEndOfWord = isWordEnding;
    this.maxChildDepth = starterDepth;
    this.isFirstChild = isFirstChild;
    this.id = nodeIds++;
  }

  /* istanbul ignore next */
  /**
   * Debug
   * @param {boolean} deeply true to expand child nodes
   */
  toString(deeply) {
    let simpler = `{${this.id} ${this.letter}`;

    if (this.isEndOfWord)
      simpler += ".";
    if (this.child) {
      simpler += "+";
      if (deeply)
        simpler += this.child.toString(deeply);
    }
    simpler += "}";
    if (this.next) {
      simpler += "-";
      if (deeply)
        simpler += this.next.toString(deeply);
    }

    return simpler;
  }

  /**
   * Mark a node as pruned, and recursively mark every node
   * under and after it as well
   * @return {number} the total number of nodes pruned as a result
   */
  prune() {
    //console.debug(`Pruning ${this}`);
    this.isPruned = true;

    let result = 0;
    if (this.next)
      result += this.next.prune();

    if (this.child)
      result += this.child.prune();

    return result + 1;
  }

  /**
   * @callback TrieNode~wordCallback
   * @param {TrieNode} nodes list of nodes on the path
   * from the root to the end of the word
   */

  /**
   * Depth-first tree walk. Will visit ends of words in
   * sorted order.
   * @param {TrieNode[]} nodes list of nodes visited to create the word
   * @param {TrieNode~wordCallback} cb callback function
   * @private
   */
  eachWord(nodes, cb) {

    nodes.push(this);

    if (this.isEndOfWord)
      cb(nodes);

    if (this.child)
      this.child.eachWord(nodes, cb);

    nodes.pop();

    if (this.next)
      this.next.eachWord(nodes, cb);
  }

  eachNode(cb) {
    cb(this);
    if (this.next)
      this.next.eachNode(cb);
    if (this.child)
      this.child.eachNode(cb);
  }

  /**
   * Search along this's child next chain for a node with the
   * given letter.
   * @param {string} thisLetter letter to look for
   * @return {TrieNode} the node found, or null
   */
  findChild(thisLetter) {
    let result = this.child;
    while (result) {
      if (result.letter === thisLetter)
        return result;
      if (result.letter > thisLetter)
        break;
      result = result.next;
    }
    return null;
  }

  /**
   * Insert a letter in the child list of this node. The child
   * list is sorted on letter
   * @param {string} thisLetter letter to add
   * @param {boolean} wordEnder true if this is the end of a word
   * @param {number} startDepth depth of shallowest node
   */
  insertChild(thisLetter, wordEnder, startDepth) {
    this.numberOfChildren++;

    if (!this.child) {
      // child list does not exist yet
      this.child = new TrieNode(
        thisLetter, null, wordEnder, startDepth, true);
      return;
    }

    if (this.child.letter > thisLetter) {
      // thisLetter should be the first in the child list
      this.child.isFirstChild = false;
      this.child = new TrieNode(
        thisLetter, this.child, wordEnder, startDepth, true);
      return;
    }

    // thisLetter is not the first in the list
    let child = this.child;
    while (child.next) {
      if (child.next.letter > thisLetter)
        break;
      child = child.next;
    }
    child.next = new TrieNode(
      thisLetter, child.next, wordEnder, startDepth, false);
  }

  /**
   * Determine if this and other are the parent nodes
   * of equal Trie branches.
   * @param {TrieNode} other other tree to compare
   * @return {boolean} if the are the same
   */
  sameSubtrie(other) {
    //console.debug("CMP",this.toString(), !other ? "null" : other.toString());
    if (other === this) // identity
      return true;

    if (other === null
        || other.letter !== this.letter
        || other.maxChildDepth !== this.maxChildDepth
        || other.numberOfChildren !== this.numberOfChildren
        || other.isEndOfWord !== this.isEndOfWord
        || !this.child && other.child
        || this.child && !other.child
        || !this.next && other.next
        || this.next && !other.next)
      return false;

    if (this.child && !this.child.sameSubtrie(other.child))
      return false;

    if (this.next && !this.next.sameSubtrie(other.next))
      return false;

    return true;
  }

  /**
   * Returns the first node in the red[maxChildDepth], that is
   * identical to 'this'. If the function returns 'this'
   * then it is the first of its kind in the
   * Trie.
   * @param {TrieNode[][]} red reduction structure
   * @return {TrieNode}
   * @private
   */
  findSameSubtrie(red) {
    //return red[this.maxChildDepth].find(n => this.sameSubtrie(n));
    let x;
    for (x = 0; x < red[this.maxChildDepth].length; x++)
      if (this.sameSubtrie(red[this.maxChildDepth][x]))
        break;
    /* istanbul ignore if */
    if (red[this.maxChildDepth][x].isPruned)
      throw Error("Same subtrie equivalent is pruned!");
    return red[this.maxChildDepth][x];
  }

  /**
   * Recursively replaces all redundant nodes in a trie with their
   * first equivalent.
   * @param {TrieNode[][]} red reduction structure
   * @return {number} no of nodes replaced
   */
  replaceRedundantNodes(red) {

    if (!this.next && !this.child)
      // Leaf node
      return 0;

    let trimmed = 0;
    if (this.child) {
      if (this.child.isPruned) {
        //console.debug(`Trimming ${this.child}`);
        // we have found a node that has been tagged for
        // as pruned, so let us replace it with its first
        // equivalent which isn't tagged.
        this.child = this.child.findSameSubtrie(red);
        /* istanbul ignore if */
        if (this.child === null)
          throw Error("Something horrible");
        trimmed++;
      } else
        trimmed += this.child.replaceRedundantNodes(red);
    }

    // Traverse the rest of the 'Trie', but a 'TrieNode' that is
    // not a direct child will never be directly replaced.
    // This will allow the resulting 'Dawg' to fit into a
    // contiguous array of node lists.
    if (this.next)
      trimmed += this.next.replaceRedundantNodes(red);

    return trimmed;
  }

  /**
   * Encode the node in a pair of integers. Requires node indices to have
   * been established.
   * @return {number[]} array with the 2-integer encoding
   */
  encode() {
    const array = [ this.letter.codePointAt(0) ];
    let numb = 0;
    if (this.child)
      numb |= (this.child.index << LetterNode.CHILD_INDEX_SHIFT);
    if (this.isEndOfWord)
      numb |= LetterNode.END_OF_WORD_BIT_MASK;
    if (!this.next)
      numb |= LetterNode.END_OF_LIST_BIT_MASK;
    array.push(numb);
    return array;
  }
}

export { TrieNode };