import {
  containsHTML,
  isSymbolString,
  regexNewLine,
  removeSpaces,
} from "../string";
import { noAbbreviation } from "./abbreviation";

import {
  forTwoUppercaseCharacter,
  identifierCodeSpacer,
  maybeContainsDateRange,
  predictSteps,
} from "./pronounciation";

import { romanTransform } from "./roman";

/**
 * Marks the words of a string with <spkn>, <sps> and <spw> tags.
 *
 * Example
 *   Input:  <h5>hello word</h5>
 *   Output: <h5><spkn><sps><spw>hello</spw> <spw>word</spw></sps></spkn></h5>
 *
 * @param {string} str - Plain text or an HTML string. Falsy values are
 *   returned unchanged.
 * @param {Function} [noAbbreviationFunc=noAbbreviation] - Forwarded to
 *   `dotAsSentences` for every piece of text that gets marked.
 * @returns {string} The marked-up HTML string (or the falsy input as-is).
 */
export function markTheWords(str, noAbbreviationFunc = noAbbreviation) {
  if (!str) {
    return str;
  }

  // Only input that is not already HTML goes through regexNewLine.
  const normalized = containsHTML(str) ? str : regexNewLine(str);

  // Re-check on purpose: the decision is made on the normalized string,
  // which may differ from the original input.
  if (containsHTML(normalized)) {
    return iterateOverHTMLtag(normalized, (txt) =>
      dotAsSentences(txt, noAbbreviationFunc)
    );
  }

  // Plain text: mark it and serialize the result through a throwaway <div>.
  const holder = document.createElement("div");
  holder.appendChild(dotAsSentences(normalized, noAbbreviationFunc));
  return holder.innerHTML;
}

/**
 * Builds the marker tree for a piece of plain text: one <spkn> wrapper that
 * contains one <sps> per sentence, each containing one <spw> per word.
 *
 * Sentences are split on ". " / ".\n" / "。". Every <spw> carries:
 *   - `sp`    : the spoken form of the word after the transform rules below,
 *   - `steps` : the value returned by `predictSteps` for that spoken form,
 *   - `sps-context` (optional): the whole input string, set only for a
 *     one-character symbol when `maybeContainsDateRange(str)` is truthy.
 *
 * @param {string} str - Text to mark up.
 * @param {Function} noAbbreviationFunc - Called with each word (plus its
 *   adjacent non-word characters) and expected to return a string.
 * @returns {HTMLElement} The <spkn> element. It has no children when the
 *   input contains no words.
 */
function dotAsSentences(str, noAbbreviationFunc) {
  const wrapper = document.createElement("spkn");
  const sentences = str.split(/\.\s|\.\n|。/);
  const separators = str.match(/\.\s|\.\n|。/g);

  // Matches a single neighbouring character that is a symbol, digit or
  // whitespace; such a character is handed to the transform rules as context.
  const contextChar = /^[\W\d\s]+$/;

  // Separator that followed sentence `i`; "." when there is none.
  function separation(i) {
    return (separators && separators[i]) || ".";
  }

  // `str` is exactly sentences[0] + separators[0] + sentences[1] + ..., so the
  // absolute offset of each sentence can be tracked without searching.
  let sentenceStart = 0;

  for (let i = 0, len = sentences.length; i < len; i++) {
    const sentence = sentences[i];
    const words = sentence.match(/\S+/g);

    if (words) {
      const newSps = document.createElement("sps");
      const separator = separation(i);
      const separatorChar = removeSpaces(separator);

      // Position inside `sentence` just past the previous word. Searching from
      // here (not from the previous word's start) keeps repeated words and
      // words contained in their predecessor ("this is") at the right index.
      let cursor = 0;

      // Deliberately a synchronous callback: nothing is awaited, and an async
      // callback would turn helper exceptions into unhandled rejections.
      words.forEach((match, indexTagWord) => {
        const localIdx = sentence.indexOf(match, cursor);
        cursor = localIdx + match.length;

        const idx = sentenceStart + localIdx;
        const idxAfter = idx + match.length;
        const charBefore = str[idx - 1];
        const charAfter = str[idxAfter];

        // Add the character after and before the word so the rules below
        // (noAbbreviationFunc, romanTransform) get more context.
        let textWillSpoken = match;
        if (charAfter !== undefined && contextChar.test(charAfter)) {
          textWillSpoken += charAfter;
        }
        if (charBefore !== undefined && contextChar.test(charBefore)) {
          textWillSpoken = charBefore + textWillSpoken;
        }

        // Rules [1]: GWS -> Get Well Soon
        textWillSpoken = noAbbreviationFunc(textWillSpoken);

        // Rules [2]: II. -> 2
        textWillSpoken = romanTransform(textWillSpoken);

        textWillSpoken = textWillSpoken.trim();

        // Rules [3]: It's a document code or an identifier.
        // B/1871/M.SM.01.00/2023. -> B / 1871 / M. S_M. 01 .00 / 2023.
        textWillSpoken = identifierCodeSpacer(textWillSpoken);

        // Rules [4]: the word is two uppercase characters
        // RI. -> R_I
        // NI  -> N_I
        textWillSpoken = forTwoUppercaseCharacter(textWillSpoken);

        const newSpokenW = document.createElement("spw");

        // Rules [5]: Maybe contains date range
        // Register: 11 December 2023 – 9 January 2024
        if (
          textWillSpoken.length === 1 &&
          isSymbolString(textWillSpoken) &&
          maybeContainsDateRange(str)
        ) {
          newSpokenW.setAttribute("sps-context", str);
        }

        // steps -1 means the program doesn't know how many steps it takes to
        // speak that word; this is useful for the calibrate function.
        const steps = predictSteps(textWillSpoken);

        newSpokenW.setAttribute("steps", steps);
        newSpokenW.setAttribute("sp", textWillSpoken);
        // NOTE(review): the word is inserted as markup, not as text. This looks
        // intentional given how callers serialize the result - confirm before
        // switching to textContent.
        newSpokenW.innerHTML = match;

        // Leading space: only before the first word of a sentence, when that
        // single space is not itself preceded by a space or the separator.
        if (
          indexTagWord === 0 &&
          charBefore === " " &&
          str[idx - 2] !== " " &&
          str[idx - 2] !== separatorChar
        ) {
          newSps.appendChild(newTextEl(" "));
        }

        // Append word to sentence node.
        newSps.appendChild(newSpokenW);

        // Trailing space: after every word that is followed by a character
        // other than the sentence separator.
        if (
          charAfter !== undefined &&
          (charAfter !== separatorChar || charAfter === " ")
        ) {
          newSps.appendChild(newTextEl(" "));
        }
      });

      wrapper.appendChild(newSps);

      // Re-emit the separator between sentences (never after the last one).
      if (i < len - 1) {
        wrapper.appendChild(newTextEl(separator));
      }
    }

    // Advance past this sentence and the real separator that followed it.
    sentenceStart += sentence.length;
    if (separators && separators[i]) {
      sentenceStart += separators[i].length;
    }
  }

  return wrapper;
}

/**
 * Parses an HTML string and replaces each of its text nodes with the node
 * returned by `executeOverIterate`.
 *
 * Text nodes that consist of exactly one non-alphanumeric character are left
 * untouched. If `executeOverIterate` is not a function, no node is replaced.
 *
 * @param {string} html - HTML source to process.
 * @param {Function} executeOverIterate - Receives a text node's value and
 *   returns the DOM node that takes its place.
 * @returns {string} The serialized HTML, with `&lt;` / `&gt;` entities turned
 *   back into literal angle brackets.
 */
function iterateOverHTMLtag(html, executeOverIterate) {
  // Let the browser parse the markup inside a detached container.
  const container = document.createElement("div");
  container.innerHTML = html;

  for (const textNode of getTextNodes(container)) {
    const text = textNode.nodeValue;

    // A lone symbol (e.g. a single "-" or " ") is not worth wrapping.
    const isLoneSymbol = text.length === 1 && !/[a-zA-Z0-9]/.test(text);
    if (isLoneSymbol || typeof executeOverIterate !== "function") {
      continue;
    }

    textNode.parentNode.replaceChild(executeOverIterate(text), textNode);
  }

  // NOTE(review): un-escaping entities re-activates any markup that was
  // escaped in the input - confirm callers only pass trusted HTML.
  const serialized = container.innerHTML;
  return serialized.replace(/&lt;/g, "<").replace(/&gt;/g, ">");
}

/**
 * Collects every text node below `element`, depth-first and in document
 * order. Nodes that are neither text nodes nor elements are ignored.
 *
 * @param {Node} element - Root whose `childNodes` are walked recursively.
 * @returns {Node[]} The text nodes found (empty when there are none).
 */
function getTextNodes(element) {
  const collected = [];

  for (const child of Array.from(element.childNodes)) {
    switch (child.nodeType) {
      case Node.TEXT_NODE:
        collected.push(child);
        break;
      case Node.ELEMENT_NODE:
        // Descend into nested elements and keep their text nodes in order.
        collected.push(...getTextNodes(child));
        break;
      default:
        // Comments, processing instructions, etc. carry no text to mark.
        break;
    }
  }

  return collected;
}

/**
 * Creates a DOM text node holding the given string.
 *
 * @param {string} str - Content of the text node.
 * @returns {Text} The node returned by `document.createTextNode`.
 */
function newTextEl(str) {
  const textNode = document.createTextNode(str);
  return textNode;
}
