import * as d3 from "d3";
import { BibleBooks, chars, extraChars, places } from "./static";
const bcv_parser =
  require("bible-passage-reference-parser/js/en_bcv_parser").bcv_parser;

// Single parser instance shared by the Bible-reference lookups in this module.
const bcv = new bcv_parser();

// The `books` entry of the parser's translation info for 'kjv'.
// NOTE(review): presumably the parser's book identifiers in canonical order —
// confirm against the bible-passage-reference-parser documentation.
export const bcvBibleBooks = bcv.translation_info('kjv').books

/**
 * Find the innermost token on the stack that fully contains `token`.
 *
 * The stack is scanned from its top (the most recently opened token) down to
 * its bottom, so the first hit is the tightest enclosing token.
 *
 * @param {Array<{idx: number, endIdx: number}>} stack - currently open tokens,
 *   outermost first.
 * @param {{idx: number, endIdx: number}} token - token to locate a parent for.
 * @returns {number} position in `stack` of the enclosing token.
 * @throws {Error} when no token on the stack contains `token`.
 */
function findEnclosingToken(stack, token) {
  for (let pos = stack.length - 1; pos >= 0; pos--) {
    const candidate = stack[pos];
    // Containment needs both bounds: the token must start at or after the
    // candidate's start and end at or before the candidate's end.
    if (candidate.idx <= token.idx && token.endIdx <= candidate.endIdx) {
      return pos;
    }
  }
  throw new Error("could not find parent token");
}

/**
 * Drop every token that lies entirely inside a "poem" token.
 *
 * "poem" and "base" tokens are always kept. When the input holds no poem at
 * all, the very same array is handed back untouched.
 *
 * @param {Array<{idx: number, endIdx: number, type: string}>} sortedTokens
 * @returns {Array<{idx: number, endIdx: number, type: string}>}
 */
const removeWithinPoems = (sortedTokens) => {
  const poemSpans = sortedTokens.filter((tok) => tok.type === "poem");
  if (poemSpans.length === 0) {
    return sortedTokens;
  }

  const isInsidePoem = (tok) =>
    poemSpans.some((poem) => poem.idx <= tok.idx && tok.endIdx <= poem.endIdx);

  return sortedTokens.filter(
    (tok) => tok.type === "poem" || tok.type === "base" || !isInsidePoem(tok)
  );
};

/**
 * Turn a sorted list of nested/overlapping tokens into a flat list of
 * consecutive segments.
 *
 * The first token is treated as the outermost one. Each later token is placed
 * under its innermost enclosing token; the text between the current position
 * and the start of that token is emitted as a segment belonging to the
 * enclosing token, and tokens that have been left behind are closed off with a
 * segment covering their remaining tail.
 *
 * Zero-length segments may be produced; callers filter them out.
 *
 * @param {Array<{idx: number, endIdx: number, type: string, obj?: *}>} sortedTokensPre
 *   tokens ordered by `idx`, outermost token first.
 * @returns {Array<{idx: number, endIdx: number, type: string, obj: *}>}
 *   segments in emission order; an empty array for empty input.
 * @throws {Error} when a token has no enclosing token on the stack.
 */
function tokeniseString(sortedTokensPre) {
  // Tokens nested inside a poem are dropped so the poem stays one piece.
  const sortedTokens = removeWithinPoems(sortedTokensPre);
  if (sortedTokens.length === 0) {
    return [];
  }

  const segments = [];
  let currentIdx = sortedTokens[0].idx;
  let tokenStack = [sortedTokens[0]];

  // Emit the not-yet-covered tail of each given token, innermost first.
  const closeTokens = (closing) => {
    for (let k = closing.length - 1; k >= 0; k--) {
      const closed = closing[k];
      if (currentIdx < closed.endIdx) {
        segments.push({
          idx: currentIdx,
          endIdx: closed.endIdx,
          type: closed.type,
          obj: closed.obj,
        });
        currentIdx = closed.endIdx;
      }
    }
  };

  for (const t of sortedTokens.slice(1)) {
    const enclosingTokenIdx = findEnclosingToken(tokenStack, t);
    const enclosingToken = tokenStack[enclosingTokenIdx];

    // Everything stacked above the enclosing token ends before `t` starts.
    closeTokens(tokenStack.slice(enclosingTokenIdx + 1));

    // The gap before `t` belongs to the enclosing token, so it carries that
    // token's type AND payload (not the payload of the token that follows).
    segments.push({
      idx: currentIdx,
      endIdx: t.idx,
      type: enclosingToken.type,
      obj: enclosingToken.obj,
    });
    currentIdx = t.idx;

    tokenStack = tokenStack.slice(0, enclosingTokenIdx + 1);
    tokenStack.push(t);
  }

  // Close whatever is still open so the end of the text is captured.
  closeTokens(tokenStack);

  return segments;
}

/**
 * Turn a literal search term into a RegExp source string.
 *
 * Every regex metacharacter (and "-", as before) is backslash-escaped so the
 * term is matched literally; each whitespace character in the term becomes
 * `\s`, so it matches any single whitespace character in the text (for
 * example a line break where the term has a space).
 *
 * @param {string} t - literal text to search for.
 * @returns {string} pattern source suitable for `new RegExp(...)`.
 */
const prepRegex = (t) =>
  t
    // Must run before the whitespace step: that step introduces backslashes
    // that must not be escaped again.
    .replace(/[.*+?^${}()|[\]\\\-]/g, "\\$&")
    .replace(/\s/g, "\\s");

/**
 * Locate the first occurrence of a literal term in a text.
 *
 * @param {string} text - text to search in.
 * @param {string} item - literal term; it is passed through `prepRegex` first.
 * @param {boolean} [insensitive=true] - ignore letter case when true.
 * @returns {number} index of the first match, or -1 when there is none.
 */
export function simpleSearch(text, item, insensitive = true) {
  const flags = insensitive ? "ig" : "g";
  const pattern = new RegExp(prepRegex(item), flags);
  const firstHit = pattern.exec(text);
  return firstHit?.index ?? -1;
}

/**
 * Collect every occurrence of a literal term in a chunk of text.
 *
 * @param {string} chunk - text to search in.
 * @param {string} search - literal term; it is passed through `prepRegex` first.
 * @param {boolean} [insensitive=true] - ignore letter case when true.
 * @returns {RegExpMatchArray[]} all matches, in order of appearance.
 */
export function simpleSearchAll(chunk, search, insensitive = true) {
  const flags = insensitive ? "ig" : "g";
  const pattern = new RegExp(prepRegex(search), flags);
  return Array.from(chunk.matchAll(pattern));
}

/**
 * Parse a piece of text as one Bible reference and return its entities.
 *
 * Line breaks are turned into spaces and pairs of whitespace characters are
 * collapsed to one space before the text is handed to the shared parser.
 *
 * @param {string} text - reference text.
 * @returns {Array} the `entities` of the single parsed reference.
 * @throws {Error} when the parser does not yield exactly one parsed entity.
 */
export function getBibleEntities(text) {
  const flattened = text.replace(/\n/ig, ' ').replace(/\s\s/ig, ' ');
  const parsed = bcv.parse(flattened).parsed_entities();

  if (parsed.length !== 1) {
    throw new Error("Bible ref. not found");
  }

  return parsed[0].entities;
}

/**
 * Build one record per entry of `BibleBooks`, carrying the marks collected
 * for that book.
 *
 * Each key of `bibleIndices` is matched to the first `BibleBooks` entry whose
 * name (with its first space removed) starts with the key; "Jas" and "2Kgs"
 * are looked up as "James" and "2Kings". Books without any key get an empty
 * `ppMarks` list. The result is ordered by `bibleIndex`.
 *
 * @param {Object<string, Array>} bibleIndices - marks keyed by book id.
 * @returns {Array<{bibleIndex: number, ppMarks: Array}>}
 * @throws {Error} when a key matches no entry of `BibleBooks`.
 */
function makeBibleData(bibleIndices) {
  const records = [];
  const covered = new Set();

  for (const b in bibleIndices) {
    const wanted = { Jas: "James", "2Kgs": "2Kings" }[b] || b;
    const position = BibleBooks.findIndex((book) =>
      book.replace(" ", "").startsWith(wanted)
    );
    if (position === -1) {
      throw new Error(`didn't find ${b}`);
    }
    covered.add(position);
    records.push({ bibleIndex: position, ppMarks: bibleIndices[b] });
  }

  // Books never referenced still get a (blank) record.
  for (let position = 0; position < BibleBooks.length; position += 1) {
    if (!covered.has(position)) {
      records.push({ bibleIndex: position, ppMarks: [] });
    }
  }

  records.sort((left, right) => left.bibleIndex - right.bibleIndex);
  return records;
}

/**
 * List every character search term.
 *
 * Each key of `chars` yields two entries: the key itself (flagged `exact`)
 * and the value stored under that key (not `exact`). Each entry of
 * `extraChars` yields one non-exact entry that searches for itself.
 *
 * @returns {Array<{key: string, search: string, exact: boolean}>}
 */
function searchableChars() {
  const fromChars = Object.entries(chars).flatMap(([label, alias]) => [
    { key: label, search: label, exact: true },
    { key: label, search: alias, exact: false },
  ]);
  const fromExtras = Array.from(extraChars, (label) => ({
    key: label,
    search: label,
    exact: false,
  }));
  return [...fromChars, ...fromExtras];
}

/**
 * Find every character mention in a chunk, record it as a token and tally it.
 *
 * All search terms are matched case-sensitively. Every hit yields a token
 * ("character-simple" for exact terms, "character" otherwise). Hits are also
 * added to the mark's `chars` list and to `charCounts` / `charIndices`,
 * except non-exact hits for the keys "MAN.", "MEN.", "JUDGE." and "HELP.",
 * which produce a token only.
 *
 * @param {Array} marks - all marks; `marks[i]` is the one being processed.
 * @param {number} i - position of the current mark.
 * @param {Object<string, number>} charCounts - running hit count per key (mutated).
 * @param {Object<string, Array>} charIndices - mark ids per key (mutated).
 * @param {Array} tokensRaw - token list to append to (mutated).
 * @param {string} chunk - text of the current mark.
 */
function doChars(marks, i, charCounts, charIndices, tokensRaw, chunk) {
  const tokenOnlyKeys = ["MAN.", "MEN.", "JUDGE.", "HELP."];
  const mark = marks[i];
  const found = [];

  for (const { key, search, exact } of searchableChars()) {
    const skipTally = !exact && tokenOnlyKeys.includes(key);

    for (const hit of simpleSearchAll(chunk, search, false)) {
      found.push({
        idx: hit.index,
        endIdx: hit.index + search.length,
        type: exact ? "character-simple" : "character",
      });

      if (skipTally) {
        continue;
      }

      mark.chars.push(key);
      if (charCounts[key] === undefined) {
        charCounts[key] = 0;
      }
      if (charIndices[key] === undefined) {
        charIndices[key] = [];
      }
      charIndices[key].push(mark.idx);
      charCounts[key] += 1;
    }
  }

  // Tokens are handed over only once every term has been searched.
  found.forEach((tok) => {
    tokensRaw.push(tok);
  });
}

/**
 * Record the first mention of each known place in a chunk.
 *
 * For every place whose name occurs in the chunk, one "place" token is
 * appended, the name is added to the mark's `places` list, and the running
 * `placeCounts` / `placeIndices` tallies are updated. Only the first
 * occurrence per place is considered.
 *
 * @param {Array<{name: string, caseSensitive?: boolean}>} places - places to look for.
 * @param {Array} tokensRaw - token list to append to (mutated).
 * @param {Object<string, number>} placeCounts - hit count per place (mutated).
 * @param {Object<string, Array>} placeIndices - mark ids per place (mutated).
 * @param {Array} marks - all marks; `marks[i]` is the one being processed.
 * @param {string} chunk - text of the current mark.
 * @param {number} i - position of the current mark.
 */
const doPlaces = (
  places,
  tokensRaw,
  placeCounts,
  placeIndices,
  marks,
  chunk,
  i
) => {
  for (const { name, caseSensitive } of places) {
    const at = simpleSearch(chunk, name, !caseSensitive);

    if (at >= 0) {
      tokensRaw.push({ idx: at, endIdx: at + name.length, type: "place" });
      marks[i].places.push(name);

      if (placeCounts[name] === undefined) {
        placeCounts[name] = 0;
      }
      if (placeIndices[name] === undefined) {
        placeIndices[name] = [];
      }

      placeIndices[name].push(marks[i].idx);
      placeCounts[name] += 1;
    }
  }
};

/**
 * Turn bracketed Bible-reference matches into "bible" tokens.
 *
 * For each match, the text between its first and last character (i.e. without
 * the surrounding brackets) is parsed with `getBibleEntities`. The current
 * mark's id is filed in `bibleIndices` under the start book of every parsed
 * entity, and one token spanning the whole match is appended with the
 * entities as its `obj`.
 *
 * @param {Iterable<RegExpMatchArray>} bibleRefs - matches within the mark's chunk.
 * @param {Array} tokensRaw - token list to append to (mutated).
 * @param {Object<string, Array>} bibleIndices - mark ids per book (mutated).
 * @param {Array} marks - all marks; `marks[i]` is the one being processed.
 * @param {number} i - position of the current mark.
 */
const doBibleRefs = (bibleRefs, tokensRaw, bibleIndices, marks, i) => {
  for (const ref of bibleRefs) {
    const from = ref.index;
    const to = from + ref[0].length;
    const inner = marks[i].chunk.substring(from + 1, to - 1);
    const entities = getBibleEntities(inner);

    for (const entity of entities) {
      const book = entity.start.b;
      if (bibleIndices[book] === undefined) {
        bibleIndices[book] = [];
      }
      bibleIndices[book].push(marks[i].idx);
    }

    tokensRaw.push({ idx: from, endIdx: to, type: "bible", obj: entities });
  }
};

/**
 * Append one token of the given type for every regex match.
 *
 * @param {Iterable<RegExpMatchArray>} items - matches to convert.
 * @param {Array} tokensRaw - token list to append to (mutated).
 * @param {string} type - token type to assign.
 */
const doGeneric = (items, tokensRaw, type) => {
  for (const { index, 0: matched } of items) {
    tokensRaw.push({ idx: index, endIdx: index + matched.length, type });
  }
};

/**
 * Bin the collected mark ids with a d3 bin generator.
 *
 * The generator's domain runs from 0 to the `idx` of the last mark, and its
 * thresholds are the ticks a linear scale over that domain returns for a
 * requested count of 25. Character and place ids are binned per key; the
 * Bible data from `makeBibleData` gets a `grouped` property per record.
 *
 * @param {Array<{idx: *}>} marks - all marks; must not be empty.
 * @param {Object<string, Array>} charIndices - mark ids per character key.
 * @param {Object<string, Array>} placeIndices - mark ids per place name.
 * @param {Object<string, Array>} bibleIndices - mark ids per book id.
 * @returns {{charIndicesByGroup: Object, placeIndicesByGroup: Object, bibleDataByGroup: Array}}
 */
const doGroups = (marks, charIndices, placeIndices, bibleIndices) => {
  const lastMarkIdx = marks[marks.length - 1].idx;
  const scale = d3.scaleLinear().domain([0, lastMarkIdx]);
  const group = d3.bin().domain(scale.domain()).thresholds(scale.ticks(25));

  const binEach = (idsByKey) => {
    const binned = {};
    for (const key in idsByKey) {
      binned[key] = group(idsByKey[key]);
    }
    return binned;
  };

  const charIndicesByGroup = binEach(charIndices);
  const placeIndicesByGroup = binEach(placeIndices);

  const bibleDataByGroup = makeBibleData(bibleIndices);
  for (const record of bibleDataByGroup) {
    record.grouped = group(record.ppMarks);
  }

  return { charIndicesByGroup, placeIndicesByGroup, bibleDataByGroup };
};

/**
 * Find every `{number}` marker in the text.
 *
 * Each marker becomes a record with its id (`idx`), its position in the text
 * (`index`), the marker's length (`match_len`) and empty `chars` / `places`
 * lists. When a number has been seen before, the id is the number followed by
 * "b" (a string) instead of the number itself.
 *
 * @param {string} txt - full source text.
 * @returns {Array<{idx: (number|string), index: number, match_len: number, chars: Array, places: Array}>}
 */
const doMarks = (txt) => {
  const usedIds = new Set();
  const marks = [];

  for (const match of txt.matchAll(/\{(\d+)\}/gi)) {
    const number = +match[1];
    const idx = usedIds.has(number) ? number + "b" : number;
    usedIds.add(idx);

    marks.push({
      idx,
      index: match.index,
      match_len: match[0].length,
      chars: [],
      places: [],
    });
  }

  return marks;
};

/**
 * Analyse a marked-up text end to end.
 *
 * The text is split at its `{number}` markers. For every mark the chunk of
 * text up to the next marker (or the end of the text) is trimmed, stripped of
 * carriage returns, and scanned for Bible references (`[...]`), places,
 * characters, quotes (`"..."`), poems (`<...>`) and paragraph breaks (blank
 * lines). The resulting tokens are sorted by start position, wrapped in a
 * "base" token spanning the chunk, flattened with `tokeniseString`, and
 * stored on the mark without zero-length segments. Finally the collected mark
 * ids are binned by `doGroups`.
 *
 * @param {string} txt - full source text.
 * @returns {{marks: Array, charCounts: Object, placeCounts: Object,
 *   charIndices: Object, placeIndices: Object, charIndicesByGroup: Object,
 *   placeIndicesByGroup: Object, bibleDataByGroup: Array}}
 */
export function mainAnalysis(txt) {
  const marks = doMarks(txt);

  const charCounts = {};
  const placeCounts = {};
  const charIndices = {};
  const placeIndices = {};
  const bibleIndices = {};

  for (let i = 0; i < marks.length; i++) {
    const mark = marks[i];
    const isLast = i === marks.length - 1;

    // A chunk runs up to the next marker, or to the end of the text.
    mark.end_index = isLast ? txt.length : marks[i + 1].index;

    const chunk = txt
      .slice(mark.index + mark.match_len, mark.end_index)
      .replace(/^\s+/, "")
      .replace(/\s+$/, "")
      .replace(/\r/gi, "");
    mark.chunk = chunk;

    const tokensRaw = [];

    // The call order fixes how tokens with equal start positions are ordered
    // after the (stable) sort below.
    doBibleRefs(chunk.matchAll(/\[([^\]])+\]/gi), tokensRaw, bibleIndices, marks, i);
    doPlaces(places, tokensRaw, placeCounts, placeIndices, marks, chunk, i);
    doChars(marks, i, charCounts, charIndices, tokensRaw, chunk);

    doGeneric(chunk.matchAll(/\"([^"])+"/gi), tokensRaw, "quote");
    doGeneric([...chunk.matchAll(/<([^>])+>/gi)], tokensRaw, "poem");
    doGeneric(chunk.matchAll(/\n\n/gi), tokensRaw, "break");

    tokensRaw.sort((left, right) => left.idx - right.idx);

    const baseToken = { idx: 0, endIdx: chunk.length, type: "base" };
    mark.tokens = tokeniseString([baseToken, ...tokensRaw]).filter(
      (segment) => segment.idx !== segment.endIdx
    );
  }

  const grouped = doGroups(marks, charIndices, placeIndices, bibleIndices);

  return {
    marks,
    charCounts,
    placeCounts,
    charIndices,
    placeIndices,
    charIndicesByGroup: grouped.charIndicesByGroup,
    placeIndicesByGroup: grouped.placeIndicesByGroup,
    bibleDataByGroup: grouped.bibleDataByGroup,
  };
}
