import StringList from "src/dataTypes/strings/StringList";
import NumberList from "src/dataTypes/numeric/NumberList";
import NumberTable from "src/dataTypes/numeric/NumberTable";
import StringOperators from "src/operators/strings/StringOperators";
import ListGenerators from "src/operators/lists/ListGenerators";
import TableOperators from "src/operators/lists/TableOperators";
import Network from "src/dataTypes/structures/networks/Network";
import Relation from "src/dataTypes/structures/elements/Relation";
import Node from "src/dataTypes/structures/elements/Node";
import NumberListOperators from "src/operators/numeric/numberList/NumberListOperators";
import Table from "src/dataTypes/lists/Table";
import TableGenerators from "src/operators/lists/TableGenerators";

/**
 * @classdesc  StringList Operators: namespace whose static functions operate on lists of strings
 * (the function itself is empty, operators are attached to it as properties)
 *
 * @namespace
 * @category strings
 */
function StringListOperators() {}
export default StringListOperators;

/**
 * joins the elements of a list using joinString (default: "") and hands the joined text to StringList.fromArray
 * NOTE(review): fromArray receives a single string rather than an array, confirm this is intended before reviving this deprecated function
 */
StringListOperators.concatStrings = function(stringList, joinString) { //deprecated
  var separator = joinString == null ? "" : joinString;
  var joinedText = stringList.join(separator);
  return StringList.fromArray(joinedText);
};

/**
 * joins the strings of a list into a single string, optionally wrapped by a prefix and a sufix
 * @param  {StringList} stringList strings to be joined
 *
 * @param  {String} character placed between consecutive strings (default: "")
 * @param  {String} prefix placed before the joined text (default: "")
 * @param  {String} sufix placed after the joined text (default: "")
 * @param {Boolean} byEnter if true the strings are joined with a line break and character is ignored
 * @return {String}
 * tags:transform,combine
 */
StringListOperators.join = function(stringList, character, prefix, sufix, byEnter) {
  if(stringList == null) return;

  // byEnter takes precedence over any character received
  var separator = byEnter ? "\n" : character;
  if(separator == null) separator = "";

  var head = prefix == null ? "" : prefix;
  var tail = sufix == null ? "" : sufix;

  return head + stringList.join(separator) + tail;
};


/**
 * cleans every text of a list by delegating to StringOperators.cleanText with the same optional cleaning flags
 * @param {StringList} strings to be transformed
 *
 * @param {Boolean} removeEnters
 * @param {Boolean} removeTabs
 * @param {String} replaceTabsAndEntersBy
 * @param {Boolean} removePunctuation
 * @param {Boolean} toLowerCase
 * @param {StringList} stopWords removes strings from text
 * @param {Boolean} removeDoubleSpaces
 * @param {Boolean} removeAccentsAndDiacritics removes accents and diacritics from characters
 * @return {StringList} new list (same name as the input) with the cleaned texts, null if strings is null
 * tags:filter,transform,clean
 * examples:#/santiago/tests/cleanTexts
 */
StringListOperators.cleanTexts = function(strings, removeEnters, removeTabs, replaceTabsAndEntersBy, removePunctuation, toLowerCase, stopWords, removeDoubleSpaces, removeAccentsAndDiacritics){
  if(strings==null) return null;

  var cleaned = new StringList();
  cleaned.name = strings.name;

  var index = 0;
  while(index < strings.length){
    cleaned[index] = StringOperators.cleanText(
      strings[index],
      removeEnters,
      removeTabs,
      replaceTabsAndEntersBy,
      removePunctuation,
      toLowerCase,
      stopWords,
      removeDoubleSpaces,
      removeAccentsAndDiacritics
    );
    index++;
  }

  return cleaned;
};


/**
 * filters a StringList by a string
 * @param  {StringList} stringList    to be filtered
 * @param  {String} string        filter criteria (string or word that will be search in each string of the stringList)
 *
 * @param  {Boolean} asWord        if true a word (string surrounded by separators such as space or punctuation) will be used as criteria, false by default. In this mode string is inserted as is into a regular expression, so regex metacharacters keep their special meaning
 * @param  {Boolean} returnIndexes if true a numberList with indexes will be returned, instead of a stringList
 * @return {List}               stringList or numberlist with filtered strings or indexes (null if stringList or string are null)
 * tags:filter
 */
StringListOperators.filterStringListByString = function(stringList, string, asWord, returnIndexes) {
  if(stringList==null || string==null) return null;

  var newList = returnIndexes ? new NumberList() : new StringList();
  var l = stringList.length;
  // compiled once; no "g" flag, so test() keeps no lastIndex state between strings
  var regex = asWord ? new RegExp("\\b" + string + "\\b") : null;
  var i, text, found;

  for(i = 0; i<l; i++) {
    text = stringList[i];
    // null entries can never match (and used to throw)
    if(text == null) continue;

    // test() returns a boolean; match() returns null when nothing matches, which made ".length" throw
    found = asWord ? regex.test(text) : text.indexOf(string) != -1;
    if(found) newList.push(returnIndexes ? i : text);
  }

  return newList;
};

// builds a global ("g") RegExp from a string previously escaped by StringOperators.escapeForRegExp
StringListOperators._toRegEx = function(string){
  var escapedPattern = StringOperators.escapeForRegExp(string);
  return new RegExp(escapedPattern, "g");
};

/**
 * replaces, in each text, every one of a list of sub-strings by the same replacement (sub-strings are applied in order, each one over the result of the previous)
 * @param  {StringList} texts  where to replace strings
 * @param  {StringList} strings to be replaced (could be Regular Expressions)
 * @param  {String} replacement string to be placed instead
 * @return {StringList} new list with the same name as texts; null texts are kept untouched
 * tags:
 * examples:jeff/examples/stringReplacements
 */
StringListOperators.replaceStringsInTexts = function(texts, strings, replacement) {
  if(texts==null || strings==null || replacement==null) return null;

  var result = new StringList();
  result.name = texts.name;

  var textCount = texts.length;
  var patternCount = strings.length;
  var textIndex, patternIndex, current;

  for(textIndex = 0; textIndex<textCount; textIndex++) {
    current = texts[textIndex];
    if(current != null) {
      for(patternIndex = 0; patternIndex<patternCount; patternIndex++) {
        current = StringOperators.replaceStringInText(current, strings[patternIndex], replacement);
      }
    }
    result[textIndex] = current;
  }

  return result;
};

/**
 * replaces in each text a sub-string by a string, you can use regex expressions such as /\n/ for enter or /[a|e]/ for multiple characters (see:https://regex101.com/)
 * @param  {StringList} texts  StringList to work on.
 * @param  {String} string to be replaced (could be Regular Expression)
 * @param  {String} replacement string to be placed instead
 * @return {StringList} new list with the same name as texts; missing texts become null
 * tags:
 * examples:jeff/examples/stringReplacements
 */
StringListOperators.replaceStringInTexts = function(texts, string, replacement) {
  if(texts==null || string==null || replacement==null) return null;

  var replaced = new StringList();
  replaced.name = texts.name;

  var total = texts.length;
  var index, source;

  for(index = 0; index<total; index++) {
    source = texts[index];
    if(source == null) {
      replaced[index] = null;
    } else {
      replaced[index] = StringOperators.replaceStringInText(source, string, replacement);
    }
  }

  return replaced;
};

/**
 * replaces, in each text, every sub-string of a list by the replacement placed at the same position in a second list (pairs are applied in order, each one over the result of the previous)
 * @param  {StringList} texts  where to replace strings
 * @param  {StringList} strings to be replaced (could be Regular Expressions)
 * @param  {StringList} replacements strings to be placed instead (should have same length as strings)
 * @return {StringList} new list with the same name as texts; null texts are kept untouched
 * tags:
 * examples:jeff/examples/stringReplacements
 */
StringListOperators.replaceStringsInTextsByStrings = function(texts, strings, replacements) {
  if(texts==null || strings==null || replacements==null) return null;

  var result = new StringList();
  result.name = texts.name;

  var textCount = texts.length;
  var pairCount = strings.length;
  var textIndex, pairIndex, current;

  for(textIndex = 0; textIndex<textCount; textIndex++) {
    current = texts[textIndex];
    if(current != null) {
      for(pairIndex = 0; pairIndex<pairCount; pairIndex++) {
        current = StringOperators.replaceStringInText(current, strings[pairIndex], replacements[pairIndex]);
      }
    }
    result[textIndex] = current;
  }

  return result;
};




// var regex = new RegExp("\\b"+word+"\\b");
// var match = string.match(regex);
// return match==null?0:match.length;

/*
 * a classic function, but now it works with patterns!
 */
/**
 * @todo finish docs
 */

/**
* finds strings from a list in strings in another list (typically the strings in the first list are short, and in the second longer texts)
* @param {StringList} list of strings or list of Regular Expressions
* @param {StringList} list of texts were to search
*
* @param {Boolean} asWords if false (default) searches substrings, if true searches words (each string is placed as is between word boundaries in a regular expression)
* @return {NumberTable} matrix of results, each column being the vector of occurrences for each string (null if strings or texts are null)
* tags:count
*/
StringListOperators.countStringsOccurrencesOnTexts = function(strings, texts, asWords) {
  if(strings==null || texts==null) return null;

  var occurrencesTable = new NumberTable();

  var i;
  var j;
  var string;
  var numberList;
  var nStrings = strings.length;
  var nTexts = texts.length;
  var wordRegex;

  for(i = 0; i<nStrings; i++) {
    string = strings[i];
    // compiled only in words mode: in substring mode a string such as "(" or "c++" is not a valid pattern and must not throw
    // NOTE(review): the expression has no "g" flag; whether all occurrences are counted depends on StringOperators.countRegexOccurrences, confirm
    wordRegex = asWords ? new RegExp("\\b" + string + "\\b") : null;
    numberList = new NumberList();
    numberList.name = string;
    for(j = 0; j<nTexts; j++) {
      if(asWords){
        numberList[j] = StringOperators.countRegexOccurrences(texts[j], wordRegex);
      } else {
        numberList[j] = StringOperators.countOccurrences(texts[j], string);
      }
    }
    occurrencesTable[i] = numberList;
  }
  return occurrencesTable;
};

/**
 * builds a table with a list of occurrent words and numberLists for occurrences in each string
 * @param  {StringList} texts
 *
 * @param {Number} weightsMode weights mode<|>0: words count (default)<|>1: words count normalized to sum (a single word weights add up 1)<|>2:tf-idf simple (term frequency - inverse document frequency), divides number of occurrences in string by total number of texts the word occurs (see: https://en.wikipedia.org/wiki/Tf%E2%80%93idf)<|>3:tf-idf classic (idf = log(N/nt))<|>4:tf-df (term frequency * document frequency) words that are both common in a text and in the corpus get high score
 * @param {StringList} stopWords words to be excluded from the list (if value is 1, stopwords will be default english stopwords at StringOperators.STOP_WORDS)
 * @param {Boolean} includeLinks
 * @param {Number} wordsLimitPerString number of words extracted per string (default: 500)
 * @param {Number} totalWordsLimit final number of words (default: 1000)
 * @param {Boolean} sortByTotalWeight sort all columns by total weights of words (default: true). Sorting is always applied when there are more words than totalWordsLimit
 * @param {Number} minSizeWords (default: 3)
 * @param {Boolean} addTotalList adds a numberList with sums of weights for each word (this is the list used optionally to sort the lists of the table) (default: false)
 * @param {Number} minSupportFraction a number in range [0,1] which if specified only words appearing in at least that fraction of input texts will be included
 * @param {String} wordDelimiter default is ' ' in which case words are defined in the usual way. Anything else and the input is simply split by that string and the results trimmed
 * @return {Table} first list contains the words, followed by one list of weights per text (and the totals list if requested)
 * tags:count,statistics
 */
StringListOperators.getWordsInTextsOccurrencesTable = function(texts, weightsMode, stopWords, includeLinks, wordsLimitPerString, totalWordsLimit, sortByTotalWeight, minSizeWords, addTotalList, minSupportFraction, wordDelimiter) {
  if(texts == null) return;

  var i, j, key;
  var matrix;
  var nTexts = texts.length;

  if(stopWords==1) stopWords = StringOperators.STOP_WORDS;

  wordsLimitPerString = wordsLimitPerString || 500;
  totalWordsLimit = totalWordsLimit || 1000;
  var normalize = weightsMode==1;
  var tfidf = weightsMode==2 || weightsMode==3 || weightsMode==4;
  // an explicit false must be honoured (a plain "|| true" forces sorting on every call)
  sortByTotalWeight = sortByTotalWeight == null ? true : sortByTotalWeight;
  minSizeWords = minSizeWords == null ? 3 : minSizeWords;
  minSupportFraction = minSupportFraction == null ? 0 : minSupportFraction;

  // word -> NumberTable with [indexes of the texts containing the word, count of the word in each of those texts]
  // the dictionary has no prototype, so words such as "constructor" or "toString" are not mistaken for existing entries
  var oWordCounts = Object.create(null);
  var table;
  var tabCounts;
  for(i = 0; i<nTexts; i++){
    table = StringOperators.getWordsOccurrencesTable(texts[i], stopWords, includeLinks, wordsLimitPerString, minSizeWords, wordDelimiter);
    if(table == null || table[0] == null) continue;
    for(j = 0; j<table[0].length; j++){
      tabCounts = oWordCounts[table[0][j]];
      if(tabCounts == undefined){
        tabCounts = new NumberTable();
        tabCounts.push(new NumberList()); // text indexes
        tabCounts.push(new NumberList()); // counts of this word in this text item
        oWordCounts[table[0][j]] = tabCounts;
      }
      tabCounts[0].push(i);
      tabCounts[1].push(table[1][j]);
    }
  }

  var sLWords = new StringList();
  for(key in oWordCounts){
    if(oWordCounts[key][0].length < minSupportFraction*nTexts){
      // not enough support, do not include
      delete oWordCounts[key];
      continue;
    }
    sLWords.push(key);
  }

  // one list for the words plus one list of zeros per text
  matrix = TableGenerators.createTableWithSameElement(nTexts+1,sLWords.length,0);
  matrix[0] = ListGenerators.createListWithSameElement(sLWords.length,'','words');
  for(i = 1; i < matrix.length; i++){
    matrix[i].name = 'text ' + (i-1);
  }

  // fill with data
  var iWord = 0;
  for(key in oWordCounts){
    matrix[0][iWord] = key;
    tabCounts = oWordCounts[key];
    for(i = 0; i < tabCounts[0].length; i++){
      matrix[tabCounts[0][i]+1][iWord] += tabCounts[1][i];
    }
    iWord++;
  }

  // the limit is applied by keeping the heaviest words, which requires sorting
  if(matrix[0].length > totalWordsLimit) sortByTotalWeight = true;

  matrix[0].name = 'words';

  if(tfidf || sortByTotalWeight || addTotalList) {
    var occurrencesInText;
    // totals start as a copy of the first text weights; with no texts there is nothing to copy
    var totalList = matrix.length > 1 ? matrix[1].clone() : new NumberList();
    // number of texts in which each word occurs
    var idf = ListGenerators.createListWithSameElement(matrix[0].length, 0, 'idf');

    for(i=1; i<matrix.length; i++){
      occurrencesInText = matrix[i];
      occurrencesInText.name = 'text '+(i-1);
      for(j=0; j<occurrencesInText.length; j++){
        if(occurrencesInText[j]>0){
          if(i>1) totalList[j] += occurrencesInText[j];
          idf[j]++;
        }
      }
    }

    if(tfidf) {

      totalList = ListGenerators.createListWithSameElement(matrix[0].length, 0, 'total tf-idf');

      if(weightsMode==2){
        for(i=1; i<matrix.length; i++){
          occurrencesInText = matrix[i];
          for(j=0; j<occurrencesInText.length; j++){
            occurrencesInText[j] /= idf[j];
            totalList[j] += occurrencesInText[j];
          }
        }
      } else if(weightsMode==3){
        for(i=1; i<matrix.length; i++){
          occurrencesInText = matrix[i];
          for(j=0; j<occurrencesInText.length; j++){
            occurrencesInText[j] *= Math.log(nTexts/idf[j]);
            totalList[j] += occurrencesInText[j];
          }
        }
      } else if(weightsMode==4){
        for(i=1; i<matrix.length; i++){
          occurrencesInText = matrix[i];
          for(j=0; j<occurrencesInText.length; j++){
            occurrencesInText[j] *= idf[j];
            totalList[j] += occurrencesInText[j];
          }
        }
      }
    }

    if(addTotalList){
      matrix.push(totalList);
      totalList.name = 'totals';
    }

    if(sortByTotalWeight) {
      matrix = matrix.getListsSortedByList(totalList, false);
    }

  } else {
    for(i=1; i<matrix.length; i++){
      matrix[i].name = 'text '+(i-1);
    }
  }

  if(normalize) {
    // the words list (index 0) is not numeric and is left untouched
    for(i=1; i<matrix.length; i++){
      matrix[i] = NumberListOperators.normalizeToSum(matrix[i]);
    }
  }


  if(totalWordsLimit > 0 && totalWordsLimit < matrix[0].length) matrix = matrix.sliceRows(0, totalWordsLimit - 1);

  return matrix;
};


/**
 * deprecated, replaced by NetworkGenerators.createNetworkFromTexts
 */
// StringListOperators.createTextsNetwork = function(texts, stopWords, stresuniqueness, relationThreshold) {
//   var i, j;
//   var network = new Network();

//   var matrix = StringListOperators.getWordsOccurrencesTable(texts, stopWords, false, 600, 800, false, true, false, 3);

//   texts.forEach(function(text, i) {
//     var node = new Node("_" + i, "_" + i);
//     node.content = text;
//     node.wordsWeights = matrix[i + 1];
//     network.addNode(node);
//   });

//   for(i = 0; network.nodeList[i + 1] != null; i++) {
//     var node = network.nodeList[i];
//     for(j = i + 1; network.nodeList[j] != null; j++) {
//       var node1 = network.nodeList[j];

//       var weight = NumberListOperators.cosineSimilarity(node.wordsWeights, node1.wordsWeights);

//       if(i === 0 && j == 1) {
//         console.log(node.wordsWeights.length, node1.wordsWeights.length, weight);
//         console.log(node.wordsWeights.type, node.wordsWeights);
//         console.log(node1.wordsWeights.type, node1.wordsWeights);
//         console.log(node.wordsWeights.getNorm() * node1.wordsWeights.getNorm());
//       }

//       if(weight > relationThreshold) {
//         var relation = new Relation(node.id + "_" + node1.id, node.id + "_" + node1.id, node, node1, weight);
//         network.addRelation(relation);
//       }
//     }
//   }

//   return network;
// };


/**
 * builds a network out of a list of short strings, adds a property wordsTable to each node (with words and weights)
 * each text becomes a node; two nodes are related when the combined weight of the words they share exceeds relationThreshold
 * @param  {StringList} texts
 *
 * @param  {Boolean|StringList} stopWords remove stop words (true for using default list stop words, or stringList of words)
 * @param  {Number} relationThreshold threshold to create a relation (default: 0.2)
 * @param {Number} mode <|>0:pseudoentropy, by finding key words with low entropy (words occurring in a single text or in all texts have maximum entropy, occuring in 0.25 texts minimum entropy (max weight))<|>1:originality<|>2:skewed entropy<|>3:originality except isolation
 * @param {Boolean} applyIntensity takes into account occurrences of word into each text
 * @param {Table} wordsFrequencyTable if a words frequency table is provided (first list: words, second list: frequencies, whose first value is taken as the maximum), words get a lower weight the more frequent they are in the table
 * @param {StringList} names list of names (of same length as list of texts) to be assigned to nodes
 * @param {List} colors list of colors or categories (of same length as list of texts) to assign colors to nodes
 * @return {Network} undefined if texts is null or empty
 * tags:generator
 */
StringListOperators.createShortTextsNetwork = function(texts, stopWords, relationThreshold, mode, applyIntensity, wordsFrequencyTable, names, colors) {
  if(texts == null ||  texts.length == null || texts.length === 0) return;

  var network = new Network();
  var n_texts = texts.length;
  var i, j;
  var word;
  var nWords;
  var n_words;
  var weights;
  var weight;
  var maxWeight = 0;
  var index;

  // lower-case every text on its own (null entries become "", as Array.join would produce);
  // joining and splitting by a marker would shift all texts if one of them contained the marker
  var textsLowerCase = [];
  for(i = 0; i<n_texts; i++) {
    textsLowerCase[i] = texts[i] == null ? "" : String(texts[i]).toLowerCase();
  }

  if(colors!=null && !colors["isColorList"]) colors = colors.toColorList();

  relationThreshold = relationThreshold || 0.2;
  mode = mode || 0;

  // lower-cased copy of the frequency words; the table received is left untouched
  var frequencyWords, maxFreq;
  if(wordsFrequencyTable) {
    frequencyWords = wordsFrequencyTable[0].toLowerCase();
    maxFreq = wordsFrequencyTable[1][0];
  }

  // weight of a word as a function of the number of other texts in which it appears
  var weightFunction;
  switch(mode) {
    case 0: //pseudo-entropy
      weightFunction = function(nOtherTexts) {
        return 1 - Math.pow(2 * nOtherTexts / (n_texts - 1) - 1, 2);
      };
      break;
    case 1: //originality
      weightFunction = function(nOtherTexts) {
        return 1 / (nOtherTexts + 1);
      };
      break;
    case 2: //skewed entropy (favoring very few external occurrences)
      weightFunction = function(nOtherTexts) {
        return 1 - Math.pow(2 * Math.pow(nOtherTexts / (n_texts - 1), 0.2) - 1, 2);
      };
      break;
    default: //originality except isolation
      weightFunction = function(nOtherTexts) {
        if(nOtherTexts === 0) return 0;
        return 1 / nOtherTexts;
      };
      break;
  }

  var node, name;

  // first pass: one node per text, with its most relevant words and their weights
  texts.forEach(function(text, i) {
    name = names==null || names[i]==null?"_" + i:names[i];
    node = new Node("_" + i, name);
    network.addNode(node);
    node.content = text;
    if(colors!=null) node.color = colors[i];
    var words = StringOperators.getWords(text, true, stopWords, false, false, 0, 3);

    n_words = words.length;
    weights = new NumberList();
    for(j = 0; words[j] != null; j++) {
      word = words[j];
      var nOtherTexts = 0;
      textsLowerCase.forEach(function(text, k) {
        if(i == k) return;
        nOtherTexts += Number(text.indexOf(word) != -1); //is this the fastest way?
      });

      // words that appear in no other text cannot relate texts: discard them
      if(nOtherTexts === 0) {
        words.splice(j, 1);
        j--;
        continue;
      }

      weights[j] = weightFunction(nOtherTexts);

      if(applyIntensity) weights[j] *= (1 - 1 / (StringOperators.countOccurrences(textsLowerCase[i], word) + 1));

      if(wordsFrequencyTable) {
        index = frequencyWords.indexOf(word);
        weights[j] *= (index == -1 ? 1 : (1 - Math.pow(wordsFrequencyTable[1][index] / maxFreq, 0.2)));
      }

      maxWeight = Math.max(maxWeight, weights[j]);
    }

    // number of words kept grows logarithmically with the number of words found in the text
    nWords = Math.floor(Math.log(n_words + 1) * 3);

    words = words.getSortedByList(weights, false).slice(0, nWords);

    // word -> index lookup; no prototype, so words such as "constructor" are only found when really present
    words.position = Object.create(null);
    words.forEach(function(word, j) {
      words.position[word] = j;
    });

    weights = weights.getSorted(false).slice(0, nWords);
    node.wordsTable = new Table();
    node.wordsTable[0] = words;
    node.wordsTable[1] = weights;
  });

  // second pass: relate every pair of nodes whose shared words weigh enough
  var node0, node1, relation;
  for(i = 0; network.nodeList[i + 1] != null; i++) {
    node0 = network.nodeList[i];
    for(j = i + 1; network.nodeList[j] != null; j++) {
      node1 = network.nodeList[j];
      weight = 0;
      node0.wordsTable[0].forEach(function(word, i) {
        index = node1.wordsTable[0].position[word];
        if(index != null) weight += node0.wordsTable[1][i] * node1.wordsTable[1][index];
      });
      weight = Math.sqrt((weight / maxWeight) / Math.max(node0.wordsTable[0].length, node1.wordsTable[0].length));
      if(weight > relationThreshold) {
        relation = new Relation(node0.id + "_" + node1.id, node0.id + "_" + node1.id, node0, node1, weight);
        network.addRelation(relation);
      }
    }
  }

  return network;
};

/**
 * splits each string by a separator and distributes the resulting (trimmed) components in the columns of a Table, one row per string
 * only the columns that received at least one component are returned
 * @param  {StringList} texts  list of strings to split
 *
 * @param  {String} separator to use (default: ,)
 * @param  {Number} maxComponents to keep in own column (default: 4)
 * @param  {Boolean} bMergeExtraComponents if true (default), keep any excess components by including in the last one
 * @param  {String} sLabel to use for first level Null (default: '')
 * @return {Table}
 * tags:
 */
StringListOperators.splitStrings = function(texts, separator, maxComponents, bMergeExtraComponents, sLabel) {
  if(texts==null) return null;
  separator = separator == null ? ',' : separator;
  maxComponents = maxComponents == null ? 4 : maxComponents;
  if(bMergeExtraComponents == null) bMergeExtraComponents = true;
  if(sLabel == null) sLabel = '';

  var levels = new Table();
  var level, row, extra, parts;

  for(level=0; level<maxComponents; level++){
    levels.push(new StringList());
    levels[level].name = 'Level ' + (level+1);
  }

  // indexes of the columns that received at least one component
  var usedLevels = new NumberList();

  for(row=0; row<texts.length; row++){
    parts = StringOperators.splitString(texts[row],separator);

    if(parts == null){
      // nothing to split: label in the first column, nulls in the rest
      for(level=0; level<maxComponents; level++){
        levels[level][row] = level==0 ? sLabel : null;
      }
      continue;
    }

    parts = parts.trim();

    for(level=0; level<maxComponents; level++){
      if(level < parts.length){
        levels[level][row] = parts[level];
        if(level >= usedLevels.length){
          usedLevels.push(level);
        }
      } else {
        levels[level][row] = null;
      }

      var isLastLevel = level == maxComponents-1;
      if(bMergeExtraComponents && isLastLevel && parts.length > maxComponents){
        // cannot easily support reg exp separators because the separator is appended back as plain text
        for(extra = level+1; extra < parts.length; extra++){
          levels[level][row] += separator + parts[extra];
        }
      }
    }
  }

  if(usedLevels.length == 0) return new Table(); // empty
  if(usedLevels.length < levels.length) return levels.getColumns(usedLevels);
  return levels;
};

/**
 * converts each string to a set of numeric features using different methods
 * @param  {StringList} texts  list of strings to convert
 *
 * @param  {Number} method to use<|>0: one hot encoding (default)<|>1: ordinal encoding<|>2: binary encoding<|>3: one hot encoding, keep all
 * @param  {StringList} sLComplete is the set of all possible values in the input StringList. If not specified then the unique values from the input are used
 * @param  {Boolean} bmakeLowerCase if true make the strings lowercase (default: false)
 * @param  {Boolean} bUseListNamePrefix if true use the name of the input list as a prefix to category column names(default: false)
 * @param  {Boolean} bThrowError fail with error on value not found in sLComplete when true (default: false when method inlet is 3, otherwise true). When false it will have 0 values in all indicator columns
 * @return {Table} NumberTable with one row per text; categories are the sorted unique values (method 0 drops the last, redundant, category column)
 * tags:
 */
StringListOperators.encodeStringsAsNumericFeatures = function(texts, method, sLComplete, bmakeLowerCase, bUseListNamePrefix, bThrowError) {
  if(texts==null) return null;
  method = method == null ? 0 : method;
  bmakeLowerCase = bmakeLowerCase == null ? false : bmakeLowerCase;
  bUseListNamePrefix = bUseListNamePrefix == null ? false : bUseListNamePrefix;
  if(bThrowError == null)
    bThrowError = method != 3;

  var i, c, j, LNoReps, output, jBinary, lenDiff;
  var bOneHot = method == 0 || method == 3;

  if(texts.type != 'StringList')
    texts = texts.toStringList();
  if(bmakeLowerCase)
    texts = texts.toLowerCase();

  // leave values as they are, don't trim
  if(sLComplete == null)
    LNoReps = texts.getWithoutRepetitions().getSorted();
  else{
    LNoReps = bmakeLowerCase ? sLComplete.toLowerCase() : sLComplete;
    LNoReps = LNoReps.getWithoutRepetitions().getSorted();
  }

  // category -> position in the sorted categories
  // no prototype: values such as "constructor" or "toString" must not look like known categories
  var dict = Object.create(null);
  for(i=0; i < LNoReps.length; i++){
    dict[LNoReps[i]] = i;
  }

  var sNamePrefix = '';
  if(bUseListNamePrefix)
    sNamePrefix = texts.name == '' ? '' : texts.name + ':';

  // create the (still empty) output columns
  output = new NumberTable();
  if(bOneHot){
    for(i=0; i < LNoReps.length; i++){
      output.push(new NumberList());
      output[i].name = sNamePrefix + LNoReps[i];
    }
  }
  else if(method == 1){
    output.push(new NumberList());
    output[0].name = texts.name == '' ? 'Code' : texts.name + ' Code';
  }
  else{
    // as many bit columns as digits needed to write the highest category index in binary
    var sHighestInBinary = ((LNoReps.length - 1) >>> 0).toString(2);
    var nBinaryDigitsRequired = sHighestInBinary.length;
    for(i=0; i < nBinaryDigitsRequired; i++){
      output.push(new NumberList());
      output[i].name = sNamePrefix + 'Bit ' + i;
    }
  }

  // one row per text
  for(i=0; i < texts.length; i++){
    j = dict[texts[i]];

    if(j == null){
      // can happen if texts contains things not in sLComplete
      if(bThrowError)
        throw new Error('Element in texts not found in sLComplete.');
      for(c=0; c < output.length; c++){
        output[c].push(0);
      }
      continue;
    }

    if(bOneHot){
      for(c=0; c < output.length; c++){
        output[c].push(c == j ? 1 : 0);
      }
    }
    else if(method == 1){
      output[0].push(j);
    }
    else{
      // binary digits of j, left padded with zeros up to the number of bit columns
      jBinary = (j >>> 0).toString(2);
      lenDiff = output.length - jBinary.length;
      for(c=0; c < output.length; c++){
        output[c].push(c-lenDiff < 0 ? 0 : Number(jBinary.charAt(c-lenDiff)));
      }
    }
  }

  if(method == 0){
    // remove last column since it is redundant
    if(output.length > 1)
      output = output.getWithoutElementAtIndex(output.length - 1);
  }
  return output;
};

/**
 * builds the checkbox object input for Controls module from a list or the column names of a table
 * for each field name two properties are created: one (sBasename + position) holding an entry per accepted value, and one with the same name plus '_META' holding the field label
 * @param  {StringList|Table} sLInput  list of values or a table. If a table is input then the column names are used
 *
 * @param  {StringList} sLFieldNames a list of fields to populate. Default value: "X","Y"
 * @param  {StringList} sLTypes for a table input this list controls which type of columns are included for each field. Must be same length as sLFieldNames and valid values are [all,numberlist,stringlist]. Default is to include all values for every field
 * @param  {String} sBasename for property names. Default: field
 * @param  {boolean} bStartClosed if true then start with the groups closed (default: false)
 * @param  {boolean} bInitialValue if true then start checked (default: true)
 * @param  {boolean} bUseLabelValues if true then output the label values (default: false)
 * @return {Object}
 * tags:
 */
StringListOperators.buildVariableCheckboxControlsObject = function(sLInput, sLFieldNames, sLTypes, sBasename, bStartClosed, bInitialValue, bUseLabelValues) {
  if(sLInput == null) return;
  if(!sLInput.isList) throw new Error('First parm must be list or table');
  if(sLFieldNames == null) sLFieldNames = StringList.fromArray(['X','Y']);
  if(sLTypes == null) sLTypes = ListGenerators.createListWithSameElement(sLFieldNames.length,'all');
  if(sLTypes.length != sLFieldNames.length) throw new Error('sLFieldNames must be same length as sLTypes');
  if(sBasename == null) sBasename = 'field';
  if(bStartClosed == null) bStartClosed = false;
  if(bInitialValue == null) bInitialValue = true;
  if(bUseLabelValues == null) bUseLabelValues = false;
  sLTypes = sLTypes.toLowerCase();

  var validTypes = ['all','stringlist','numberlist'];
  var iField, iName;
  for(iField=0; iField<sLTypes.length; iField++){
    if(!validTypes.includes(sLTypes[iField])) throw new Error('Invalid type: ' + sLTypes[iField]);
  }

  var names = sLInput.isTable ? sLInput.getNames() : sLInput;
  var controls = {};
  var fieldKey, options, meta, columnType, accepted, optionName;

  for(iField=0; iField < sLFieldNames.length; iField++){
    fieldKey = sBasename + String(iField+1);
    options = {};
    meta = { label:String(sLFieldNames[iField]), values:[] };
    controls[fieldKey] = options;
    controls[fieldKey + '_META'] = meta;
    if(bStartClosed) meta.startClosed = true;

    for(iName=0; iName < names.length; iName++){
      // plain lists have no column types, so every value is accepted
      columnType = sLInput.isTable ? sLInput[iName].type.toLowerCase() : 'unknown';
      accepted = columnType == 'unknown' || columnType == sLTypes[iField] || sLTypes[iField] == 'all';
      if(!accepted) continue;

      optionName = names[iName];
      if(!bUseLabelValues){
        options[optionName] = bInitialValue;
        continue;
      }
      // label mode: a checked box carries its own label as value
      options[optionName] = bInitialValue ? optionName : false;
      options[optionName+'_META'] = { "true" : optionName};
    }
  }
  return controls;
};

/**
 * builds the input object for the Controls module: one selection entry per field, whose options are the elements of a list or the column names of a table. Each field defaults to its first valid option, or to an empty string when no option is valid
 * @param  {StringList|Table} sLInput  list of values or a table. If a table is input then the column names are used
 *
 * @param  {StringList} sLFieldNames a list of fields to populate. Default value: "X","Y"
 * @param  {StringList} sLTypes for a table input this list controls which type of columns are included for each field. Must be same length as sLFieldNames and valid values are [all,numberlist,stringlist] (case insensitive). Default is to include all values for every field
 * @param  {String} sBasename for property names. Default: field
 * @return {Object} for the i-th field, property sBasename+i holds the selected value and property sBasename+i+"_META" holds {label, values}
 * tags:
 */
StringListOperators.buildVariableSelectionControlsObject = function(sLInput, sLFieldNames, sLTypes, sBasename) {
  // nothing to build without an input; anything that is not a list (or table) is rejected
  if(sLInput == null) return;
  if(!sLInput.isList) throw new Error('First parm must be list or table');

  // defaults for the optional arguments
  if(sLFieldNames == null) sLFieldNames = StringList.fromArray(['X','Y']);
  if(sLTypes == null) sLTypes = ListGenerators.createListWithSameElement(sLFieldNames.length,'all');
  if(sLTypes.length != sLFieldNames.length) throw new Error('sLFieldNames must be same length as sLTypes');
  if(sBasename == null) sBasename = 'field';

  // type filters are validated and compared in lower case
  sLTypes = sLTypes.toLowerCase();
  const allowedTypes = ['all','stringlist','numberlist'];
  for(let t = 0; t < sLTypes.length; t++){
    if(!allowedTypes.includes(sLTypes[t])) throw new Error('Invalid type: ' + sLTypes[t]);
  }

  // for a table the options are its column names, otherwise the list elements themselves
  const fromTable = sLInput.isTable;
  const names = fromTable ? sLInput.getNames() : sLInput;
  const controls = {};

  for(let f = 0; f < sLFieldNames.length; f++){
    // gather every option whose type is valid for this field
    const options = [];
    for(let c = 0; c < names.length; c++){
      // only table columns carry a type; plain list elements are always accepted
      const columnType = fromTable ? sLInput[c].type.toLowerCase() : 'unknown';
      if(columnType == 'unknown' || columnType == sLTypes[f] || sLTypes[f] == 'all'){
        options.push(names[c]);
      }
    }

    // the first valid option is preselected
    const key = sBasename + String(f+1);
    controls[key] = options.length > 0 ? options[0] : '';
    controls[key + '_META'] = { label:String(sLFieldNames[f]), values:options };
  }
  return controls;
};

/**
 * correlation ratio is a measure of relationship between dispersion within categories and dispersion across the whole population. When unweighted, range is [0,1] and high values suggest coherence within categories. When weighted, higher values suggest coherence within categories and stronger variation between category averages.
 * @param {StringList} sL has the category for each corresponding numeric value (a generic List is converted with toStringList)
 * @param {NumberList} nL is the input list of numeric values
 *
 * @param {Number} weightMode <br>0: no weighting (default)<br>1: weight by minimum inter-category pairwise variation<br>2: weight by average inter-category pairwise variation<br>3: weight by maximum inter-category pairwise variation
 * @return {Number} ratio rounded to 5 decimals. null when any of the lists is missing. 0 when the values have no variance or, in the weighted modes, when there are fewer than two categories
 * tags: statistics
 */
StringListOperators.correlationRatio = function(sL, nL, weightMode) {
  if(nL == null || sL == null) return null;
  if(sL.type == 'List')
    sL = sL.toStringList();
  // sL is the first argument and nL the second one; the messages follow that order
  if(nL.type != 'NumberList')
    throw new Error('The second list must be a NumberList');
  if(sL.type != 'StringList')
    throw new Error('The first list must be a StringList');
  if(nL.length != sL.length)
    throw new Error('The two lists must have the same length');
  weightMode = weightMode == null ? 0 : weightMode;
  const isWeighted = weightMode != 0;
  if(isWeighted)
    nL = NumberListOperators.normalizeByZScore(nL); // keeps weighted scores scale invariant

  // aggregate the values by category; the loop below reads t[1][i] as the list of values of category i
  let t = new Table();
  t.push(sL);
  t.push(nL);
  t = TableOperators.aggregateTable(t,0,[0,1],[0,7]);

  const sumSquaresOverall = nL.getVariance()*nL.length;
  const avgOverall = nL.getAverage();
  if(sumSquaresOverall == 0) return 0;

  // between-category sum of squares: squared distance of each category average to the overall average, weighted by category size
  let sumSquaresGroups = 0;
  let nLGroupAverages = new NumberList();
  for(let g = 0; g < t[1].length; g++){
    const avgGroup = t[1][g].getAverage();
    nLGroupAverages.push(avgGroup);
    sumSquaresGroups += Math.pow(avgGroup-avgOverall,2)*t[1][g].length;
  }

  let f = 1;
  if(isWeighted){
    // with fewer than two categories there is no pairwise difference: min, average and max
    // would be Infinity, 0/0 and -Infinity, and the result NaN or Infinity
    if(nLGroupAverages.length < 2) return 0;

    // Tried using coefficient of variation as weighting factor but a simple sum of pairwise diffs works better
    // There is likely room to improve this by making it more scale and translation invariant
    const interval = nL.getInterval();
    // normalize to original nL interval
    nLGroupAverages = NumberListOperators.normalizeToInterval(nLGroupAverages,interval);
    let sumDiffs = 0;
    let min = Infinity;
    let max = -Infinity;
    for(let i = 0; i < nLGroupAverages.length; i++){
      for(let j = i+1; j < nLGroupAverages.length; j++){
        const diff = Math.abs(nLGroupAverages[i]-nLGroupAverages[j]);
        sumDiffs += diff;
        min = Math.min(min,diff);
        max = Math.max(max,diff);
      }
    }
    const nCombos = nLGroupAverages.length*(nLGroupAverages.length-1)/2;
    if(weightMode == 1)
      f = 10*min;
    else if(weightMode == 2)
      f = 10*sumDiffs/nCombos;
    else if(weightMode == 3)
      f = 10*max;
  }
  return Number((Math.abs(f)*Math.sqrt(sumSquaresGroups/sumSquaresOverall)).toFixed(5));
};

/**
 * generates several metrics and properties for strings: text length, the two most common words with their frequencies, links and emails found and, only when both word lists are provided, a dichotomy score with its category. Each text is lower-cased before being analyzed and whatever follows "disclaimer:" up to the end of that line is replaced by "***"; a missing or empty text is analyzed as a single space
 * @param {StringList} sL
 * 
 * @param {StringList} negativeWords optional list of words associated to negative values
 * @param {StringList} positiveWords optional list of words associated to positive values
 * @return {Table} texts properties, one row per text; null if sL is missing
 * tags:report
 */
StringListOperators.getStringsTableReport = function(sL, negativeWords, positiveWords){
  if(sL == null) return null;

  // the dichotomy columns only exist when both word lists are available
  const withDichotomy = negativeWords != null && positiveWords != null;

  const textLength = StringListOperators._createNewList('NumberList','textLength','text length');

  const hasLink = StringListOperators._createNewList('List','link_flag','has link');
  const link = StringListOperators._createNewList('StringList','link','link');

  const hasEmail = StringListOperators._createNewList('List','email_flag','has email');
  const email = StringListOperators._createNewList('StringList','email','email');

  const top_1_mostCommonWordValue = StringListOperators._createNewList(
    'StringList','top_1_mostCommonWordValue','most common word value - top 1');
  const top_1_mostCommonWordFrequency = StringListOperators._createNewList(
    'NumberList','top_1_mostCommonWordFrequency','most common word frequency - top 1');

  // each top 2 column carries the name that matches its content (value / frequency)
  const top_2_mostCommonWordValue = StringListOperators._createNewList(
    'StringList','top_2_mostCommonWordValue','most common word value - top 2');
  const top_2_mostCommonWordFrequency = StringListOperators._createNewList(
    'NumberList','top_2_mostCommonWordFrequency','most common word frequency - top 2');

  const dichotomyAnalysis = StringListOperators._createNewList(
    'NumberList','dichotomyAnalysis','dichotomy analysis');
  const dichotomyAnalysisCategory = StringListOperators._createNewList(
    'StringList','dichotomyAnalysisCategory','dichotomy category');

  // column order of the report
  const tableReport = new mo.Table();
  tableReport.push(textLength);
  tableReport.push(top_1_mostCommonWordValue);
  tableReport.push(top_1_mostCommonWordFrequency);
  tableReport.push(top_2_mostCommonWordValue);
  tableReport.push(top_2_mostCommonWordFrequency);
  if(withDichotomy){
    tableReport.push(dichotomyAnalysis);
    tableReport.push(dichotomyAnalysisCategory);
  }
  tableReport.push(hasLink);
  tableReport.push(link);
  tableReport.push(hasEmail);
  tableReport.push(email);

  for(let i = 0; i < sL.length; i++){
    let text = sL[i] == null ? "" : String(sL[i]).toLowerCase();

    if(!text){
      text = " ";
    } else {
      // strings are immutable: the result of replace has to be assigned back,
      // otherwise the disclaimer is never removed
      text = text.replace(/disclaimer:.*/i,'***');
    }

    // occurTable[0] is read as the words and occurTable[1] as their frequencies
    // NOTE(review): entries at index 1 are undefined if fewer than two words come back - confirm against getWordsOccurrencesTable
    const occurTable = mo.StringOperators.getWordsOccurrencesTable(text,1,null,4);
    top_1_mostCommonWordValue.push(occurTable[0][0]);
    top_2_mostCommonWordValue.push(occurTable[0][1]);
    top_1_mostCommonWordFrequency.push(occurTable[1][0]);
    top_2_mostCommonWordFrequency.push(occurTable[1][1]);

    textLength.push(text.length);

    if(withDichotomy){
      const dicoAna = mo.StringOperators.countWordsDichotomyAnalysis(
        text, negativeWords, positiveWords, false);
      dichotomyAnalysis.push(dicoAna);
      // negative score: detractor, positive score: promotor, otherwise neutral
      dichotomyAnalysisCategory.push(dicoAna<0?'detractor':dicoAna>0?'promotor':'neutral');
    }

    // links: the cell receives whatever _getLinks returns, or '' when nothing is found
    // NOTE(review): _getLinks and _getEmails return the array of matches, so these cells hold arrays rather than strings - confirm this is intended
    const linksFound = StringListOperators._getLinks(text);
    link.push(linksFound || '');
    hasLink.push(Boolean(linksFound));

    // emails: same convention as links
    const emailsFound = StringListOperators._getEmails(text);
    email.push(emailsFound || '');
    hasEmail.push(Boolean(emailsFound));
  }

  return tableReport;
};

/**
 * instantiates the class found at mo[type] and sets its descriptive properties
 * @param {String} type key of the constructor inside mo
 * @param {String} name assigned to the name property
 * @param {String} label assigned to the label property
 * @param {String} description assigned to the description property (undefined when omitted)
 * @return {Object} the new instance
 */
StringListOperators._createNewList = function(type, name, label, description) {
  const ListClass = mo[type];
  // properties are set in the order name, label, description
  return Object.assign(new ListClass(), { name: name, label: label, description: description });
};

/**
 * finds the http / https links contained in a text. A link is a run of letters, digits, "/" and "." after the scheme, and it must be followed by a space, ":", ";", a line break, a tab, a vertical tab or the end of the text
 * @param {String} text text to be searched (converted to string first)
 * @return {Array} the links found, without the delimiter that follows them; null when there is none
 */
StringListOperators._getLinks = function(text) {
  // the look-ahead checks the delimiter without consuming it, so it does not end up
  // inside the returned link; "$" covers a link placed at the very end of the text
  return String(text).match(/https?:\/\/[a-zA-Z0-9\/.]+(?=[ :;\r\t\n\v]|$)/gi);
};

/**
 * finds the email-like substrings of a text: letters, digits, ".", "_" or "-" on both sides of an "@", with at least one "." after it
 * @param {String} text text to be searched
 * @return {Array} all the matches found; null when there is none
 */
StringListOperators._getEmails = function(text){
  const emailPattern = /([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+)/gi;
  const found = text.match(emailPattern);
  return found;
};

