Fuzzy Matches in Google Sheets

While building our curriculum inventory, I needed to match thousands of strings in Google Sheets. Doing this manually would have been tedious and error-prone. The EQ function fails if even one character is different, to say nothing of objectives whose words have been shifted around. Here’s my script: it outputs the percentage of the words in string A that are present in string B, averaged with the percentage of the words in string B that are present in string A. I found that anything over 40% seems to be a semantic match.

/**
 * Scores how similar two strings are based on the words they share.
 *
 * Each input is lower-cased, stripped of punctuation, and split into words on
 * whitespace. The score is the fraction of string1's words that occur in
 * string2, averaged with the fraction of string2's words that occur in
 * string1. Word order is ignored; repeated words are each counted.
 *
 * Note: `\w` only covers ASCII letters, digits and underscore, so accented or
 * non-Latin characters are removed together with the punctuation.
 *
 * @param {*} string1 First value to compare; non-strings are converted with String().
 * @param {*} string2 Second value to compare; non-strings are converted with String().
 * @return {number} A score from 0 (no shared words) to 1 (every word of each
 *     string occurs in the other). Two inputs with no words at all score 1;
 *     if only one of them has no words the score is 0.
 */
function howMuchMatches(string1, string2) {

  // Lower-cases the value, strips punctuation, and splits it into words.
  function toWords(value) {
    // Sheet cells may hold numbers or be blank, so normalise to a string first.
    var text = value == null ? "" : String(value);
    return text
      .toLowerCase()
      .replace(/[^\w\s]/g, "")
      // Split on any run of whitespace so double spaces, tabs and newlines
      // do not produce empty "words" or glue two words together.
      .split(/\s+/)
      .filter(function (word) { return word.length > 0; });
  }

  // Counts the entries of `words` that also occur somewhere in `otherWords`.
  function countShared(words, otherWords) {
    var shared = 0;
    for (var i = 0; i < words.length; i++) {
      if (otherWords.indexOf(words[i]) >= 0) {
        shared++;
      }
    }
    return shared;
  }

  // clean both strings
  var array1 = toWords(string1);
  var array2 = toWords(string2);

  // Avoid dividing by zero: two word-less inputs are treated as identical,
  // a word-less input against a non-empty one as no match.
  if (array1.length === 0 || array2.length === 0) {
    return (array1.length === 0 && array2.length === 0) ? 1 : 0;
  }

  // how much of string1 is in string2, and vice versa, as fractions
  var percentMatch12 = countShared(array1, array2) / array1.length;
  var percentMatch21 = countShared(array2, array1) / array2.length;

  // return the average of the two
  return (percentMatch12 + percentMatch21) / 2;
}