add labeling module in preparation for new tag assignment algorithm

2025-04-09 17:15:43 +02:00 · 2017-07-31 00:38:28 -04:00 · 2017-07-31 00:38:28 -04:00 · 1d49f7c303
commit 1d49f7c303
parent 4baecf8b5e
6 changed files with 400 additions and 2 deletions
--- a/src/main/kotlin/com/johnlindquist/acejump/control/AceAction.kt
+++ b/src/main/kotlin/com/johnlindquist/acejump/control/AceAction.kt
@ -4,7 +4,7 @@ import com.intellij.openapi.actionSystem.AnActionEvent
 import com.intellij.openapi.actionSystem.CommonDataKeys.EDITOR
 import com.intellij.openapi.project.DumbAwareAction
 import com.johnlindquist.acejump.search.Finder
-import com.johnlindquist.acejump.search.Pattern.LINE_MARK
+import com.johnlindquist.acejump.label.Pattern.LINE_MARK
 import com.johnlindquist.acejump.view.Model.editor
 import java.awt.event.KeyEvent

--- a/src/main/kotlin/com/johnlindquist/acejump/control/Handler.kt
+++ b/src/main/kotlin/com/johnlindquist/acejump/control/Handler.kt
@ -10,7 +10,7 @@ import com.intellij.util.SmartList
 import com.johnlindquist.acejump.config.AceConfig.Companion.settings
 import com.johnlindquist.acejump.search.*
 import com.johnlindquist.acejump.search.Finder.search
-import com.johnlindquist.acejump.search.Pattern.*
+import com.johnlindquist.acejump.label.Pattern.*
 import com.johnlindquist.acejump.search.Skipper.restoreScroll
 import com.johnlindquist.acejump.search.Skipper.storeScroll
 import com.johnlindquist.acejump.view.Canvas
--- a/src/main/kotlin/com/johnlindquist/acejump/label/Pattern.kt
+++ b/src/main/kotlin/com/johnlindquist/acejump/label/Pattern.kt
@ -0,0 +1,86 @@
+package com.johnlindquist.acejump.label
+
+import com.johnlindquist.acejump.config.AceConfig.Companion.settings
+
+/**
+ * Patterns related to key priority, separation, and regexps for line mode.
+ */
+
+enum class Pattern(val string: String) {
+  // A newline character
+  END_OF_LINE("\\n"),
+  // The first character of a line, or the newline of an empty line
+  START_OF_LINE("^.|^\\n"),
+  // The first non-whitespace character of a line, or the newline of an
+  // empty line
+  CODE_INDENTS("(?<=^\\s*)\\S|^\\n"),
+  // Union of the three patterns above
+  LINE_MARK(END_OF_LINE.string + "|" +
+    START_OF_LINE.string + "|" +
+    CODE_INDENTS.string);
+
+  companion object {
+    /**
+     * Rank of [toKey] in the proximity ordering of [fromKey], where zero is
+     * the key itself. A key that is missing from the [nearby] table (for
+     * instance, a user-configured character outside the QWERTY letters and
+     * digits) ranks last, rather than failing with a NullPointerException.
+     */
+
+    private fun distance(fromKey: Char, toKey: Char) =
+      nearby[fromKey]?.get(toKey) ?: Int.MAX_VALUE
+
+    /**
+     * Rank of [char] in the [priority] ordering (lower is preferred). Keys
+     * which are missing from the table rank last.
+     */
+
+    private fun priority(char: Char) = priority[char] ?: Int.MAX_VALUE
+
+    // Every ordered pair of allowed characters, as a two-character string
+    private fun allBigrams() =
+      settings.allowedChars.run { flatMap { e -> map { c -> "$e$c" } } }
+
+    /**
+     * Sorts available tags by key distance. Tags which are ergonomically easier
+     * to type will be assigned first. We should prefer to use tags that contain
+     * repeated keys (ex. FF, JJ), and use tags that contain physically adjacent
+     * keys (ex. 12, 21) to keys that are located further apart on the keyboard.
+     *
+     * Tags starting with the first character of the query are excluded. If the
+     * query is empty, no tag is excluded.
+     *
+     * @param query the current search query
+     *
+     * @return the candidate tags, most preferable first
+     */
+
+    fun setupTags(query: String): LinkedHashSet<String> {
+      // Null when the query is empty, in which case nothing is filtered out
+      val firstQueryChar = query.firstOrNull()
+
+      return LinkedHashSet(allBigrams()).filter { it[0] != firstQueryChar }
+        .sortedWith(compareBy(
+          // Tags containing a digit come after purely alphabetic tags
+          { it[0].isDigit() || it[1].isDigit() },
+          // Then prefer tags whose two keys are physically close together
+          { distance(it[0], it.last()) },
+          // Then prefer tags starting with a higher priority key
+          { priority(it.first()) }))
+        .toCollection(linkedSetOf<String>())
+    }
+
+    // Keys in descending order of preference, mapped to their rank
+    private val priority: Map<Char, Int> =
+      "fjghdkslavncmbxzrutyeiwoqp5849673210".mapIndices()
+
+    private val nearby = mapOf(
+      // Values are QWERTY keys sorted by physical proximity to the map key
+      'j' to "jikmnhuolbgypvftcdrxsezawq8796054321",
+      'f' to "ftgvcdryhbxseujnzawqikmolp5463728190",
+      'k' to "kolmjipnhubgyvftcdrxsezawq9807654321",
+      'd' to "drfcxsetgvzawyhbqujnikmolp4352617890",
+      'l' to "lkopmjinhubgyvftcdrxsezawq0987654321",
+      's' to "sedxzawrfcqtgvyhbujnikmolp3241567890",
+      'a' to "aqwszedxrfctgvyhbujnikmolp1234567890",
+      'h' to "hujnbgyikmvftolcdrpxsezawq6758493021",
+      'g' to "gyhbvftujncdrikmxseolzawpq5647382910",
+      'y' to "yuhgtijnbvfrokmcdeplxswzaq6758493021",
+      't' to "tygfruhbvcdeijnxswokmzaqpl5647382910",
+      'u' to "uijhyokmnbgtplvfrcdexswzaq7869504321",
+      'r' to "rtfdeygvcxswuhbzaqijnokmpl4536271890",
+      'n' to "nbhjmvgyuiklocftpxdrzseawq7685940321",
+      'v' to "vcfgbxdrtyhnzseujmawikqolp5463728190",
+      'm' to "mnjkbhuilvgyopcftxdrzseawq8970654321",
+      'c' to "cxdfvzsertgbawyhnqujmikolp4352617890",
+      'b' to "bvghncftyujmxdrikzseolawqp6574839201",
+      'i' to "iokjuplmnhybgtvfrcdexswzaq8970654321",
+      'e' to "erdswtfcxzaqygvuhbijnokmpl3425167890",
+      'x' to "xzsdcawerfvqtgbyhnujmikolp3241567890",
+      'z' to "zasxqwedcrfvtgbyhnujmikolp1234567890",
+      'o' to "oplkimjunhybgtvfrcdexswzaq9087654321",
+      'w' to "wesaqrdxztfcygvuhbijnokmpl2314567890",
+      'p' to "plokimjunhybgtvfrcdexswzaq0987654321",
+      'q' to "qwaeszrdxtfcygvuhbijnokmpl1234567890",
+      '1' to "1234567890qawzsexdrcftvgybhunjimkolp",
+      '2' to "2134567890qwasezxdrcftvgybhunjimkolp",
+      '3' to "3241567890weqasdrzxcftvgybhunjimkolp",
+      '4' to "4352617890erwsdftqazxcvgybhunjimkolp",
+      '5' to "5463728190rtedfgywsxcvbhuqaznjimkolp",
+      '6' to "6574839201tyrfghuedcvbnjiwsxmkoqazlp",
+      '7' to "7685940321yutghjirfvbnmkoedclpwsxqaz",
+      '8' to "8796054321uiyhjkotgbnmlprfvedcwsxqaz",
+      '9' to "9807654321ioujklpyhnmtgbrfvedcwsxqaz",
+      '0' to "0987654321opiklujmyhntgbrfvedcwsxqaz")
+      .mapValues { it.value.mapIndices() }
+
+    // Maps each character of the receiver to its position in the receiver
+    private fun String.mapIndices() = mapIndexed { i, c -> Pair(c, i) }.toMap()
+  }
+}
--- a/src/main/kotlin/com/johnlindquist/acejump/label/Solver.kt
+++ b/src/main/kotlin/com/johnlindquist/acejump/label/Solver.kt
@ -0,0 +1,118 @@
+package com.johnlindquist.acejump.label
+
+import com.google.common.collect.BiMap
+import com.google.common.collect.HashBiMap
+import com.johnlindquist.acejump.search.getView
+import com.johnlindquist.acejump.search.wordBounds
+import com.johnlindquist.acejump.view.Model.editor
+import com.johnlindquist.acejump.view.Model.editorText
+import com.johnlindquist.acejump.search.get
+import java.lang.Math.max
+import java.lang.Math.min
+
+/**
+ * Tumbles tags around sites to maximize the number of sites covered. Should be
+ * able to tag all results in the editor, otherwise we have failed.
+ */
+
+object Solver {
+  // Tags which are still available for assignment, most preferable first
+  private var bigrams: LinkedHashSet<String> = linkedSetOf()
+  // Tags assigned so far, mapped to the text index which each one labels
+  private var newTags: BiMap<String, Int> = HashBiMap.create()
+
+  /**
+   * Iterates through the remaining available tags, until we find one that
+   * matches our criteria, i.e. does not collide with an existing tag or
+   * plaintext string. To have the desired behavior, this has a surprising
+   * number of edge cases that must be explicitly prevented.
+   *
+   * @param idx the index to which a tag is to be assigned
+   *
+   * @return true if a tag was assigned to idx, or if another tag has already
+   * been assigned within two characters of idx (limited to the bounds returned
+   * by wordBounds); false if no remaining tag is suitable
+   */
+
+  fun tryToAssignTagToIndex(idx: Int): Boolean {
+    val (left, right) = editorText.wordBounds(idx)
+
+    // A tag which has already been placed close by is treated as covering idx
+    val nearbySites = max(left, idx - 2)..min(right, idx + 2)
+    if (nearbySites.any { newTags.containsValue(it) }) return true
+
+    // Skip tags whose first letter is already in use as a one-character tag,
+    // and tags which could be confused with plaintext following idx
+    val chosenTag = bigrams.firstOrNull {
+      !newTags.containsKey("${it[0]}") && !it.collidesWithText(idx, right)
+    } ?: return false
+
+    newTags[chosenTag] = idx
+    // Prevents "...a[bc]...z[bc]..."
+    bigrams.remove(chosenTag)
+    return true
+  }
+
+  /**
+   * Sorts jump targets to determine which positions get first choice for tags,
+   * by taking into account the structure of the surrounding text. For example,
+   * if the jump target is the first letter in a word, it is advantageous to
+   * prioritize this location (in case we run out of tags), since the user is
+   * more likely to target words by their leading character than not.
+   *
+   * @param jumpTargets indices of the sites to be tagged
+   *
+   * @return the same indices, ordered from highest to lowest priority
+   */
+
+  fun sortValidJumpTargets(jumpTargets: Set<Int>) =
+    if (Tagger.regex) jumpTargets.sortedBy { it !in editor.getView() }
+    else jumpTargets.sortedWith(compareBy(
+      // Sites in immediate view should come first
+      { it !in editor.getView() },
+      // Ensure that the first letter of a word is prioritized for tagging. A
+      // target at index zero has no preceding character, so it always counts
+      // as the start of a word.
+      { it > 0 && editorText[it - 1].isLetterOrDigit() },
+      // Target words with more unique characters to the immediate right ought
+      // to have first pick for tags, since they are the most "picky" targets
+      { -editorText[it, editorText.wordBounds(it).second].distinct().size }))
+
+  /**
+   * Attempts to assign a tag to each result, in the order given by
+   * [sortValidJumpTargets]. Assignments are written directly into newTags, and
+   * each tag which is used gets removed from tags.
+   *
+   * @param results indices of the sites to be tagged
+   * @param tags available tags, most preferable first (mutated)
+   * @param newTags map which receives the assignments (mutated)
+   */
+
+  fun test(results: Set<Int>, tags: LinkedHashSet<String>, newTags: BiMap<String, Int>) {
+    bigrams = tags
+    Solver.newTags = newTags
+    var totalRejects = 0
+
+    // Hope for the best
+    sortValidJumpTargets(results).forEach {
+      // No tags left, so the remaining sites cannot be covered
+      if (tags.isEmpty()) {
+        Tagger.full = false; return
+      }
+      if (!tryToAssignTagToIndex(it)) {
+        // But fail as soon as we miss one
+        Tagger.full = false
+        totalRejects++
+        // We are already outside the view, no need to search further if it
+        // failed
+        if (it !in editor.getView()) return
+      }
+    }
+
+    println("Total rejects: $totalRejects")
+  }
+
+  /**
+   * Returns true IFF the receiver, inserted between the left and right indices,
+   * matches an existing substring elsewhere in the editor text. We should never
+   * use a tag which can be partly completed by typing plaintext, where the tag
+   * is the receiver, the tag index is the leftIndex, and rightIndex is the last
+   * character we care about (this is usually the last letter of the same word).
+   *
+   * @param leftIndex index where a tag is to be used
+   * @param rightIndex index of last character (ie. end of the word)
+   */
+
+  private fun String.collidesWithText(leftIndex: Int, rightIndex: Int) =
+    // Stops at the first collision, instead of building every candidate first
+    ((leftIndex + 1)..min(rightIndex, editorText.length)).any {
+      (editorText.substring(leftIndex, it) + this[0]) in editorText
+    }
+}
--- a/src/main/kotlin/com/johnlindquist/acejump/label/Tagger.kt
+++ b/src/main/kotlin/com/johnlindquist/acejump/label/Tagger.kt
@ -0,0 +1,192 @@
+package com.johnlindquist.acejump.label
+
+import com.google.common.collect.BiMap
+import com.google.common.collect.HashBiMap
+import com.intellij.find.FindModel
+import com.intellij.openapi.diagnostic.Logger
+import com.johnlindquist.acejump.search.Finder
+import com.johnlindquist.acejump.search.Jumper
+import com.johnlindquist.acejump.search.Skipper
+import com.johnlindquist.acejump.search.Tagger.textMatches
+import com.johnlindquist.acejump.search.getView
+import com.johnlindquist.acejump.view.Marker
+import com.johnlindquist.acejump.view.Model.editor
+import java.util.*
+
+/**
+ * Singleton that works with Finder to tag text search results in the editor.
+ *
+ * @see Finder
+ */
+
+object Tagger {
+  // Markers built from the tags of the most recent scan
+  var markers: List<Marker> = emptyList()
+    private set
+
+  // Once a regex search has been seen, this stays true until reset()
+  var regex = false
+  var query = ""
+    private set
+  // Written by scan(), and cleared by Solver when a site could not be tagged
+  var full = false
+  // Text indices of the most recent search results
+  var textMatches: Set<Int> = emptySet()
+  // Tags which are currently assigned, mapped to the index each one labels
+  private var tagMap: BiMap<String, Int> = HashBiMap.create()
+  // Tags available for assignment, rebuilt whenever the query changes
+  private var bigrams: LinkedHashSet<String> = linkedSetOf()
+  private val logger = Logger.getInstance(Tagger::class.java)
+
+  private val Iterable<Int>.allInView
+    get() = all { it in editor.getView() }
+
+  /**
+   * Entry point for new search results. Records the results, rebuilds the
+   * query and the set of available tags, jumps if the query ends with an
+   * assigned tag, and finally recomputes the markers.
+   *
+   * @param model the find model which produced the results
+   * @param results text indices of all matches
+   */
+
+  fun markOrJump(model: FindModel, results: Set<Int>) {
+    textMatches = results
+    if (!regex) regex = model.isRegularExpressions
+
+    // A regex search contributes a single space to the query, in place of the
+    // expression itself. Text typed afterwards is appended to that space.
+    query = (if (model.isRegularExpressions) " "
+    else if (regex) " " + model.stringToFind
+    else model.stringToFind).toLowerCase()
+
+    bigrams = Pattern.setupTags(query)
+    giveJumpOpportunity()
+    markTags()
+  }
+
+  // NOTE(review): despite the name, this jumps to the first entry of tagMap
+  // however many tags remain. Confirm against the callers.
+  fun maybeJumpIfJustOneTagRemains() =
+    tagMap.entries.firstOrNull()?.run {
+      Jumper.jump(value)
+    }
+
+  // Recomputes the markers. If none were produced for a query longer than one
+  // character (and Finder is not skimming), asks Skipper to skip ahead.
+  private fun markTags() {
+    computeMarkers()
+
+    if (markers.isEmpty() && query.length > 1 && !Finder.skim)
+      Skipper.ifQueryExistsSkipAhead()
+  }
+
+  // Jumps to the first assigned tag which the query ends with, if there is one
+  private fun giveJumpOpportunity() =
+    tagMap.forEach {
+      if (query.endsWith(it.key)) {
+        return Jumper.jump(it.value)
+      }
+    }
+
+  // Rebuilds the markers from a fresh scan. The previous tagMap is kept
+  // whenever the scan assigns no tags at all.
+  private fun computeMarkers() {
+    if (Finder.skim && !regex) return
+
+    markers = scan().apply { if (this.isNotEmpty()) tagMap = this }
+      .map { (tag, index) -> Marker(query, tag, index) }
+  }
+
+  private var deep: Boolean = false
+
+  /**
+   * Assigns and compacts tags for the results which are currently in view.
+   *
+   * NOTE(review): deep is reset to false on entry, so the deep branch below is
+   * never taken. Since full is therefore false when the ratio is checked, the
+   * recursive rescan is never reached either, and its result would be thrown
+   * away regardless. This looks like unfinished work. The behavior has been
+   * left as it is, until the intended design is confirmed.
+   *
+   * @return the tags assigned to the results in view
+   */
+
+  private fun scan(): BiMap<String, Int> {
+    deep = false
+    val resultsToTag =
+      if (deep) {
+        full = true
+        textMatches
+      } else {
+        full = false
+        textMatches.filter { it in editor.getView() }.toSet()
+      }
+
+    val tags = assignTags(resultsToTag).let {
+      compact(it)
+    }
+    // This is NaN when there are no tags, which fails the comparison below
+    val uniToBigram = tags.count { it.key.length == 1 }.toDouble() / tags.size
+    // If there are few unigrams, let's use all bigrams and try to cover all
+    if (uniToBigram < 0.5 && !deep && full) {
+      deep = true; scan()
+    }
+
+    return tags
+  }
+
+  /**
+   * Shortens assigned tags. Effectively, this will only shorten two-character
+   * tags to one-character tags. This will happen if and only if:
+   *
+   * 1. The shortened tag is unique among the set of existing tags.
+   * 2. The query does not end with the shortened tag, in whole or part.
+   *
+   * @param tagMap the tags to be shortened (left unmodified)
+   *
+   * @return a new map holding the same indices under their shortened tags
+   */
+
+  private fun compact(tagMap: BiMap<String, Int>) =
+    // Count the tags per first character once, not once for every entry
+    tagMap.keys.groupingBy { it[0] }.eachCount().let { tagsPerFirstChar ->
+      tagMap.mapKeysTo(HashBiMap.create(tagMap.size)) { e ->
+        val firstChar = e.key[0]
+        val firstCharUnique = tagsPerFirstChar[firstChar] == 1
+        val queryEndsWith = query.endsWith(firstChar) || query.endsWith(e.key)
+        if (firstCharUnique && !queryEndsWith) firstChar.toString() else e.key
+      }
+    }
+
+  // Provides a way to short-circuit the full text search if a match is found
+  private operator fun String.contains(key: String) =
+    textMatches.any { regionMatches(it, key, 0, key.length) }
+
+  /**
+   * Maps tags to search results. Tags *must* have the following properties:
+   *
+   * 1. A tag must not match *any* bigrams on the screen.
+   * 2. A tag's 1st letter must not match any letters of the covered word.
+   * 3. Tag must not match any combination of any plaintext and tag. "e(a[B)X]"
+   * 4. Once assigned, a tag must never change until it has been selected. *A.
+   *
+   * Tags *should* have the following properties:
+   *
+   * A. Should be as short as possible. A tag may be "compacted" later.
+   * B. Should prefer keys that are physically closer to the last key pressed.
+   *
+   * @param results All indices to be tagged
+   *
+   * @return A map of all tags and their corresponding indices
+   */
+
+  private fun assignTags(results: Set<Int>): BiMap<String, Int> {
+    if (query.isEmpty()) return HashBiMap.create()
+    val newTags: BiMap<String, Int> = transferExistingTagsCompatibleWithQuery()
+    // In regex mode, tags which are all still in view are reused as they are
+    newTags.run { if (regex && isNotEmpty() && values.allInView) return this }
+    Solver.test(results, bigrams, newTags)
+
+    return newTags
+  }
+
+
+  /**
+   * Adds pre-existing tags where search string and tag overlap. For example,
+   * tags starting with the last character of the query should be considered.
+   */
+
+  private fun transferExistingTagsCompatibleWithQuery() =
+    tagMap.filterTo(HashBiMap.create(), { (tag, _) -> query overlaps tag })
+
+  // Restores every piece of mutable state to its initial value
+  fun reset() {
+    regex = false
+    full = false
+    deep = false
+    textMatches = emptySet()
+    tagMap.clear()
+    query = ""
+    bigrams.clear()
+    markers = emptyList()
+  }
+
+  /**
+   * Returns true if the Tagger contains a match in the new view, that is not
+   * contained (visible) in the old view. This method assumes that textMatches
+   * are in ascending order by index.
+   *
+   * @see textMatches
+   *
+   * @return true if there is a match in the new range not in the old range
+   */
+
+  fun hasMatchBetweenOldAndNewView(old: IntRange, new: IntRange) =
+    // The parentheses spell out the grouping which the precedence rules give
+    // anyway: the elvis operator binds tighter than a comparison
+    (textMatches.lastOrNull { it < old.first } ?: -1) >= new.first ||
+      (textMatches.firstOrNull { it > old.last } ?: new.last) < new.last
+
+  // True if some assigned tag overlaps the end of the query and is in view
+  fun hasTagSuffix(query: String) = tagMap.any {
+    query overlaps it.key && it.value in editor.getView()
+  }
+
+  // True if the receiver ends with the first character of xx, or with all of
+  // xx. The argument xx must not be empty.
+  infix fun String.overlaps(xx: String) = endsWith(xx.first()) || endsWith(xx)
+  // True unless Finder is skimming, or a tag is assigned to index i
+  infix fun canDiscard(i: Int) = !(Finder.skim || tagMap.containsValue(i))
+}
--- a/src/main/kotlin/com/johnlindquist/acejump/search/Finder.kt
+++ b/src/main/kotlin/com/johnlindquist/acejump/search/Finder.kt
@ -6,6 +6,8 @@ import com.intellij.openapi.editor.markup.HighlighterTargetArea.EXACT_RANGE
 import com.intellij.openapi.editor.markup.RangeHighlighter
 import com.johnlindquist.acejump.control.Handler
 import com.johnlindquist.acejump.control.Trigger
+import com.johnlindquist.acejump.label.Pattern
+import com.johnlindquist.acejump.label.Tagger
 import com.johnlindquist.acejump.view.Marker
 import com.johnlindquist.acejump.view.Model.editor
 import com.johnlindquist.acejump.view.Model.editorText