session-ios/SessionMessagingKit/Utilities/FullTextSearchFinder.swift
Niels Andriesse 519ffa4405 Fully switch to the new contact API
This should improve performance significantly as it avoids many unnecessary sync transactions. It also makes the code more readable.
2021-07-22 14:41:27 +10:00

247 lines
10 KiB
Swift

//
// Copyright (c) 2019 Open Whisper Systems. All rights reserved.
//
import Foundation
// Create a searchable index for objects of type T
public class SearchIndexer<T> {
private let indexBlock: (T, YapDatabaseReadTransaction) -> String
public init(indexBlock: @escaping (T, YapDatabaseReadTransaction) -> String) {
self.indexBlock = indexBlock
}
public func index(_ item: T, transaction: YapDatabaseReadTransaction) -> String {
return normalize(indexingText: indexBlock(item, transaction))
}
private func normalize(indexingText: String) -> String {
return FullTextSearchFinder.normalize(text: indexingText)
}
}
@objc
public class FullTextSearchFinder: NSObject {
// MARK: - Dependencies
private static var tsAccountManager: TSAccountManager {
return TSAccountManager.sharedInstance()
}
// MARK: - Querying
// We want to match by prefix for "search as you type" functionality.
// SQLite does not support suffix or contains matches.
public class func query(searchText: String) -> String {
// 1. Normalize the search text.
//
// TODO: We could arguably convert to lowercase since the search
// is case-insensitive.
let normalizedSearchText = FullTextSearchFinder.normalize(text: searchText)
// 2. Split the non-numeric text into query terms (or tokens).
let nonNumericText = String(String.UnicodeScalarView(normalizedSearchText.unicodeScalars.lazy.map {
if CharacterSet.decimalDigits.contains($0) {
return " "
} else {
return $0
}
}))
var queryTerms = nonNumericText.split(separator: " ")
// 3. Add an additional numeric-only query term.
let digitsOnlyScalars = normalizedSearchText.unicodeScalars.lazy.filter {
CharacterSet.decimalDigits.contains($0)
}
let digitsOnly: Substring = Substring(String(String.UnicodeScalarView(digitsOnlyScalars)))
queryTerms.append(digitsOnly)
// 4. De-duplicate and sort query terms.
// Duplicate terms are redundant.
// Sorting terms makes the output of this method deterministic and easier to test,
// and the order won't affect the search results.
queryTerms = Array(Set(queryTerms)).sorted()
// 5. Filter the query terms.
let filteredQueryTerms = queryTerms.filter {
// Ignore empty terms.
$0.count > 0
}.map {
// Allow partial match of each term.
//
// Note that we use double-quotes to enclose each search term.
// Quoted search terms can include a few more characters than
// "bareword" (non-quoted) search terms. This shouldn't matter,
// since we're filtering all of the affected characters, but
// quoting protects us from any bugs in that logic.
"\"\($0)\"*"
}
// 6. Join terms into query string.
let query = filteredQueryTerms.joined(separator: " ")
return query
}
public func enumerateObjects(searchText: String, transaction: YapDatabaseReadTransaction, block: @escaping (Any, String) -> Void) {
guard let ext: YapDatabaseFullTextSearchTransaction = ext(transaction: transaction) else {
return
}
let query = FullTextSearchFinder.query(searchText: searchText)
let maxSearchResults = 500
var searchResultCount = 0
let snippetOptions = YapDatabaseFullTextSearchSnippetOptions()
snippetOptions.startMatchText = ""
snippetOptions.endMatchText = ""
ext.enumerateKeysAndObjects(matching: query, with: snippetOptions) { (snippet: String, _: String, _: String, object: Any, stop: UnsafeMutablePointer<ObjCBool>) in
guard searchResultCount < maxSearchResults else {
stop.pointee = true
return
}
searchResultCount += 1
block(object, snippet)
}
}
// MARK: - Normalization
fileprivate static var charactersToRemove: CharacterSet = {
// * We want to strip punctuation - and our definition of "punctuation"
// is broader than `CharacterSet.punctuationCharacters`.
// * FTS should be robust to (i.e. ignore) illegal and control characters,
// but it's safer if we filter them ourselves as well.
var charactersToFilter = CharacterSet.punctuationCharacters
charactersToFilter.formUnion(CharacterSet.illegalCharacters)
charactersToFilter.formUnion(CharacterSet.controlCharacters)
// We want to strip all ASCII characters except:
// * Letters a-z, A-Z
// * Numerals 0-9
// * Whitespace
var asciiToFilter = CharacterSet(charactersIn: UnicodeScalar(0x0)!..<UnicodeScalar(0x80)!)
assert(!asciiToFilter.contains(UnicodeScalar(0x80)!))
asciiToFilter.subtract(CharacterSet.alphanumerics)
asciiToFilter.subtract(CharacterSet.whitespacesAndNewlines)
charactersToFilter.formUnion(asciiToFilter)
return charactersToFilter
}()
// This is a hot method, especially while running large migrations.
// Changes to it should go through a profiler to make sure large migrations
// aren't adversely affected.
@objc
public class func normalize(text: String) -> String {
// 1. Filter out invalid characters.
let filtered = text.removeCharacters(characterSet: charactersToRemove)
// 2. Simplify whitespace.
let simplified = filtered.replaceCharacters(characterSet: .whitespacesAndNewlines,
replacement: " ")
// 3. Strip leading & trailing whitespace last, since we may replace
// filtered characters with whitespace.
return simplified.trimmingCharacters(in: .whitespacesAndNewlines)
}
// MARK: - Index Building
private static let groupThreadIndexer: SearchIndexer<TSGroupThread> = SearchIndexer { (groupThread: TSGroupThread, transaction: YapDatabaseReadTransaction) in
let groupName = groupThread.groupModel.groupName ?? ""
let memberStrings = groupThread.groupModel.groupMemberIds.map { recipientId in
recipientIndexer.index(recipientId, transaction: transaction)
}.joined(separator: " ")
return "\(groupName) \(memberStrings)"
}
private static let contactThreadIndexer: SearchIndexer<TSContactThread> = SearchIndexer { (contactThread: TSContactThread, transaction: YapDatabaseReadTransaction) in
let recipientId = contactThread.contactSessionID()
var result = recipientIndexer.index(recipientId, transaction: transaction)
if IsNoteToSelfEnabled(),
let localNumber = tsAccountManager.storedOrCachedLocalNumber(transaction),
localNumber == recipientId {
let noteToSelfLabel = NSLocalizedString("NOTE_TO_SELF", comment: "Label for 1:1 conversation with yourself.")
result += " \(noteToSelfLabel)"
}
return result
}
private static let recipientIndexer: SearchIndexer<String> = SearchIndexer { (recipientId: String, transaction: YapDatabaseReadTransaction) in
let displayName = Storage.shared.getContact(with: recipientId)?.displayName(for: Contact.Context.regular) ?? recipientId
return "\(recipientId) \(displayName)"
}
private static let messageIndexer: SearchIndexer<TSMessage> = SearchIndexer { (message: TSMessage, transaction: YapDatabaseReadTransaction) in
if let bodyText = message.bodyText(with: transaction) {
return bodyText
}
return ""
}
private class func indexContent(object: Any, transaction: YapDatabaseReadTransaction) -> String? {
if let groupThread = object as? TSGroupThread {
return self.groupThreadIndexer.index(groupThread, transaction: transaction)
} else if let contactThread = object as? TSContactThread {
guard contactThread.shouldBeVisible else {
// If we've never sent/received a message in a TSContactThread,
// then we want it to appear in the "Other Contacts" section rather
// than in the "Conversations" section.
return nil
}
return self.contactThreadIndexer.index(contactThread, transaction: transaction)
} else if let message = object as? TSMessage {
return self.messageIndexer.index(message, transaction: transaction)
} else {
return nil
}
}
// MARK: - Extension Registration
private static let dbExtensionName: String = "FullTextSearchFinderExtension"
private func ext(transaction: YapDatabaseReadTransaction) -> YapDatabaseFullTextSearchTransaction? {
return transaction.ext(FullTextSearchFinder.dbExtensionName) as? YapDatabaseFullTextSearchTransaction
}
@objc
public class func asyncRegisterDatabaseExtension(storage: OWSStorage) {
storage.asyncRegister(dbExtensionConfig, withName: dbExtensionName)
}
// Only for testing.
public class func ensureDatabaseExtensionRegistered(storage: OWSStorage) {
guard storage.registeredExtension(dbExtensionName) == nil else {
return
}
storage.register(dbExtensionConfig, withName: dbExtensionName)
}
private class var dbExtensionConfig: YapDatabaseFullTextSearch {
let contentColumnName = "content"
let handler = YapDatabaseFullTextSearchHandler.withObjectBlock { (transaction: YapDatabaseReadTransaction, dict: NSMutableDictionary, _: String, _: String, object: Any) in
dict[contentColumnName] = indexContent(object: object, transaction: transaction)
}
// update search index on contact name changes?
return YapDatabaseFullTextSearch(columnNames: ["content"],
options: nil,
handler: handler,
ftsVersion: YapDatabaseFullTextSearchFTS5Version,
versionTag: "1")
}
}