session-ios/SessionMessagingKit/Database/Models/LinkPreview.swift
Morgan Pretty 93b54a3b7d Added logic for the GarbageCollectionJob
Fixed a bug where the GalleryRailView wasn't appearing
Fixed a database reentrancy error
Fixed a bug where the threadId for migrated attachmentUpload jobs wasn't getting set to the non-legacy version
Deleted the buggy overwritten 'delete' methods to avoid confusion
Updated the GroupMember table to cascade delete if it's thread is deleted (can't do so for the Closed/Open group but they share the same id conveniently)
Updated the 'isRunningTests' to be based on the existence of XCInjectBundleInfo which seems to be more consistent than the old approach
2022-06-02 14:13:07 +10:00

580 lines
23 KiB
Swift

// Copyright © 2022 Rangeproof Pty Ltd. All rights reserved.
import Foundation
import GRDB
import PromiseKit
import AFNetworking
import SignalCoreKit
import SessionUtilitiesKit
public struct LinkPreview: Codable, Equatable, Hashable, FetchableRecord, PersistableRecord, TableRecord, ColumnExpressible {
public static var databaseTableName: String { "linkPreview" }
internal static let interactionForeignKey = ForeignKey(
[Columns.url],
to: [Interaction.Columns.linkPreviewUrl]
)
internal static let interactions = hasMany(Interaction.self, using: Interaction.linkPreviewForeignKey)
public static let attachment = hasOne(Attachment.self, using: Attachment.linkPreviewForeignKey)
/// We want to cache url previews to the nearest 100,000 seconds (~28 hours - simpler than 86,400) to ensure the user isn't shown a preview that is too stale
internal static let timstampResolution: Double = 100000
public typealias Columns = CodingKeys
public enum CodingKeys: String, CodingKey, ColumnExpression {
case url
case timestamp
case variant
case title
case attachmentId
}
public enum Variant: Int, Codable, Hashable, DatabaseValueConvertible {
case standard
case openGroupInvitation
}
/// The url for the link preview
public let url: String
/// The number of seconds since epoch rounded down to the nearest 100,000 seconds (~day) - This
/// allows us to optimise against duplicate urls without having stale data last too long
public let timestamp: TimeInterval
/// The type of link preview
public let variant: Variant
/// The title for the link
public let title: String?
/// The id for the attachment for the link preview image
public let attachmentId: String?
// MARK: - Relationships
public var attachment: QueryInterfaceRequest<Attachment> {
request(for: LinkPreview.attachment)
}
// MARK: - Initialization
public init(
url: String,
timestamp: TimeInterval = LinkPreview.timestampFor(
sentTimestampMs: (Date().timeIntervalSince1970 * 1000) // Default to now
),
variant: Variant = .standard,
title: String?,
attachmentId: String? = nil
) {
self.url = url
self.timestamp = timestamp
self.variant = variant
self.title = title
self.attachmentId = attachmentId
}
}
// MARK: - Protobuf
public extension LinkPreview {
init?(_ db: Database, proto: SNProtoDataMessage, body: String?, sentTimestampMs: TimeInterval) throws {
guard let previewProto = proto.preview.first else { throw LinkPreviewError.noPreview }
guard proto.attachments.count < 1 else { throw LinkPreviewError.invalidInput }
guard URL(string: previewProto.url) != nil else { throw LinkPreviewError.invalidInput }
guard LinkPreview.isValidLinkUrl(previewProto.url) else { throw LinkPreviewError.invalidInput }
guard let body: String = body else { throw LinkPreviewError.invalidInput }
guard LinkPreview.allPreviewUrls(forMessageBodyText: body).contains(previewProto.url) else {
throw LinkPreviewError.invalidInput
}
// Try to get an existing link preview first
let timestamp: TimeInterval = LinkPreview.timestampFor(sentTimestampMs: sentTimestampMs)
let maybeLinkPreview: LinkPreview? = try? LinkPreview
.filter(LinkPreview.Columns.url == previewProto.url)
.filter(LinkPreview.Columns.timestamp == LinkPreview.timestampFor(
sentTimestampMs: Double(proto.timestamp)
))
.fetchOne(db)
if let linkPreview: LinkPreview = maybeLinkPreview {
self = linkPreview
return
}
self.url = previewProto.url
self.timestamp = timestamp
self.variant = .standard
self.title = LinkPreview.normalizeTitle(title: previewProto.title)
if let imageProto = previewProto.image {
let attachment: Attachment = Attachment(proto: imageProto)
try attachment.insert(db)
self.attachmentId = attachment.id
}
else {
self.attachmentId = nil
}
// Make sure the quote is valid before completing
guard self.title != nil || self.attachmentId != nil else { throw LinkPreviewError.invalidInput }
}
}
// MARK: - Convenience
public extension LinkPreview {
struct URLMatchResult {
let urlString: String
let matchRange: NSRange
}
static func timestampFor(sentTimestampMs: Double) -> TimeInterval {
// We want to round the timestamp down to the nearest 100,000 seconds (~28 hours - simpler
// than 86,400) to optimise LinkPreview storage without having too stale data
return (floor(sentTimestampMs / 1000 / LinkPreview.timstampResolution) * LinkPreview.timstampResolution)
}
static func saveAttachmentIfPossible(_ db: Database, imageData: Data?, mimeType: String) throws -> String? {
guard let imageData: Data = imageData, !imageData.isEmpty else { return nil }
guard let fileExtension: String = MIMETypeUtil.fileExtension(forMIMEType: mimeType) else { return nil }
let filePath = OWSFileSystem.temporaryFilePath(withFileExtension: fileExtension)
try imageData.write(to: NSURL.fileURL(withPath: filePath), options: .atomicWrite)
guard let dataSource = DataSourcePath.dataSource(withFilePath: filePath, shouldDeleteOnDeallocation: true) else {
return nil
}
return try Attachment(contentType: mimeType, dataSource: dataSource)?
.inserted(db)
.id
}
static func isValidLinkUrl(_ urlString: String) -> Bool {
return URL(string: urlString) != nil
}
static func allPreviewUrls(forMessageBodyText body: String) -> [String] {
return allPreviewUrlMatches(forMessageBodyText: body).map { $0.urlString }
}
// MARK: - Private Methods
private static func allPreviewUrlMatches(forMessageBodyText body: String) -> [URLMatchResult] {
let detector: NSDataDetector
do {
detector = try NSDataDetector(types: NSTextCheckingResult.CheckingType.link.rawValue)
}
catch {
return []
}
var urlMatches: [URLMatchResult] = []
let matches = detector.matches(in: body, options: [], range: NSRange(location: 0, length: body.count))
for match in matches {
guard let matchURL = match.url else { continue }
// If the URL entered didn't have a scheme it will default to 'http', we want to catch this and
// set the scheme to 'https' instead as we don't load previews for 'http' so this will result
// in more previews actually getting loaded without forcing the user to enter 'https://' before
// every URL they enter
let urlString: String = (matchURL.absoluteString == "http://\(body)" ?
"https://\(body)" :
matchURL.absoluteString
)
if isValidLinkUrl(urlString) {
let matchResult = URLMatchResult(urlString: urlString, matchRange: match.range)
urlMatches.append(matchResult)
}
}
return urlMatches
}
fileprivate static func normalizeTitle(title: String?) -> String? {
guard var result: String = title, !result.isEmpty else { return nil }
// Truncate title after 2 lines of text.
let maxLineCount = 2
var components = result.components(separatedBy: .newlines)
if components.count > maxLineCount {
components = Array(components[0..<maxLineCount])
result = components.joined(separator: "\n")
}
let maxCharacterCount = 2048
if result.count > maxCharacterCount {
let endIndex = result.index(result.startIndex, offsetBy: maxCharacterCount)
result = String(result[..<endIndex])
}
return result.filterStringForDisplay()
}
// MARK: - Text Parsing
private static var previewUrlCache: Atomic<NSCache<NSString, NSString>> = Atomic(NSCache())
static func previewUrl(for body: String?, selectedRange: NSRange? = nil) -> String? {
guard GRDBStorage.shared[.areLinkPreviewsEnabled] else { return nil }
guard let body: String = body else { return nil }
if let cachedUrl = previewUrlCache.wrappedValue.object(forKey: body as NSString) as String? {
guard cachedUrl.count > 0 else {
return nil
}
return cachedUrl
}
let previewUrlMatches: [URLMatchResult] = allPreviewUrlMatches(forMessageBodyText: body)
guard let urlMatch: URLMatchResult = previewUrlMatches.first else {
// Use empty string to indicate "no preview URL" in the cache.
previewUrlCache.mutate { $0.setObject("", forKey: body as NSString) }
return nil
}
if let selectedRange: NSRange = selectedRange {
let cursorAtEndOfMatch: Bool = (
(urlMatch.matchRange.location + urlMatch.matchRange.length) == selectedRange.location
)
if selectedRange.location != body.count, (urlMatch.matchRange.intersection(selectedRange) != nil || cursorAtEndOfMatch) {
// we don't want to cache the result here, as we want to fetch the link preview
// if the user moves the cursor.
return nil
}
}
previewUrlCache.mutate { $0.setObject(urlMatch.urlString as NSString, forKey: body as NSString) }
return urlMatch.urlString
}
}
// MARK: - Drafts
public extension LinkPreview {
private struct Contents {
public var title: String?
public var imageUrl: String?
public init(title: String?, imageUrl: String? = nil) {
self.title = title
self.imageUrl = imageUrl
}
}
private static let serialQueue = DispatchQueue(label: "org.signal.linkPreview")
// This cache should only be accessed on serialQueue.
//
// We should only maintain a "cache" of the last known draft.
private static var linkPreviewDraftCache: LinkPreviewDraft?
// Twitter doesn't return OpenGraph tags to Signal
// `curl -A Signal "https://twitter.com/signalapp/status/1280166087577997312?s=20"`
// If this ever changes, we can switch back to our default User-Agent
private static let userAgentString = "WhatsApp"
private static func cachedLinkPreview(forPreviewUrl previewUrl: String) -> LinkPreviewDraft? {
return serialQueue.sync {
guard let linkPreviewDraft = linkPreviewDraftCache,
linkPreviewDraft.urlString == previewUrl else {
return nil
}
return linkPreviewDraft
}
}
private static func setCachedLinkPreview(_ linkPreviewDraft: LinkPreviewDraft, forPreviewUrl previewUrl: String) {
assert(previewUrl == linkPreviewDraft.urlString)
// Exit early if link previews are not enabled in order to avoid
// tainting the cache.
guard GRDBStorage.shared[.areLinkPreviewsEnabled] else { return }
serialQueue.sync {
linkPreviewDraftCache = linkPreviewDraft
}
}
static func tryToBuildPreviewInfo(previewUrl: String?) -> Promise<LinkPreviewDraft> {
guard GRDBStorage.shared[.areLinkPreviewsEnabled] else {
return Promise(error: LinkPreviewError.featureDisabled)
}
guard let previewUrl: String = previewUrl else {
return Promise(error: LinkPreviewError.invalidInput)
}
if let cachedInfo = cachedLinkPreview(forPreviewUrl: previewUrl) {
return Promise.value(cachedInfo)
}
return downloadLink(url: previewUrl)
.then(on: DispatchQueue.global()) { data, response -> Promise<LinkPreviewDraft> in
return parseLinkDataAndBuildDraft(linkData: data, response: response, linkUrlString: previewUrl)
}
.then(on: DispatchQueue.global()) { linkPreviewDraft -> Promise<LinkPreviewDraft> in
guard linkPreviewDraft.isValid() else { throw LinkPreviewError.noPreview }
setCachedLinkPreview(linkPreviewDraft, forPreviewUrl: previewUrl)
return Promise.value(linkPreviewDraft)
}
}
private static func downloadLink(url urlString: String, remainingRetries: UInt = 3) -> Promise<(Data, URLResponse)> {
Logger.verbose("url: \(urlString)")
// let sessionConfiguration = ContentProxy.sessionConfiguration() // Loki: Signal's proxy appears to have been banned by YouTube
let sessionConfiguration = URLSessionConfiguration.ephemeral
// Don't use any caching to protect privacy of these requests.
sessionConfiguration.requestCachePolicy = .reloadIgnoringLocalCacheData
sessionConfiguration.urlCache = nil
// FIXME: Refactor to stop using AFHTTPRequest
let sessionManager = AFHTTPSessionManager(baseURL: nil,
sessionConfiguration: sessionConfiguration)
sessionManager.requestSerializer = AFHTTPRequestSerializer()
sessionManager.responseSerializer = AFHTTPResponseSerializer()
guard ContentProxy.configureSessionManager(sessionManager: sessionManager, forUrl: urlString) else {
return Promise(error: LinkPreviewError.assertionFailure)
}
sessionManager.requestSerializer.setValue(self.userAgentString, forHTTPHeaderField: "User-Agent")
let (promise, resolver) = Promise<(Data, URLResponse)>.pending()
sessionManager.get(
urlString,
parameters: [String: AnyObject](),
headers: nil,
progress: nil,
success: { task, value in
guard let response = task.response as? HTTPURLResponse else {
resolver.reject(LinkPreviewError.assertionFailure)
return
}
if let contentType = response.allHeaderFields["Content-Type"] as? String {
guard contentType.lowercased().hasPrefix("text/") else {
resolver.reject(LinkPreviewError.invalidContent)
return
}
}
guard let data = value as? Data else {
resolver.reject(LinkPreviewError.assertionFailure)
return
}
guard data.count > 0 else {
resolver.reject(LinkPreviewError.invalidContent)
return
}
resolver.fulfill((data, response))
},
failure: { _, error in
guard isRetryable(error: error) else {
resolver.reject(LinkPreviewError.couldNotDownload)
return
}
guard remainingRetries > 0 else {
resolver.reject(LinkPreviewError.couldNotDownload)
return
}
LinkPreview.downloadLink(
url: urlString,
remainingRetries: (remainingRetries - 1)
)
.done(on: DispatchQueue.global()) { (data, response) in
resolver.fulfill((data, response))
}
.catch(on: DispatchQueue.global()) { (error) in
resolver.reject(error)
}
.retainUntilComplete()
}
)
return promise
}
private static func parseLinkDataAndBuildDraft(linkData: Data, response: URLResponse, linkUrlString: String) -> Promise<LinkPreviewDraft> {
do {
let contents = try parse(linkData: linkData, response: response)
let title = contents.title
guard let imageUrl = contents.imageUrl else {
return Promise.value(LinkPreviewDraft(urlString: linkUrlString, title: title))
}
guard URL(string: imageUrl) != nil else {
return Promise.value(LinkPreviewDraft(urlString: linkUrlString, title: title))
}
guard let imageFileExtension = fileExtension(forImageUrl: imageUrl) else {
return Promise.value(LinkPreviewDraft(urlString: linkUrlString, title: title))
}
guard let imageMimeType = mimetype(forImageFileExtension: imageFileExtension) else {
return Promise.value(LinkPreviewDraft(urlString: linkUrlString, title: title))
}
return downloadImage(url: imageUrl, imageMimeType: imageMimeType)
.map(on: DispatchQueue.global()) { (imageData: Data) -> LinkPreviewDraft in
// We always recompress images to Jpeg
LinkPreviewDraft(urlString: linkUrlString, title: title, jpegImageData: imageData)
}
.recover(on: DispatchQueue.global()) { _ -> Promise<LinkPreviewDraft> in
Promise.value(LinkPreviewDraft(urlString: linkUrlString, title: title))
}
} catch {
return Promise(error: error)
}
}
private static func parse(linkData: Data, response: URLResponse) throws -> Contents {
guard let linkText = String(data: linkData, urlResponse: response) else {
print("Could not parse link text.")
throw LinkPreviewError.invalidInput
}
let content = HTMLMetadata.construct(parsing: linkText)
var title: String?
let rawTitle = content.ogTitle ?? content.titleTag
if
let decodedTitle: String = decodeHTMLEntities(inString: rawTitle ?? ""),
let normalizedTitle: String = LinkPreview.normalizeTitle(title: decodedTitle),
normalizedTitle.count > 0
{
title = normalizedTitle
}
Logger.verbose("title: \(String(describing: title))")
guard let rawImageUrlString = content.ogImageUrlString ?? content.faviconUrlString else {
return Contents(title: title)
}
guard let imageUrlString = decodeHTMLEntities(inString: rawImageUrlString)?.ows_stripped() else {
return Contents(title: title)
}
return Contents(title: title, imageUrl: imageUrlString)
}
private static func downloadImage(url urlString: String, imageMimeType: String) -> Promise<Data> {
guard let url = URL(string: urlString) else { return Promise(error: LinkPreviewError.invalidInput) }
guard let assetDescription = ProxiedContentAssetDescription(url: url as NSURL) else {
return Promise(error: LinkPreviewError.invalidInput)
}
let (promise, resolver) = Promise<ProxiedContentAsset>.pending()
DispatchQueue.main.async {
_ = ProxiedContentDownloader.defaultDownloader.requestAsset(
assetDescription: assetDescription,
priority: .high,
success: { _, asset in
resolver.fulfill(asset)
},
failure: { _ in
resolver.reject(LinkPreviewError.couldNotDownload)
},
shouldIgnoreSignalProxy: true
)
}
return promise.then(on: DispatchQueue.global()) { (asset: ProxiedContentAsset) -> Promise<Data> in
do {
let imageSize = NSData.imageSize(forFilePath: asset.filePath, mimeType: imageMimeType)
guard imageSize.width > 0, imageSize.height > 0 else {
return Promise(error: LinkPreviewError.invalidContent)
}
let data = try Data(contentsOf: URL(fileURLWithPath: asset.filePath))
guard let srcImage = UIImage(data: data) else {
return Promise(error: LinkPreviewError.invalidContent)
}
// Loki: If it's a GIF then ensure its validity and don't download it as a JPG
if (imageMimeType == OWSMimeTypeImageGif && NSData(data: data).ows_isValidImage(withMimeType: OWSMimeTypeImageGif)) { return Promise.value(data) }
let maxImageSize: CGFloat = 1024
let shouldResize = imageSize.width > maxImageSize || imageSize.height > maxImageSize
guard shouldResize else {
guard let dstData = srcImage.jpegData(compressionQuality: 0.8) else {
return Promise(error: LinkPreviewError.invalidContent)
}
return Promise.value(dstData)
}
guard let dstImage = srcImage.resized(withMaxDimensionPoints: maxImageSize) else {
return Promise(error: LinkPreviewError.invalidContent)
}
guard let dstData = dstImage.jpegData(compressionQuality: 0.8) else {
return Promise(error: LinkPreviewError.invalidContent)
}
return Promise.value(dstData)
}
catch {
return Promise(error: LinkPreviewError.assertionFailure)
}
}
}
private static func isRetryable(error: Error) -> Bool {
if (error as NSError).domain == kCFErrorDomainCFNetwork as String {
// Network failures are retried.
return true
}
return false
}
private static func fileExtension(forImageUrl urlString: String) -> String? {
guard let imageUrl = URL(string: urlString) else { return nil }
let imageFilename = imageUrl.lastPathComponent
let imageFileExtension = (imageFilename as NSString).pathExtension.lowercased()
guard imageFileExtension.count > 0 else {
// TODO: For those links don't have a file extension, we should figure out a way to know the image mime type
return "png"
}
return imageFileExtension
}
private static func mimetype(forImageFileExtension imageFileExtension: String) -> String? {
guard imageFileExtension.count > 0 else { return nil }
guard let imageMimeType = MIMETypeUtil.mimeType(forFileExtension: imageFileExtension) else { return nil }
return imageMimeType
}
private static func decodeHTMLEntities(inString value: String) -> String? {
guard let data = value.data(using: .utf8) else { return nil }
let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [
.documentType: NSAttributedString.DocumentType.html,
.characterEncoding: String.Encoding.utf8.rawValue
]
guard let attributedString = try? NSAttributedString(data: data, options: options, documentAttributes: nil) else {
return nil
}
return attributedString.string
}
}