From 8bfd4edb53fbb2f5a42f82906326ffe39f17a5c7 Mon Sep 17 00:00:00 2001 From: Ryan ZHAO Date: Wed, 4 Nov 2020 09:56:28 +1100 Subject: [PATCH 1/6] enable link preview for any sites --- .../src/Loki/API/Utilities/HTMLMetadata.swift | 119 +++++++++++++++ .../Interactions/OWSLinkPreview.swift | 142 ++---------------- .../src/Util/NSRegularExpression+SSK.swift | 53 +++++++ 3 files changed, 184 insertions(+), 130 deletions(-) create mode 100644 SignalServiceKit/src/Loki/API/Utilities/HTMLMetadata.swift diff --git a/SignalServiceKit/src/Loki/API/Utilities/HTMLMetadata.swift b/SignalServiceKit/src/Loki/API/Utilities/HTMLMetadata.swift new file mode 100644 index 000000000..5037faaa9 --- /dev/null +++ b/SignalServiceKit/src/Loki/API/Utilities/HTMLMetadata.swift @@ -0,0 +1,119 @@ +import Foundation + +public struct HTMLMetadata: Equatable { + /// Parsed from + var titleTag: String? + /// Parsed from <link rel="icon"...> + var faviconUrlString: String? + /// Parsed from <meta name="description"...> + var description: String? + /// Parsed from the og:title meta property + var ogTitle: String? + /// Parsed from the og:description meta property + var ogDescription: String? + /// Parsed from the og:image or og:image:url meta property + var ogImageUrlString: String? + /// Parsed from the og:published_time meta property + var ogPublishDateString: String? + /// Parsed from article:published_time meta property + var articlePublishDateString: String? + /// Parsed from the og:modified_time meta property + var ogModifiedDateString: String? + /// Parsed from the article:modified_time meta property + var articleModifiedDateString: String? + + static func construct(parsing rawHTML: String) -> HTMLMetadata { + let metaPropertyTags = Self.parseMetaProperties(in: rawHTML) + return HTMLMetadata( + titleTag: Self.parseTitleTag(in: rawHTML), + faviconUrlString: Self.parseFaviconUrlString(in: rawHTML), + description: Self.parseDescriptionTag(in: rawHTML), + ogTitle: metaPropertyTags["og:title"], + ogDescription: metaPropertyTags["og:description"], + ogImageUrlString: (metaPropertyTags["og:image"] ?? metaPropertyTags["og:image:url"]), + ogPublishDateString: metaPropertyTags["og:published_time"], + articlePublishDateString: metaPropertyTags["article:published_time"], + ogModifiedDateString: metaPropertyTags["og:modified_time"], + articleModifiedDateString: metaPropertyTags["article:modified_time"] + ) + } +} + +// MARK: - Parsing +extension HTMLMetadata { + + private static func parseTitleTag(in rawHTML: String) -> String? { + titleRegex + .firstMatchSet(in: rawHTML)? + .group(idx: 0) + .flatMap { decodeHTMLEntities(in: String($0)) } + } + + private static func parseFaviconUrlString(in rawHTML: String) -> String? { + guard let matchedTag = faviconRegex + .firstMatchSet(in: rawHTML) + .map({ String($0.fullString) }) else { return nil } + + return faviconUrlRegex + .parseFirstMatch(inText: matchedTag) + .flatMap { decodeHTMLEntities(in: String($0)) } + } + + private static func parseDescriptionTag(in rawHTML: String) -> String? { + guard let matchedTag = metaDescriptionRegex + .firstMatchSet(in: rawHTML) + .map({ String($0.fullString) }) else { return nil } + + return metaContentRegex + .parseFirstMatch(inText: matchedTag) + .flatMap { decodeHTMLEntities(in: String($0)) } + } + + private static func parseMetaProperties(in rawHTML: String) -> [String: String] { + metaPropertyRegex + .allMatchSets(in: rawHTML) + .reduce(into: [:]) { (builder, matchSet) in + guard let ogTypeSubstring = matchSet.group(idx: 0) else { return } + let ogType = String(ogTypeSubstring) + let fullTag = String(matchSet.fullString) + + // Exit early if we've already found a tag of this type + guard builder[ogType] == nil else { return } + guard let content = metaContentRegex.parseFirstMatch(inText: fullTag) else { return } + + builder[ogType] = decodeHTMLEntities(in: content) + } + } + + private static func decodeHTMLEntities(in string: String) -> String? { + guard let data = string.data(using: .utf8) else { + return nil + } + + let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [ + .documentType: NSAttributedString.DocumentType.html, + .characterEncoding: String.Encoding.utf8.rawValue + ] + + guard let attributedString = try? NSAttributedString(data: data, options: options, documentAttributes: nil) else { + return nil + } + return attributedString.string + } +} + + // MARK: - Regular Expressions +extension HTMLMetadata { + static let titleRegex = regex(pattern: "<\\s*title[^>]*>(.*?)<\\s*/title[^>]*>") + static let faviconRegex = regex(pattern: "<\\s*link[^>]*rel\\s*=\\s*\"\\s*(shortcut\\s+)?icon\\s*\"[^>]*>") + static let faviconUrlRegex = regex(pattern: "href\\s*=\\s*\"([^\"]*)\"") + static let metaDescriptionRegex = regex(pattern: "<\\s*meta[^>]*name\\s*=\\s*\"\\s*description[^\"]*\"[^>]*>") + static let metaPropertyRegex = regex(pattern: "<\\s*meta[^>]*property\\s*=\\s*\"\\s*([^\"]+?)\"[^>]*>") + static let metaContentRegex = regex(pattern: "content\\s*=\\s*\"([^\"]*?)\"") + + static private func regex(pattern: String) -> NSRegularExpression { + try! NSRegularExpression( + pattern: pattern, + options: [.dotMatchesLineSeparators, .caseInsensitive]) + } +} diff --git a/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift b/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift index 80faf4545..cd9e63987 100644 --- a/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift +++ b/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift @@ -291,83 +291,6 @@ public class OWSLinkPreview: MTLModel { return result.filterStringForDisplay() } - // MARK: - Whitelists - - // For link domains, we require an exact match - no subdomains allowed. - // - // Note that order matters in this whitelist since the logic for determining - // how to render link preview domains in displayDomain(...) uses the first match. - // We should list TLDs first and subdomains later. - private static let linkDomainWhitelist = [ - // YouTube - "youtube.com", - "www.youtube.com", - "m.youtube.com", - "youtu.be", - - // Reddit - "reddit.com", - "www.reddit.com", - "m.reddit.com", - // NOTE: We don't use redd.it. - - // Imgur - // - // NOTE: Subdomains are also used for content. - // - // For example, you can access "user/member" pages: https://sillygoose2.imgur.com/ - // A different member page can be accessed without a subdomain: https://imgur.com/user/SillyGoose2 - // - // I'm not sure we need to support these subdomains; they don't appear to be core functionality. - "imgur.com", - "www.imgur.com", - "m.imgur.com", - - // Instagram - "instagram.com", - "www.instagram.com", - "m.instagram.com", - - // Pinterest - "pinterest.com", - "www.pinterest.com", - "pin.it", - - // Giphy - "giphy.com", - "media.giphy.com", - "media1.giphy.com", - "media2.giphy.com", - "media3.giphy.com", - "gph.is" - ] - - // For media domains, we DO NOT require an exact match - subdomains are allowed. - private static let mediaDomainWhitelist = [ - // YouTube - "ytimg.com", - - // Reddit - "redd.it", - - // Imgur - "imgur.com", - - // Instagram - "cdninstagram.com", - "fbcdn.net", - - // Pinterest - "pinimg.com", - - // Giphy - "giphy.com" - ] - - private static let protocolWhitelist = [ - "https" - ] - @objc public func displayDomain() -> String? { return OWSLinkPreview.displayDomain(forUrl: urlString) @@ -383,13 +306,7 @@ public class OWSLinkPreview: MTLModel { owsFailDebug("Invalid url.") return nil } - guard let result = whitelistedDomain(forUrl: url, - domainWhitelist: OWSLinkPreview.linkDomainWhitelist, - allowSubdomains: false) else { - Logger.error("Missing domain.") - return nil - } - return result + return url.host } @objc @@ -397,9 +314,7 @@ public class OWSLinkPreview: MTLModel { guard let url = URL(string: urlString) else { return false } - return whitelistedDomain(forUrl: url, - domainWhitelist: OWSLinkPreview.linkDomainWhitelist, - allowSubdomains: false) != nil + return true } @objc @@ -407,36 +322,7 @@ public class OWSLinkPreview: MTLModel { guard let url = URL(string: urlString) else { return false } - return whitelistedDomain(forUrl: url, - domainWhitelist: OWSLinkPreview.mediaDomainWhitelist, - allowSubdomains: true) != nil - } - - private class func whitelistedDomain(forUrl url: URL, domainWhitelist: [String], allowSubdomains: Bool) -> String? { - guard let urlProtocol = url.scheme?.lowercased() else { - return nil - } - guard protocolWhitelist.contains(urlProtocol) else { - return nil - } - guard let domain = url.host?.lowercased() else { - return nil - } - guard url.path.count > 1 else { - // URL must have non-empty path. - return nil - } - - for whitelistedDomain in domainWhitelist { - if domain == whitelistedDomain.lowercased() { - return whitelistedDomain - } - if allowSubdomains, - domain.hasSuffix("." + whitelistedDomain.lowercased()) { - return whitelistedDomain - } - } - return nil + return true } // MARK: - Serial Queue @@ -812,31 +698,27 @@ public class OWSLinkPreview: MTLModel { } } - // Example: - // - // <meta property="og:title" content="Randomness is Random - Numberphile"> - // <meta property="og:image" content="https://i.ytimg.com/vi/tP-Ipsat90c/maxresdefault.jpg"> class func parse(linkData: Data) throws -> OWSLinkPreviewContents { guard let linkText = String(bytes: linkData, encoding: .utf8) else { owsFailDebug("Could not parse link text.") throw LinkPreviewError.invalidInput } + + let content = HTMLMetadata.construct(parsing: linkText) var title: String? - if let rawTitle = NSRegularExpression.parseFirstMatch(pattern: "<meta\\s+property\\s*=\\s*\"og:title\"\\s+[^>]*content\\s*=\\s*\"(.*?)\"\\s*[^>]*/?>", - text: linkText, - options: .dotMatchesLineSeparators) { - if let decodedTitle = decodeHTMLEntities(inString: rawTitle) { - let normalizedTitle = OWSLinkPreview.normalizeTitle(title: decodedTitle) - if normalizedTitle.count > 0 { - title = normalizedTitle - } + let rawTitle = content.ogTitle ?? content.titleTag + if let decodedTitle = decodeHTMLEntities(inString: rawTitle ?? "") { + let normalizedTitle = OWSLinkPreview.normalizeTitle(title: decodedTitle) + if normalizedTitle.count > 0 { + title = normalizedTitle } } + Logger.verbose("title: \(String(describing: title))") - guard let rawImageUrlString = NSRegularExpression.parseFirstMatch(pattern: "<meta\\s+property\\s*=\\s*\"og:image\"\\s+[^>]*content\\s*=\\s*\"(.*?)\"[^>]*/?>", text: linkText) else { + guard let rawImageUrlString = content.ogImageUrlString ?? content.faviconUrlString else { return OWSLinkPreviewContents(title: title) } guard let imageUrlString = decodeHTMLEntities(inString: rawImageUrlString)?.ows_stripped() else { diff --git a/SignalServiceKit/src/Util/NSRegularExpression+SSK.swift b/SignalServiceKit/src/Util/NSRegularExpression+SSK.swift index e4574467d..17ff74c78 100644 --- a/SignalServiceKit/src/Util/NSRegularExpression+SSK.swift +++ b/SignalServiceKit/src/Util/NSRegularExpression+SSK.swift @@ -52,4 +52,57 @@ public extension NSRegularExpression { let substring = String(text[textRange]) return substring } + + @nonobjc + func firstMatchSet(in searchString: String) -> MatchSet? { + firstMatch(in: searchString, options: [], range: searchString.completeNSRange)?.createMatchSet(originalSearchString: searchString) + } + + @nonobjc + func allMatchSets(in searchString: String) -> [MatchSet] { + matches(in: searchString, options: [], range: searchString.completeNSRange).compactMap { $0.createMatchSet(originalSearchString: searchString) } + } + } + +public struct MatchSet { + let fullString: Substring + let matchedGroups: [Substring?] + + func group(idx: Int) -> Substring? { + guard idx < matchedGroups.count else { return nil } + return matchedGroups[idx] + } +} + +fileprivate extension String { + subscript(_ nsRange: NSRange) -> Substring? { + guard let swiftRange = Range(nsRange, in: self) else { return nil } + return self[swiftRange] + } + + var completeRange: Range<String.Index> { + startIndex..<endIndex + } + + var completeNSRange: NSRange { + NSRange(completeRange, in: self) + } +} + +fileprivate extension NSTextCheckingResult { + func createMatchSet(originalSearchString string: String) -> MatchSet? { + guard numberOfRanges > 0 else { return nil } + let substrings = (0..<numberOfRanges) + .map { range(at: $0) } + .map { string[$0] } + + guard let fullString = substrings[0] else { + owsFailDebug("Missing expected full string") + return nil + } + + return MatchSet(fullString: fullString, matchedGroups: Array(substrings[1...])) + } +} + From f41e25701ab06ab0da7d1da575fcb7f7185ab7be Mon Sep 17 00:00:00 2001 From: Ryan ZHAO <ryanzhaors@qq.com> Date: Thu, 5 Nov 2020 16:58:19 +1100 Subject: [PATCH 2/6] ignore Signal's proxy for link preview image download --- .../Interactions/OWSLinkPreview.swift | 5 ++- .../Network/ProxiedContentDownloader.swift | 44 ++++++++++++++++--- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift b/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift index cd9e63987..bc6145ec3 100644 --- a/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift +++ b/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift @@ -606,7 +606,7 @@ public class OWSLinkPreview: MTLModel { }, failure: { (_) in Logger.warn("Error downloading asset") resolver.reject(LinkPreviewError.couldNotDownload) - }) + }, shouldIgnoreSignalProxy: true) } return promise.then(on: DispatchQueue.global()) { (asset: ProxiedContentAsset) -> Promise<Data> in do { @@ -736,7 +736,8 @@ public class OWSLinkPreview: MTLModel { let imageFilename = imageUrl.lastPathComponent let imageFileExtension = (imageFilename as NSString).pathExtension.lowercased() guard imageFileExtension.count > 0 else { - return nil + // TODO: For those links don't have a file extension, we should figure out a way to know the image mime type + return "png" } return imageFileExtension } diff --git a/SignalServiceKit/src/Network/ProxiedContentDownloader.swift b/SignalServiceKit/src/Network/ProxiedContentDownloader.swift index 5b6d2f23e..f8eed8fc0 100644 --- a/SignalServiceKit/src/Network/ProxiedContentDownloader.swift +++ b/SignalServiceKit/src/Network/ProxiedContentDownloader.swift @@ -149,7 +149,8 @@ public class ProxiedContentAssetRequest: NSObject { // the request succeeds or fails. private var success: ((ProxiedContentAssetRequest?, ProxiedContentAsset) -> Void)? private var failure: ((ProxiedContentAssetRequest) -> Void)? - + + var shouldIgnoreSignalProxy = false var wasCancelled = false // This property is an internal implementation detail of the download process. var assetFilePath: String? @@ -484,6 +485,21 @@ open class ProxiedContentDownloader: NSObject, URLSessionTaskDelegate, URLSessio delegateQueue: nil) return session }() + + private lazy var downloadSessionWithoutProxy: URLSession = { + AssertIsOnMainThread() + + let configuration = URLSessionConfiguration.ephemeral + // Don't use any caching to protect privacy of these requests. + configuration.urlCache = nil + configuration.requestCachePolicy = .reloadIgnoringCacheData + + configuration.httpMaximumConnectionsPerHost = 10 + let session = URLSession(configuration: configuration, + delegate: self, + delegateQueue: nil) + return session + }() // 100 entries of which at least half will probably be stills. // Actual animated GIFs will usually be less than 3 MB so the @@ -504,7 +520,8 @@ open class ProxiedContentDownloader: NSObject, URLSessionTaskDelegate, URLSessio public func requestAsset(assetDescription: ProxiedContentAssetDescription, priority: ProxiedContentRequestPriority, success:@escaping ((ProxiedContentAssetRequest?, ProxiedContentAsset) -> Void), - failure:@escaping ((ProxiedContentAssetRequest) -> Void)) -> ProxiedContentAssetRequest? { + failure:@escaping ((ProxiedContentAssetRequest) -> Void), + shouldIgnoreSignalProxy: Bool = false) -> ProxiedContentAssetRequest? { AssertIsOnMainThread() if let asset = assetMap.get(key: assetDescription.url) { @@ -522,6 +539,7 @@ open class ProxiedContentDownloader: NSObject, URLSessionTaskDelegate, URLSessio priority: priority, success: success, failure: failure) + assetRequest.shouldIgnoreSignalProxy = shouldIgnoreSignalProxy assetRequestQueue.append(assetRequest) // Process the queue (which may start this request) // asynchronously so that the caller has time to store @@ -676,10 +694,17 @@ open class ProxiedContentDownloader: NSObject, URLSessionTaskDelegate, URLSessio processRequestQueueSync() return } - - let task = downloadSession.dataTask(with: request, completionHandler: { data, response, error -> Void in - self.handleAssetSizeResponse(assetRequest: assetRequest, data: data, response: response, error: error) - }) + + var task: URLSessionDataTask + if (assetRequest.shouldIgnoreSignalProxy) { + task = downloadSessionWithoutProxy.dataTask(with: request, completionHandler: { data, response, error -> Void in + self.handleAssetSizeResponse(assetRequest: assetRequest, data: data, response: response, error: error) + }) + } else { + task = downloadSession.dataTask(with: request, completionHandler: { data, response, error -> Void in + self.handleAssetSizeResponse(assetRequest: assetRequest, data: data, response: response, error: error) + }) + } assetRequest.contentLengthTask = task task.resume() @@ -704,7 +729,12 @@ open class ProxiedContentDownloader: NSObject, URLSessionTaskDelegate, URLSessio return } - let task: URLSessionDataTask = downloadSession.dataTask(with: request) + var task: URLSessionDataTask + if (assetRequest.shouldIgnoreSignalProxy) { + task = downloadSessionWithoutProxy.dataTask(with: request) + } else { + task = downloadSession.dataTask(with: request) + } task.assetRequest = assetRequest task.assetSegment = assetSegment assetSegment.task = task From 4f3a8d93a74a64caf90006a9b304ab7e93122d23 Mon Sep 17 00:00:00 2001 From: Ryan ZHAO <ryanzhaors@qq.com> Date: Wed, 18 Nov 2020 10:00:19 +1100 Subject: [PATCH 3/6] deal with conflicts --- Podfile.lock | 2 +- Signal.xcodeproj/project.pbxproj | 4 + .../Interactions/OWSLinkPreview.swift | 790 ------------------ SignalUtilitiesKit/OWSLinkPreview.swift | 149 +--- 4 files changed, 21 insertions(+), 924 deletions(-) delete mode 100644 SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift diff --git a/Podfile.lock b/Podfile.lock index 54a6863c1..395bcf45e 100644 --- a/Podfile.lock +++ b/Podfile.lock @@ -232,4 +232,4 @@ SPEC CHECKSUMS: PODFILE CHECKSUM: 3489ed70ea51f2bf705bf99703efc71d697de373 -COCOAPODS: 1.10.0.rc.1 +COCOAPODS: 1.9.3 diff --git a/Signal.xcodeproj/project.pbxproj b/Signal.xcodeproj/project.pbxproj index 17796af1f..28182695f 100644 --- a/Signal.xcodeproj/project.pbxproj +++ b/Signal.xcodeproj/project.pbxproj @@ -234,6 +234,7 @@ 768A1A2B17FC9CD300E00ED8 /* libz.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 768A1A2A17FC9CD300E00ED8 /* libz.dylib */; }; 76C87F19181EFCE600C4ACAB /* MediaPlayer.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 76C87F18181EFCE600C4ACAB /* MediaPlayer.framework */; }; 76EB054018170B33006006FC /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 76EB03C318170B33006006FC /* AppDelegate.m */; }; + 7B4933F4256485E500160515 /* HTMLMetadata.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7B4933F3256485E400160515 /* HTMLMetadata.swift */; }; 7BC01A3E241F40AB00BC7C55 /* NotificationServiceExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BC01A3D241F40AB00BC7C55 /* NotificationServiceExtension.swift */; }; 7BC01A42241F40AB00BC7C55 /* SessionPushNotificationExtension.appex in Embed App Extensions */ = {isa = PBXBuildFile; fileRef = 7BC01A3B241F40AB00BC7C55 /* SessionPushNotificationExtension.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; 7BDCFC08242186E700641C39 /* NotificationServiceExtensionContext.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7BDCFC07242186E700641C39 /* NotificationServiceExtensionContext.swift */; }; @@ -1589,6 +1590,7 @@ 76C87F18181EFCE600C4ACAB /* MediaPlayer.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MediaPlayer.framework; path = System/Library/Frameworks/MediaPlayer.framework; sourceTree = SDKROOT; }; 76EB03C218170B33006006FC /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; }; 76EB03C318170B33006006FC /* AppDelegate.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; }; + 7B4933F3256485E400160515 /* HTMLMetadata.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = HTMLMetadata.swift; sourceTree = "<group>"; }; 7BC01A3B241F40AB00BC7C55 /* SessionPushNotificationExtension.appex */ = {isa = PBXFileReference; explicitFileType = "wrapper.app-extension"; includeInIndex = 0; path = SessionPushNotificationExtension.appex; sourceTree = BUILT_PRODUCTS_DIR; }; 7BC01A3D241F40AB00BC7C55 /* NotificationServiceExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = NotificationServiceExtension.swift; sourceTree = "<group>"; }; 7BC01A3F241F40AB00BC7C55 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; @@ -3355,6 +3357,7 @@ C33FD9AC255A548A00E217F9 /* SignalUtilitiesKit */ = { isa = PBXGroup; children = ( + 7B4933F3256485E400160515 /* HTMLMetadata.swift */, C33FDC09255A581D00E217F9 /* AccountServiceClient.swift */, C33FDBC3255A581700E217F9 /* AnyPromise+Conversion.swift */, C33FDB8A255A581200E217F9 /* AppContext.h */, @@ -5754,6 +5757,7 @@ C33FDCB3255A582000E217F9 /* TSContactThread.m in Sources */, C33FDC45255A581F00E217F9 /* AppVersion.m in Sources */, C33FDD2A255A582000E217F9 /* OWSMessageServiceParams.m in Sources */, + 7B4933F4256485E500160515 /* HTMLMetadata.swift in Sources */, C33FDC7F255A582000E217F9 /* OWSCountryMetadata.m in Sources */, C33FDC84255A582000E217F9 /* LokiMessage.swift in Sources */, C33FDC7C255A582000E217F9 /* TSAttachment.m in Sources */, diff --git a/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift b/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift deleted file mode 100644 index bc6145ec3..000000000 --- a/SignalServiceKit/src/Messages/Interactions/OWSLinkPreview.swift +++ /dev/null @@ -1,790 +0,0 @@ -// -// Copyright (c) 2019 Open Whisper Systems. All rights reserved. -// - -import Foundation -import PromiseKit - -@objc -public enum LinkPreviewError: Int, Error { - case invalidInput - case noPreview - case assertionFailure - case couldNotDownload - case featureDisabled - case invalidContent - case invalidMediaContent - case attachmentFailedToSave -} - -// MARK: - OWSLinkPreviewDraft - -public class OWSLinkPreviewContents: NSObject { - @objc - public var title: String? - - @objc - public var imageUrl: String? - - public init(title: String?, imageUrl: String? = nil) { - self.title = title - self.imageUrl = imageUrl - - super.init() - } -} - -// This contains the info for a link preview "draft". -public class OWSLinkPreviewDraft: NSObject { - @objc - public var urlString: String - - @objc - public var title: String? - - @objc - public var jpegImageData: Data? - - public init(urlString: String, title: String?, jpegImageData: Data? = nil) { - self.urlString = urlString - self.title = title - self.jpegImageData = jpegImageData - - super.init() - } - - fileprivate func isValid() -> Bool { - var hasTitle = false - if let titleValue = title { - hasTitle = titleValue.count > 0 - } - let hasImage = jpegImageData != nil - return hasTitle || hasImage - } - - @objc - public func displayDomain() -> String? { - return OWSLinkPreview.displayDomain(forUrl: urlString) - } -} - -// MARK: - OWSLinkPreview - -@objc -public class OWSLinkPreview: MTLModel { - @objc - public static let featureEnabled = true - - @objc - public var urlString: String? - - @objc - public var title: String? - - @objc - public var imageAttachmentId: String? - - // Whether this preview can be rendered as an attachment - @objc - public var isDirectAttachment: Bool = false - - @objc - public init(urlString: String, title: String?, imageAttachmentId: String?, isDirectAttachment: Bool = false) { - self.urlString = urlString - self.title = title - self.imageAttachmentId = imageAttachmentId - self.isDirectAttachment = isDirectAttachment - - super.init() - } - - @objc - public override init() { - super.init() - } - - @objc - public required init!(coder: NSCoder) { - super.init(coder: coder) - } - - @objc - public required init(dictionary dictionaryValue: [String: Any]!) throws { - try super.init(dictionary: dictionaryValue) - } - - @objc - public class func isNoPreviewError(_ error: Error) -> Bool { - guard let error = error as? LinkPreviewError else { - return false - } - return error == .noPreview - } - - @objc - public class func isInvalidContentError(_ error: Error) -> Bool { - guard let error = error as? LinkPreviewError else { return false } - return error == .invalidContent - } - - @objc - public class func buildValidatedLinkPreview(dataMessage: SSKProtoDataMessage, - body: String?, - transaction: YapDatabaseReadWriteTransaction) throws -> OWSLinkPreview { - guard OWSLinkPreview.featureEnabled else { - throw LinkPreviewError.noPreview - } - guard let previewProto = dataMessage.preview.first else { - throw LinkPreviewError.noPreview - } - guard dataMessage.attachments.count < 1 else { - Logger.error("Discarding link preview; message has attachments.") - throw LinkPreviewError.invalidInput - } - let urlString = previewProto.url - - guard URL(string: urlString) != nil else { - Logger.error("Could not parse preview URL.") - throw LinkPreviewError.invalidInput - } - - guard let body = body else { - Logger.error("Preview for message without body.") - throw LinkPreviewError.invalidInput - } - let previewUrls = allPreviewUrls(forMessageBodyText: body) - guard previewUrls.contains(urlString) else { - Logger.error("URL not present in body.") - throw LinkPreviewError.invalidInput - } - - guard isValidLinkUrl(urlString) else { - Logger.verbose("Invalid link URL \(urlString).") - Logger.error("Invalid link URL.") - throw LinkPreviewError.invalidInput - } - - var title: String? - if let rawTitle = previewProto.title { - let normalizedTitle = OWSLinkPreview.normalizeTitle(title: rawTitle) - if normalizedTitle.count > 0 { - title = normalizedTitle - } - } - - var imageAttachmentId: String? - if let imageProto = previewProto.image { - if let imageAttachmentPointer = TSAttachmentPointer(fromProto: imageProto, albumMessage: nil) { - imageAttachmentPointer.save(with: transaction) - imageAttachmentId = imageAttachmentPointer.uniqueId - } else { - Logger.error("Could not parse image proto.") - throw LinkPreviewError.invalidInput - } - } - - let linkPreview = OWSLinkPreview(urlString: urlString, title: title, imageAttachmentId: imageAttachmentId) - - guard linkPreview.isValid() else { - Logger.error("Preview has neither title nor image.") - throw LinkPreviewError.invalidInput - } - - return linkPreview - } - - @objc - public class func buildValidatedLinkPreview(fromInfo info: OWSLinkPreviewDraft, - transaction: YapDatabaseReadWriteTransaction) throws -> OWSLinkPreview { - guard OWSLinkPreview.featureEnabled else { - throw LinkPreviewError.noPreview - } - guard SSKPreferences.areLinkPreviewsEnabled else { - throw LinkPreviewError.noPreview - } - let imageAttachmentId = OWSLinkPreview.saveAttachmentIfPossible(jpegImageData: info.jpegImageData, - transaction: transaction) - - let linkPreview = OWSLinkPreview(urlString: info.urlString, title: info.title, imageAttachmentId: imageAttachmentId) - - guard linkPreview.isValid() else { - owsFailDebug("Preview has neither title nor image.") - throw LinkPreviewError.invalidInput - } - - return linkPreview - } - - private class func saveAttachmentIfPossible(jpegImageData: Data?, - transaction: YapDatabaseReadWriteTransaction) -> String? { - return saveAttachmentIfPossible(imageData: jpegImageData, mimeType: OWSMimeTypeImageJpeg, transaction: transaction); - } - - private class func saveAttachmentIfPossible(imageData: Data?, mimeType: String, transaction: YapDatabaseReadWriteTransaction) -> String? { - guard let imageData = imageData else { return nil } - - let fileSize = imageData.count - guard fileSize > 0 else { - owsFailDebug("Invalid file size for image data.") - return nil - } - - guard let fileExtension = fileExtension(forMimeType: mimeType) else { return nil } - let filePath = OWSFileSystem.temporaryFilePath(withFileExtension: fileExtension) - do { - try imageData.write(to: NSURL.fileURL(withPath: filePath), options: .atomicWrite) - } catch let error as NSError { - owsFailDebug("file write failed: \(filePath), \(error)") - return nil - } - - guard let dataSource = DataSourcePath.dataSource(withFilePath: filePath, shouldDeleteOnDeallocation: true) else { - owsFailDebug("Could not create data source for path: \(filePath)") - return nil - } - let attachment = TSAttachmentStream(contentType: mimeType, byteCount: UInt32(fileSize), sourceFilename: nil, caption: nil, albumMessageId: nil) - guard attachment.write(dataSource) else { - owsFailDebug("Could not write data source for path: \(filePath)") - return nil - } - attachment.save(with: transaction) - - return attachment.uniqueId - } - - private func isValid() -> Bool { - var hasTitle = false - if let titleValue = title { - hasTitle = titleValue.count > 0 - } - let hasImage = imageAttachmentId != nil - return hasTitle || hasImage - } - - @objc - public func removeAttachment(transaction: YapDatabaseReadWriteTransaction) { - guard let imageAttachmentId = imageAttachmentId else { - owsFailDebug("No attachment id.") - return - } - guard let attachment = TSAttachment.fetch(uniqueId: imageAttachmentId, transaction: transaction) else { - owsFailDebug("Could not load attachment.") - return - } - attachment.remove(with: transaction) - } - - private class func normalizeTitle(title: String) -> String { - var result = title - // Truncate title after 2 lines of text. - let maxLineCount = 2 - var components = result.components(separatedBy: .newlines) - if components.count > maxLineCount { - components = Array(components[0..<maxLineCount]) - result = components.joined(separator: "\n") - } - let maxCharacterCount = 2048 - if result.count > maxCharacterCount { - let endIndex = result.index(result.startIndex, offsetBy: maxCharacterCount) - result = String(result[..<endIndex]) - } - return result.filterStringForDisplay() - } - - @objc - public func displayDomain() -> String? { - return OWSLinkPreview.displayDomain(forUrl: urlString) - } - - @objc - public class func displayDomain(forUrl urlString: String?) -> String? { - guard let urlString = urlString else { - owsFailDebug("Missing url.") - return nil - } - guard let url = URL(string: urlString) else { - owsFailDebug("Invalid url.") - return nil - } - return url.host - } - - @objc - public class func isValidLinkUrl(_ urlString: String) -> Bool { - guard let url = URL(string: urlString) else { - return false - } - return true - } - - @objc - public class func isValidMediaUrl(_ urlString: String) -> Bool { - guard let url = URL(string: urlString) else { - return false - } - return true - } - - // MARK: - Serial Queue - - private static let serialQueue = DispatchQueue(label: "org.signal.linkPreview") - - private class func assertIsOnSerialQueue() { - if _isDebugAssertConfiguration(), #available(iOS 10.0, *) { - assertOnQueue(serialQueue) - } - } - - // MARK: - Text Parsing - - // This cache should only be accessed on main thread. - private static var previewUrlCache: NSCache<NSString, NSString> = NSCache() - - @objc - public class func previewUrl(forRawBodyText body: String?, selectedRange: NSRange) -> String? { - return previewUrl(forMessageBodyText: body, selectedRange: selectedRange) - } - - @objc - public class func previewURL(forRawBodyText body: String?) -> String? { - return previewUrl(forMessageBodyText: body, selectedRange: nil) - } - - public class func previewUrl(forMessageBodyText body: String?, selectedRange: NSRange?) -> String? { - AssertIsOnMainThread() - - // Exit early if link previews are not enabled in order to avoid - // tainting the cache. - guard OWSLinkPreview.featureEnabled else { - return nil - } - - guard SSKPreferences.areLinkPreviewsEnabled else { - return nil - } - - guard let body = body else { - return nil - } - - if let cachedUrl = previewUrlCache.object(forKey: body as NSString) as String? { - Logger.verbose("URL parsing cache hit.") - guard cachedUrl.count > 0 else { - return nil - } - return cachedUrl - } - let previewUrlMatches = allPreviewUrlMatches(forMessageBodyText: body) - guard let urlMatch = previewUrlMatches.first else { - // Use empty string to indicate "no preview URL" in the cache. - previewUrlCache.setObject("", forKey: body as NSString) - return nil - } - - if let selectedRange = selectedRange { - Logger.verbose("match: urlString: \(urlMatch.urlString) range: \(urlMatch.matchRange) selectedRange: \(selectedRange)") - let cursorAtEndOfMatch = urlMatch.matchRange.location + urlMatch.matchRange.length == selectedRange.location - if selectedRange.location != body.count, - (urlMatch.matchRange.intersection(selectedRange) != nil || cursorAtEndOfMatch) { - Logger.debug("ignoring URL, since the user is currently editing it.") - // we don't want to cache the result here, as we want to fetch the link preview - // if the user moves the cursor. - return nil - } - Logger.debug("considering URL, since the user is not currently editing it.") - } - - previewUrlCache.setObject(urlMatch.urlString as NSString, forKey: body as NSString) - return urlMatch.urlString - } - - struct URLMatchResult { - let urlString: String - let matchRange: NSRange - } - - class func allPreviewUrls(forMessageBodyText body: String) -> [String] { - return allPreviewUrlMatches(forMessageBodyText: body).map { $0.urlString } - } - - class func allPreviewUrlMatches(forMessageBodyText body: String) -> [URLMatchResult] { - guard OWSLinkPreview.featureEnabled else { - return [] - } - guard SSKPreferences.areLinkPreviewsEnabled else { - return [] - } - - let detector: NSDataDetector - do { - detector = try NSDataDetector(types: NSTextCheckingResult.CheckingType.link.rawValue) - } catch { - owsFailDebug("Could not create NSDataDetector: \(error).") - return [] - } - - var urlMatches: [URLMatchResult] = [] - let matches = detector.matches(in: body, options: [], range: NSRange(location: 0, length: body.count)) - for match in matches { - guard let matchURL = match.url else { - owsFailDebug("Match missing url") - continue - } - let urlString = matchURL.absoluteString - if isValidLinkUrl(urlString) { - let matchResult = URLMatchResult(urlString: urlString, matchRange: match.range) - urlMatches.append(matchResult) - } - } - return urlMatches - } - - // MARK: - Preview Construction - - // This cache should only be accessed on serialQueue. - // - // We should only maintain a "cache" of the last known draft. - private static var linkPreviewDraftCache: OWSLinkPreviewDraft? - - private class func cachedLinkPreview(forPreviewUrl previewUrl: String) -> OWSLinkPreviewDraft? { - return serialQueue.sync { - guard let linkPreviewDraft = linkPreviewDraftCache, - linkPreviewDraft.urlString == previewUrl else { - Logger.verbose("----- Cache miss.") - return nil - } - Logger.verbose("----- Cache hit.") - return linkPreviewDraft - } - } - - private class func setCachedLinkPreview(_ linkPreviewDraft: OWSLinkPreviewDraft, - forPreviewUrl previewUrl: String) { - assert(previewUrl == linkPreviewDraft.urlString) - - // Exit early if link previews are not enabled in order to avoid - // tainting the cache. - guard OWSLinkPreview.featureEnabled else { - return - } - guard SSKPreferences.areLinkPreviewsEnabled else { - return - } - - serialQueue.sync { - linkPreviewDraftCache = linkPreviewDraft - } - } - - @objc - public class func tryToBuildPreviewInfoObjc(previewUrl: String?) -> AnyPromise { - return AnyPromise(tryToBuildPreviewInfo(previewUrl: previewUrl)) - } - - public class func tryToBuildPreviewInfo(previewUrl: String?) -> Promise<OWSLinkPreviewDraft> { - guard OWSLinkPreview.featureEnabled else { - return Promise(error: LinkPreviewError.featureDisabled) - } - guard SSKPreferences.areLinkPreviewsEnabled else { - return Promise(error: LinkPreviewError.featureDisabled) - } - guard let previewUrl = previewUrl else { - return Promise(error: LinkPreviewError.invalidInput) - } - if let cachedInfo = cachedLinkPreview(forPreviewUrl: previewUrl) { - Logger.verbose("Link preview info cache hit.") - return Promise.value(cachedInfo) - } - return downloadLink(url: previewUrl) - .then(on: DispatchQueue.global()) { (data) -> Promise<OWSLinkPreviewDraft> in - return parseLinkDataAndBuildDraft(linkData: data, linkUrlString: previewUrl) - }.then(on: DispatchQueue.global()) { (linkPreviewDraft) -> Promise<OWSLinkPreviewDraft> in - guard linkPreviewDraft.isValid() else { - throw LinkPreviewError.noPreview - } - setCachedLinkPreview(linkPreviewDraft, forPreviewUrl: previewUrl) - - return Promise.value(linkPreviewDraft) - } - } - - class func downloadLink(url urlString: String, - remainingRetries: UInt = 3) -> Promise<Data> { - - Logger.verbose("url: \(urlString)") - - // let sessionConfiguration = ContentProxy.sessionConfiguration() // Loki: Signal's proxy appears to have been banned by YouTube - let sessionConfiguration = URLSessionConfiguration.ephemeral - - // Don't use any caching to protect privacy of these requests. - sessionConfiguration.requestCachePolicy = .reloadIgnoringLocalCacheData - sessionConfiguration.urlCache = nil - - let sessionManager = AFHTTPSessionManager(baseURL: nil, - sessionConfiguration: sessionConfiguration) - sessionManager.requestSerializer = AFHTTPRequestSerializer() - sessionManager.responseSerializer = AFHTTPResponseSerializer() - - guard ContentProxy.configureSessionManager(sessionManager: sessionManager, forUrl: urlString) else { - owsFailDebug("Could not configure url: \(urlString).") - return Promise(error: LinkPreviewError.assertionFailure) - } - - let (promise, resolver) = Promise<Data>.pending() - sessionManager.get(urlString, - parameters: [String: AnyObject](), - progress: nil, - success: { task, value in - - guard let response = task.response as? HTTPURLResponse else { - Logger.warn("Invalid response: \(type(of: task.response)).") - resolver.reject(LinkPreviewError.assertionFailure) - return - } - if let contentType = response.allHeaderFields["Content-Type"] as? String { - guard contentType.lowercased().hasPrefix("text/") else { - Logger.warn("Invalid content type: \(contentType).") - resolver.reject(LinkPreviewError.invalidContent) - return - } - } - guard let data = value as? Data else { - Logger.warn("Result is not data: \(type(of: value)).") - resolver.reject(LinkPreviewError.assertionFailure) - return - } - guard data.count > 0 else { - Logger.warn("Empty data: \(type(of: value)).") - resolver.reject(LinkPreviewError.invalidContent) - return - } - resolver.fulfill(data) - }, - failure: { _, error in - Logger.verbose("Error: \(error)") - - guard isRetryable(error: error) else { - Logger.warn("Error is not retryable.") - resolver.reject(LinkPreviewError.couldNotDownload) - return - } - - guard remainingRetries > 0 else { - Logger.warn("No more retries.") - resolver.reject(LinkPreviewError.couldNotDownload) - return - } - OWSLinkPreview.downloadLink(url: urlString, remainingRetries: remainingRetries - 1) - .done(on: DispatchQueue.global()) { (data) in - resolver.fulfill(data) - }.catch(on: DispatchQueue.global()) { (error) in - resolver.reject(error) - }.retainUntilComplete() - }) - return promise - } - - private class func downloadImage(url urlString: String, imageMimeType: String) -> Promise<Data> { - - Logger.verbose("url: \(urlString)") - - guard let url = URL(string: urlString) else { - Logger.error("Could not parse URL.") - return Promise(error: LinkPreviewError.invalidInput) - } - - guard let assetDescription = ProxiedContentAssetDescription(url: url as NSURL) else { - Logger.error("Could not create asset description.") - return Promise(error: LinkPreviewError.invalidInput) - } - let (promise, resolver) = Promise<ProxiedContentAsset>.pending() - DispatchQueue.main.async { - _ = ProxiedContentDownloader.defaultDownloader.requestAsset(assetDescription: assetDescription, - priority: .high, - success: { (_, asset) in - resolver.fulfill(asset) - }, failure: { (_) in - Logger.warn("Error downloading asset") - resolver.reject(LinkPreviewError.couldNotDownload) - }, shouldIgnoreSignalProxy: true) - } - return promise.then(on: DispatchQueue.global()) { (asset: ProxiedContentAsset) -> Promise<Data> in - do { - let imageSize = NSData.imageSize(forFilePath: asset.filePath, mimeType: imageMimeType) - guard imageSize.width > 0, imageSize.height > 0 else { - Logger.error("Link preview is invalid or has invalid size.") - return Promise(error: LinkPreviewError.invalidContent) - } - let data = try Data(contentsOf: URL(fileURLWithPath: asset.filePath)) - - guard let srcImage = UIImage(data: data) else { - Logger.error("Could not parse image.") - return Promise(error: LinkPreviewError.invalidContent) - } - - // Loki: If it's a GIF then ensure its validity and don't download it as a JPG - if (imageMimeType == OWSMimeTypeImageGif && NSData(data: data).ows_isValidImage(withMimeType: OWSMimeTypeImageGif)) { return Promise.value(data) } - - let maxImageSize: CGFloat = 1024 - let shouldResize = imageSize.width > maxImageSize || imageSize.height > maxImageSize - guard shouldResize else { - guard let dstData = srcImage.jpegData(compressionQuality: 0.8) else { - Logger.error("Could not write resized image.") - return Promise(error: LinkPreviewError.invalidContent) - } - return Promise.value(dstData) - } - - guard let dstImage = srcImage.resized(withMaxDimensionPoints: maxImageSize) else { - Logger.error("Could not resize image.") - return Promise(error: LinkPreviewError.invalidContent) - } - guard let dstData = dstImage.jpegData(compressionQuality: 0.8) else { - Logger.error("Could not write resized image.") - return Promise(error: LinkPreviewError.invalidContent) - } - return Promise.value(dstData) - } catch { - owsFailDebug("Could not load asset data: \(type(of: asset.filePath)).") - return Promise(error: LinkPreviewError.assertionFailure) - } - } - } - - private class func isRetryable(error: Error) -> Bool { - let nsError = error as NSError - if nsError.domain == kCFErrorDomainCFNetwork as String { - // Network failures are retried. - return true - } - return false - } - - class func parseLinkDataAndBuildDraft(linkData: Data, - linkUrlString: String) -> Promise<OWSLinkPreviewDraft> { - do { - let contents = try parse(linkData: linkData) - - let title = contents.title - guard let imageUrl = contents.imageUrl else { - return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) - } - - guard isValidMediaUrl(imageUrl) else { - Logger.error("Invalid image URL.") - return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) - } - guard let imageFileExtension = fileExtension(forImageUrl: imageUrl) else { - Logger.error("Image URL has unknown or invalid file extension: \(imageUrl).") - return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) - } - guard let imageMimeType = mimetype(forImageFileExtension: imageFileExtension) else { - Logger.error("Image URL has unknown or invalid content type: \(imageUrl).") - return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) - } - - return downloadImage(url: imageUrl, imageMimeType: imageMimeType) - .map(on: DispatchQueue.global()) { (imageData: Data) -> OWSLinkPreviewDraft in - // We always recompress images to Jpeg. - let linkPreviewDraft = OWSLinkPreviewDraft(urlString: linkUrlString, title: title, jpegImageData: imageData) - return linkPreviewDraft - } - .recover(on: DispatchQueue.global()) { (_) -> Promise<OWSLinkPreviewDraft> in - return Promise.value(OWSLinkPreviewDraft(urlString: linkUrlString, title: title)) - } - } catch { - owsFailDebug("Could not parse link data: \(error).") - return Promise(error: error) - } - } - - class func parse(linkData: Data) throws -> OWSLinkPreviewContents { - guard let linkText = String(bytes: linkData, encoding: .utf8) else { - owsFailDebug("Could not parse link text.") - throw LinkPreviewError.invalidInput - } - - let content = HTMLMetadata.construct(parsing: linkText) - - var title: String? - let rawTitle = content.ogTitle ?? content.titleTag - if let decodedTitle = decodeHTMLEntities(inString: rawTitle ?? "") { - let normalizedTitle = OWSLinkPreview.normalizeTitle(title: decodedTitle) - if normalizedTitle.count > 0 { - title = normalizedTitle - } - } - - - Logger.verbose("title: \(String(describing: title))") - - guard let rawImageUrlString = content.ogImageUrlString ?? content.faviconUrlString else { - return OWSLinkPreviewContents(title: title) - } - guard let imageUrlString = decodeHTMLEntities(inString: rawImageUrlString)?.ows_stripped() else { - return OWSLinkPreviewContents(title: title) - } - - return OWSLinkPreviewContents(title: title, imageUrl: imageUrlString) - } - - class func fileExtension(forImageUrl urlString: String) -> String? { - guard let imageUrl = URL(string: urlString) else { - Logger.error("Could not parse image URL.") - return nil - } - let imageFilename = imageUrl.lastPathComponent - let imageFileExtension = (imageFilename as NSString).pathExtension.lowercased() - guard imageFileExtension.count > 0 else { - // TODO: For those links don't have a file extension, we should figure out a way to know the image mime type - return "png" - } - return imageFileExtension - } - - class func fileExtension(forMimeType mimeType: String) -> String? { - switch mimeType { - case OWSMimeTypeImageGif: return "gif" - case OWSMimeTypeImagePng: return "png" - case OWSMimeTypeImageJpeg: return "jpg" - default: return nil - } - } - - class func mimetype(forImageFileExtension imageFileExtension: String) -> String? { - guard imageFileExtension.count > 0 else { - return nil - } - guard let imageMimeType = MIMETypeUtil.mimeType(forFileExtension: imageFileExtension) else { - Logger.error("Image URL has unknown content type: \(imageFileExtension).") - return nil - } - let kValidMimeTypes = [ - OWSMimeTypeImagePng, - OWSMimeTypeImageJpeg, - OWSMimeTypeImageGif, - ] - guard kValidMimeTypes.contains(imageMimeType) else { - Logger.error("Image URL has invalid content type: \(imageMimeType).") - return nil - } - return imageMimeType - } - - private class func decodeHTMLEntities(inString value: String) -> String? { - guard let data = value.data(using: .utf8) else { - return nil - } - - let options: [NSAttributedString.DocumentReadingOptionKey: Any] = [ - NSAttributedString.DocumentReadingOptionKey.documentType: NSAttributedString.DocumentType.html, - NSAttributedString.DocumentReadingOptionKey.characterEncoding: String.Encoding.utf8.rawValue - ] - - guard let attributedString = try? NSAttributedString(data: data, options: options, documentAttributes: nil) else { - return nil - } - - return attributedString.string - } -} diff --git a/SignalUtilitiesKit/OWSLinkPreview.swift b/SignalUtilitiesKit/OWSLinkPreview.swift index 656f43657..8f2d9977f 100644 --- a/SignalUtilitiesKit/OWSLinkPreview.swift +++ b/SignalUtilitiesKit/OWSLinkPreview.swift @@ -291,83 +291,6 @@ public class OWSLinkPreview: MTLModel { return result.filterStringForDisplay() } - // MARK: - Whitelists - - // For link domains, we require an exact match - no subdomains allowed. - // - // Note that order matters in this whitelist since the logic for determining - // how to render link preview domains in displayDomain(...) uses the first match. - // We should list TLDs first and subdomains later. - private static let linkDomainWhitelist = [ - // YouTube - "youtube.com", - "www.youtube.com", - "m.youtube.com", - "youtu.be", - - // Reddit - "reddit.com", - "www.reddit.com", - "m.reddit.com", - // NOTE: We don't use redd.it. - - // Imgur - // - // NOTE: Subdomains are also used for content. - // - // For example, you can access "user/member" pages: https://sillygoose2.imgur.com/ - // A different member page can be accessed without a subdomain: https://imgur.com/user/SillyGoose2 - // - // I'm not sure we need to support these subdomains; they don't appear to be core functionality. - "imgur.com", - "www.imgur.com", - "m.imgur.com", - - // Instagram - "instagram.com", - "www.instagram.com", - "m.instagram.com", - - // Pinterest - "pinterest.com", - "www.pinterest.com", - "pin.it", - - // Giphy - "giphy.com", - "media.giphy.com", - "media1.giphy.com", - "media2.giphy.com", - "media3.giphy.com", - "gph.is" - ] - - // For media domains, we DO NOT require an exact match - subdomains are allowed. - private static let mediaDomainWhitelist = [ - // YouTube - "ytimg.com", - - // Reddit - "redd.it", - - // Imgur - "imgur.com", - - // Instagram - "cdninstagram.com", - "fbcdn.net", - - // Pinterest - "pinimg.com", - - // Giphy - "giphy.com" - ] - - private static let protocolWhitelist = [ - "https" - ] - @objc public func displayDomain() -> String? { return OWSLinkPreview.displayDomain(forUrl: urlString) @@ -383,13 +306,7 @@ public class OWSLinkPreview: MTLModel { owsFailDebug("Invalid url.") return nil } - guard let result = whitelistedDomain(forUrl: url, - domainWhitelist: OWSLinkPreview.linkDomainWhitelist, - allowSubdomains: false) else { - Logger.error("Missing domain.") - return nil - } - return result + return url.host } @objc @@ -397,9 +314,7 @@ public class OWSLinkPreview: MTLModel { guard let url = URL(string: urlString) else { return false } - return whitelistedDomain(forUrl: url, - domainWhitelist: OWSLinkPreview.linkDomainWhitelist, - allowSubdomains: false) != nil + return true } @objc @@ -407,36 +322,7 @@ public class OWSLinkPreview: MTLModel { guard let url = URL(string: urlString) else { return false } - return whitelistedDomain(forUrl: url, - domainWhitelist: OWSLinkPreview.mediaDomainWhitelist, - allowSubdomains: true) != nil - } - - private class func whitelistedDomain(forUrl url: URL, domainWhitelist: [String], allowSubdomains: Bool) -> String? { - guard let urlProtocol = url.scheme?.lowercased() else { - return nil - } - guard protocolWhitelist.contains(urlProtocol) else { - return nil - } - guard let domain = url.host?.lowercased() else { - return nil - } - guard url.path.count > 1 else { - // URL must have non-empty path. - return nil - } - - for whitelistedDomain in domainWhitelist { - if domain == whitelistedDomain.lowercased() { - return whitelistedDomain - } - if allowSubdomains, - domain.hasSuffix("." + whitelistedDomain.lowercased()) { - return whitelistedDomain - } - } - return nil + return true } // MARK: - Serial Queue @@ -647,7 +533,7 @@ public class OWSLinkPreview: MTLModel { let (promise, resolver) = Promise<Data>.pending() sessionManager.get(urlString, parameters: [String: AnyObject](), - headers: nil, + headers: [:], progress: nil, success: { task, value in @@ -721,7 +607,7 @@ public class OWSLinkPreview: MTLModel { }, failure: { (_) in Logger.warn("Error downloading asset") resolver.reject(LinkPreviewError.couldNotDownload) - }) + }, shouldIgnoreSignalProxy: true) } return promise.then(on: DispatchQueue.global()) { (asset: ProxiedContentAsset) -> Promise<Data> in do { @@ -813,31 +699,27 @@ public class OWSLinkPreview: MTLModel { } } - // Example: - // - // <meta property="og:title" content="Randomness is Random - Numberphile"> - // <meta property="og:image" content="https://i.ytimg.com/vi/tP-Ipsat90c/maxresdefault.jpg"> class func parse(linkData: Data) throws -> OWSLinkPreviewContents { guard let linkText = String(bytes: linkData, encoding: .utf8) else { owsFailDebug("Could not parse link text.") throw LinkPreviewError.invalidInput } + + let content = HTMLMetadata.construct(parsing: linkText) var title: String? - if let rawTitle = NSRegularExpression.parseFirstMatch(pattern: "<meta\\s+property\\s*=\\s*\"og:title\"\\s+[^>]*content\\s*=\\s*\"(.*?)\"\\s*[^>]*/?>", - text: linkText, - options: .dotMatchesLineSeparators) { - if let decodedTitle = decodeHTMLEntities(inString: rawTitle) { - let normalizedTitle = OWSLinkPreview.normalizeTitle(title: decodedTitle) - if normalizedTitle.count > 0 { - title = normalizedTitle - } + let rawTitle = content.ogTitle ?? content.titleTag + if let decodedTitle = decodeHTMLEntities(inString: rawTitle ?? "") { + let normalizedTitle = OWSLinkPreview.normalizeTitle(title: decodedTitle) + if normalizedTitle.count > 0 { + title = normalizedTitle } } + Logger.verbose("title: \(String(describing: title))") - guard let rawImageUrlString = NSRegularExpression.parseFirstMatch(pattern: "<meta\\s+property\\s*=\\s*\"og:image\"\\s+[^>]*content\\s*=\\s*\"(.*?)\"[^>]*/?>", text: linkText) else { + guard let rawImageUrlString = content.ogImageUrlString ?? content.faviconUrlString else { return OWSLinkPreviewContents(title: title) } guard let imageUrlString = decodeHTMLEntities(inString: rawImageUrlString)?.ows_stripped() else { @@ -855,7 +737,8 @@ public class OWSLinkPreview: MTLModel { let imageFilename = imageUrl.lastPathComponent let imageFileExtension = (imageFilename as NSString).pathExtension.lowercased() guard imageFileExtension.count > 0 else { - return nil + // TODO: For those links don't have a file extension, we should figure out a way to know the image mime type + return "png" } return imageFileExtension } From e35756609b5a96a6dcd5c3163de9d57792685c62 Mon Sep 17 00:00:00 2001 From: Ryan ZHAO <ryanzhaors@qq.com> Date: Wed, 18 Nov 2020 11:56:11 +1100 Subject: [PATCH 4/6] fix crash --- SignalUtilitiesKit/OWSLinkPreview.swift | 32 +++++++++++++++---------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/SignalUtilitiesKit/OWSLinkPreview.swift b/SignalUtilitiesKit/OWSLinkPreview.swift index 8f2d9977f..ac278707e 100644 --- a/SignalUtilitiesKit/OWSLinkPreview.swift +++ b/SignalUtilitiesKit/OWSLinkPreview.swift @@ -496,8 +496,8 @@ public class OWSLinkPreview: MTLModel { return Promise.value(cachedInfo) } return downloadLink(url: previewUrl) - .then(on: DispatchQueue.global()) { (data) -> Promise<OWSLinkPreviewDraft> in - return parseLinkDataAndBuildDraft(linkData: data, linkUrlString: previewUrl) + .then(on: DispatchQueue.global()) { (data, response) -> Promise<OWSLinkPreviewDraft> in + return parseLinkDataAndBuildDraft(linkData: data, response: response, linkUrlString: previewUrl) }.then(on: DispatchQueue.global()) { (linkPreviewDraft) -> Promise<OWSLinkPreviewDraft> in guard linkPreviewDraft.isValid() else { throw LinkPreviewError.noPreview @@ -507,9 +507,14 @@ public class OWSLinkPreview: MTLModel { return Promise.value(linkPreviewDraft) } } + + // Twitter doesn't return OpenGraph tags to Signal + // `curl -A Signal "https://twitter.com/signalapp/status/1280166087577997312?s=20"` + // If this ever changes, we can switch back to our default User-Agent + private static let userAgentString = "WhatsApp" class func downloadLink(url urlString: String, - remainingRetries: UInt = 3) -> Promise<Data> { + remainingRetries: UInt = 3) -> Promise<(Data, URLResponse)> { Logger.verbose("url: \(urlString)") @@ -529,11 +534,13 @@ public class OWSLinkPreview: MTLModel { owsFailDebug("Could not configure url: \(urlString).") return Promise(error: LinkPreviewError.assertionFailure) } + + sessionManager.requestSerializer.setValue(self.userAgentString, forHTTPHeaderField: "User-Agent") - let (promise, resolver) = Promise<Data>.pending() + let (promise, resolver) = Promise<(Data, URLResponse)>.pending() sessionManager.get(urlString, parameters: [String: AnyObject](), - headers: [:], + headers: nil, progress: nil, success: { task, value in @@ -559,7 +566,7 @@ public class OWSLinkPreview: MTLModel { resolver.reject(LinkPreviewError.invalidContent) return } - resolver.fulfill(data) + resolver.fulfill((data, response)) }, failure: { _, error in Logger.verbose("Error: \(error)") @@ -576,8 +583,8 @@ public class OWSLinkPreview: MTLModel { return } OWSLinkPreview.downloadLink(url: urlString, remainingRetries: remainingRetries - 1) - .done(on: DispatchQueue.global()) { (data) in - resolver.fulfill(data) + .done(on: DispatchQueue.global()) { (data, response) in + resolver.fulfill((data, response)) }.catch(on: DispatchQueue.global()) { (error) in resolver.reject(error) }.retainUntilComplete() @@ -662,9 +669,10 @@ public class OWSLinkPreview: MTLModel { } class func parseLinkDataAndBuildDraft(linkData: Data, + response: URLResponse, linkUrlString: String) -> Promise<OWSLinkPreviewDraft> { do { - let contents = try parse(linkData: linkData) + let contents = try parse(linkData: linkData, response: response) let title = contents.title guard let imageUrl = contents.imageUrl else { @@ -699,9 +707,9 @@ public class OWSLinkPreview: MTLModel { } } - class func parse(linkData: Data) throws -> OWSLinkPreviewContents { - guard let linkText = String(bytes: linkData, encoding: .utf8) else { - owsFailDebug("Could not parse link text.") + class func parse(linkData: Data, response: URLResponse) throws -> OWSLinkPreviewContents { + guard let linkText = String(data: linkData, urlResponse: response) else { + print("Could not parse link text.") throw LinkPreviewError.invalidInput } From 7f71ddabab77d20c53ba765cd4d2cceb1d3ac35a Mon Sep 17 00:00:00 2001 From: Ryan ZHAO <ryanzhaors@qq.com> Date: Thu, 19 Nov 2020 09:39:05 +1100 Subject: [PATCH 5/6] remove owsFailDebug on link preview image download --- SignalUtilitiesKit/ProxiedContentDownloader.swift | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/SignalUtilitiesKit/ProxiedContentDownloader.swift b/SignalUtilitiesKit/ProxiedContentDownloader.swift index f8eed8fc0..3611b9e03 100644 --- a/SignalUtilitiesKit/ProxiedContentDownloader.swift +++ b/SignalUtilitiesKit/ProxiedContentDownloader.swift @@ -712,7 +712,7 @@ open class ProxiedContentDownloader: NSObject, URLSessionTaskDelegate, URLSessio // Start a download task. guard let assetSegment = assetRequest.firstWaitingSegment() else { - owsFailDebug("queued asset request does not have a waiting segment.") + print("queued asset request does not have a waiting segment.") return } assetSegment.state = .downloading @@ -753,13 +753,13 @@ open class ProxiedContentDownloader: NSObject, URLSessionTaskDelegate, URLSessio } guard let data = data, data.count > 0 else { - owsFailDebug("Asset size response missing data.") + print("Asset size response missing data.") assetRequest.state = .failed self.assetRequestDidFail(assetRequest: assetRequest) return } guard let httpResponse = response as? HTTPURLResponse else { - owsFailDebug("Asset size response is invalid.") + print("Asset size response is invalid.") assetRequest.state = .failed self.assetRequestDidFail(assetRequest: assetRequest) return @@ -767,7 +767,7 @@ open class ProxiedContentDownloader: NSObject, URLSessionTaskDelegate, URLSessio var firstContentRangeString: String? for header in httpResponse.allHeaderFields.keys { guard let headerString = header as? String else { - owsFailDebug("Invalid header: \(header)") + print("Invalid header: \(header)") continue } if headerString.lowercased() == "content-range" { @@ -775,7 +775,7 @@ open class ProxiedContentDownloader: NSObject, URLSessionTaskDelegate, URLSessio } } guard let contentRangeString = firstContentRangeString else { - owsFailDebug("Asset size response is missing content range.") + print("Asset size response is missing content range.") assetRequest.state = .failed self.assetRequestDidFail(assetRequest: assetRequest) return @@ -791,13 +791,13 @@ open class ProxiedContentDownloader: NSObject, URLSessionTaskDelegate, URLSessio } guard contentLengthString.count > 0, let contentLength = Int(contentLengthString) else { - owsFailDebug("Asset size response has unparsable content length.") + print("Asset size response has unparsable content length.") assetRequest.state = .failed self.assetRequestDidFail(assetRequest: assetRequest) return } guard contentLength > 0 else { - owsFailDebug("Asset size response has invalid content length.") + print("Asset size response has invalid content length.") assetRequest.state = .failed self.assetRequestDidFail(assetRequest: assetRequest) return From aab39ffd8043054f22385c39ce79147ed603ec8e Mon Sep 17 00:00:00 2001 From: Ryan ZHAO <ryanzhaors@qq.com> Date: Thu, 19 Nov 2020 16:28:06 +1100 Subject: [PATCH 6/6] show link preview cancel button --- Session/Signal/LinkPreviewView.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Session/Signal/LinkPreviewView.swift b/Session/Signal/LinkPreviewView.swift index 4a95fd4ed..d8b3b0abe 100644 --- a/Session/Signal/LinkPreviewView.swift +++ b/Session/Signal/LinkPreviewView.swift @@ -657,7 +657,7 @@ public class LinkPreviewView: UIStackView { cancelButton.tintColor = Theme.secondaryColor cancelButton.setContentHuggingHigh() cancelButton.setCompressionResistanceHigh() - cancelButton.isHidden = true + cancelButton.isHidden = false cancelStack.addArrangedSubview(cancelButton) rightStack.addArrangedSubview(cancelStack)