status-go/appdatabase/migrationsprevnodecfg/bindata.go

1544 lines
92 KiB
Go
Raw Normal View History

// Code generated by go-bindata. DO NOT EDIT.
// sources:
// 0001_app.down.sql (356B)
// 0001_app.up.sql (2.967kB)
// 0002_tokens.down.sql (19B)
// 0002_tokens.up.sql (248B)
// 0003_settings.down.sql (118B)
// 0003_settings.up.sql (1.311kB)
// 0004_pending_stickers.down.sql (0)
// 0004_pending_stickers.up.sql (61B)
// 0005_waku_mode.down.sql (0)
// 0005_waku_mode.up.sql (146B)
// 0006_appearance.up.sql (67B)
// 0007_enable_waku_default.up.sql (38B)
// 0008_add_push_notifications.up.sql (349B)
// 0009_enable_sending_push_notifications.down.sql (49B)
// 0009_enable_sending_push_notifications.up.sql (49B)
// 0010_add_block_mentions.down.sql (83B)
// 0010_add_block_mentions.up.sql (89B)
// 0011_allow_webview_permission_requests.down.sql (0)
// 0011_allow_webview_permission_requests.up.sql (88B)
// 0012_pending_transactions.down.sql (33B)
// 0012_pending_transactions.up.sql (321B)
// 0013_favourites.down.sql (23B)
// 0013_favourites.up.sql (132B)
// 0014_add_use_mailservers.down.sql (0)
// 0014_add_use_mailservers.up.sql (111B)
// 0015_link_previews.down.sql (0)
// 0015_link_previews.up.sql (203B)
// 0016_local_notifications_preferences.down.sql (43B)
// 0016_local_notifications_preferences.up.sql (204B)
// 0017_bookmarks.down.sql (22B)
// 0017_bookmarks.up.sql (147B)
// 0018_profile_pictures_visibility.up.sql (84B)
// 0019_blocks_ranges_extra_data.up.sql (89B)
// 0020_metrics.up.sql (235B)
// 0021_add_session_id_to_metrics.up.sql (55B)
// 0022_pending_transfers.up.sql (706B)
// 1618237885_settings_anon_metrics_should_send.up.sql (80B)
// 1618395756_contacts_only.up.sql (136B)
// 1622184614_add_default_sync_period.up.sql (125B)
// 1625872445_user_status.up.sql (351B)
// 1627983977_add_gif_to_settings.up.sql (102B)
// 1628580203_add_hidden_account.up.sql (67B)
// 1629123384_add_id_to_app_metrics.up.sql (589B)
// 1630401853_add_opensea_enabled_to_settings.up.sql (70B)
// 1630464455_create-saved_addresses-table.down.sql (28B)
// 1630464455_create-saved_addresses-table.up.sql (187B)
// 1630485153_networks.down.sql (21B)
// 1630485153_networks.up.sql (394B)
// 1632262444_profile_pictures_show_to.up.sql (81B)
// 1635942153_add_telemetry_server_url_to_settings.up.sql (128B)
// 1635942154_add_backup_setting.up.sql (287B)
// 1637745568_add_auto_message_setting.up.sql (122B)
// 1640111208_nodeconfig.up.sql (7.659kB)
// doc.go (85B)
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
package migrationsprevnodecfg
import (
"bytes"
"compress/gzip"
"crypto/sha256"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
"time"
)
func bindataRead(data []byte, name string) ([]byte, error) {
gz, err := gzip.NewReader(bytes.NewBuffer(data))
if err != nil {
return nil, fmt.Errorf("read %q: %v", name, err)
}
var buf bytes.Buffer
_, err = io.Copy(&buf, gz)
clErr := gz.Close()
if err != nil {
return nil, fmt.Errorf("read %q: %v", name, err)
}
if clErr != nil {
return nil, err
}
return buf.Bytes(), nil
}
type asset struct {
bytes []byte
info os.FileInfo
digest [sha256.Size]byte
}
type bindataFileInfo struct {
name string
size int64
mode os.FileMode
modTime time.Time
}
func (fi bindataFileInfo) Name() string {
return fi.name
}
func (fi bindataFileInfo) Size() int64 {
return fi.size
}
func (fi bindataFileInfo) Mode() os.FileMode {
return fi.mode
}
func (fi bindataFileInfo) ModTime() time.Time {
return fi.modTime
}
func (fi bindataFileInfo) IsDir() bool {
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
return false
}
func (fi bindataFileInfo) Sys() interface{} {
return nil
}
var __0001_appDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x74\xcd\xcb\xaa\xc2\x40\x0c\xc6\xf1\x7d\x9f\xa2\xef\xd1\xd5\x39\xb4\x0b\x41\x54\xc4\x85\xbb\x21\x4e\x63\x1b\x6c\x27\x63\x92\x7a\x79\x7b\x41\xf0\x32\xea\x6c\x7f\xf9\xf8\xa7\x5e\x2f\x57\xe5\xe6\xef\x7f\xde\x94\x8a\x66\x14\x3a\xad\x8a\x37\x04\xef\x79\x0a\x96\xe2\x4e\xf8\xac\x28\xbf\xd1\xf5\xa4\xc6\x72\x4d\x8e\x2d\xc4\x98\xce\x23\xca\x48\xaa\xc4\x21\x75\x13\x08\xba\xff\x8a\x0f\xec\x0f\x29\x8d\x40\x83\xa2\x9c\x3e\xa7\x2f\x77\x82\xc7\x09\xd5\x5c\x07\xcf\xe7\xb3\x45\xdd\x6c\x73\x1b\xe7\x7b\x30\x47\xad\xa3\xf6\x92\x6b\x1a\x47\xf2\xd9\x8f\xf7\xc0\x23\x29\x10\x3a\xd4\xaa\xb8\x05\x00\x00\xff\xff\xf6\xca\x86\xce\x64\x01\x00\x00")
func _0001_appDownSqlBytes() ([]byte, error) {
return bindataRead(
__0001_appDownSql,
"0001_app.down.sql",
)
}
func _0001_appDownSql() (*asset, error) {
bytes, err := _0001_appDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0001_app.down.sql", size: 356, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xb5, 0x25, 0xa0, 0xf8, 0x7d, 0x2d, 0xd, 0xcf, 0x18, 0xe4, 0x73, 0xc3, 0x95, 0xf5, 0x24, 0x20, 0xa9, 0xe6, 0x9e, 0x1d, 0x93, 0xe5, 0xc5, 0xad, 0x93, 0x8f, 0x5e, 0x40, 0xb5, 0x30, 0xaa, 0x25}}
return a, nil
}
var __0001_appUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xac\x56\x4f\x73\xa2\x30\x14\xbf\xf3\x29\x72\xd4\x19\x2e\x7b\xee\x09\x35\x5a\x66\x29\xec\x22\x6e\xdb\x53\x26\x42\xc4\x8c\x40\xd2\x24\xd4\xfa\xed\x77\x02\x04\x50\x41\xeb\xce\xde\x4c\xde\xcb\xe3\xf7\xe7\xe5\xc5\x79\x08\x9d\x08\x82\xc8\x99\x79\x10\xb8\x4b\xe0\x07\x11\x80\x6f\xee\x3a\x5a\x03\x49\x94\xa2\x45\x2a\xc1\xc4\x52\x27\x4e\xc0\x1f\x27\x9c\x3f\x3b\x21\xf8\x15\xba\x2f\x4e\xf8\x0e\x7e\xc2\x77\xdb\xfa\xc4\x59\x49\xc0\xcc\x0b\x66\xd6\x14\xbc\xba\xd1\x73\xb0\x89\x40\x18\xbc\xba\x8b\x27\xcb\xba\x51\x1c\xc7\x31\x2b\x0b\xa5\x8b\xe3\x24\x11\x44\xca\xe1\xfa\x47\x9c\x65\x44\x81\x59\x10\x78\xd0\xf1\x6d\x2b\xde\xe3\xde\xaa\xc2\x15\xc1\xb7\xc8\xb6\xa4\x62\x02\xa7\x66\xc5\xcb\xed\x81\x9c\x2a\x5c\xb6\xc5\xb1\xda\x37\xfb\x05\xce\x4d\x4a\xcc\x32\x26\xcc\x6f\x41\xb0\x22\x09\xc2\x0a\x2c\x9c\x08\x46\xee\x0b\xac\xc0\xfa\x1b\xcf\xb3\xad\x92\x27\xa3\xd1\x71\xd6\x1b\xdf\xfd\xbd\x81\xc0\xf5\x17\xf0\x0d\x94\x05\xfd\x28\x09\xaa\xd9\x20\xc3\x38\xf0\x7b\x3a\xd4\xb1\x29\x78\x7d\x86\x21\x6c\x97\x4f\xb7\xca\x69\x31\x86\x8b\xe9\x48\x5b\xaa\x5a\xb4\x85\xea\x0a\x1d\x63\xd4\x9c\xba\x28\xd0\xc6\xbb\x32\xdd\xd6\x6d\x6f\xb7\x82\x1d\x25\x11\xda\x5b\x9a\x54\x0a\x9f\x7b\xda\x9a\xd0\xd3\x58\xd1\x9c\x48\x85\x73\x0e\x36\xeb\x95\xbb\xf2\xe1\x02\xcc\xdc\x95\xeb\x47\xb6\x95\x60\xce\x8d\xe5\x60\x01\x97\xce\xc6\x8b\xc0\x0e\x67\x92\xd8\xd6\x9e\x6a\xdf\x4f\x6e\x91\x90\x2f\xb0\xf1\xd7\xf5\x49\xd7\x8f\x1e\xeb\x46\x83\x18\x35\xf5\xc0\xc4\x6a\xb6\x90\x61\xd0\x41\x35\x39\x75\xeb\x2c\x83\x10\xba\x2b\x5f\x33\x9b\x74\x67\xa6\x20\x84\x4b\x18\x42\x7f\x0e\xbb\xea\x13\xbd\x1f\x68\x0e\x1e\x8c\x20\x98\x3b\xeb\xb9\xb3\x80\xd6\x1d\x35\x35\x7d\x2d\x65\xa7\x5a\x4f\xcc\xc7\x68\x72\x22\x72\x2a\x25\x65\x85\x2e\xa8\x0b\xa3\x21\x2f\xba\xb4\xcb\x48\x9f\x6c\x7b\xfc\x8c\x6b\x85\x76\x52\x6f\x0f\x53\xbd\x05\x50\x09\x5c\xc8\x5d\xdd\x3a\x05\x51\x47\x26\x0e\xda\x80\xd6\xd8\xba\x25\xfa\x5e\x60\xb9\x6f\x07\x47\xb7\x7d\x39\x52\xba\xc8\x36\x3b\xa0\x91\x43\xea\xab\x99\x17\x92\x14\x09\x11\x26\xc3\xb6\x04\x89\x09\xe5\xaa\x89\x66\x2c\x6d\x7e\x9d\x4d\xc5\xf3\x4f\x14\x65\xbe\x25\xe2\x1a\x6f\xaf\xcd\x47\x39\x65\x0c\x27\x24\xa9\x3a\xbe\x6d\xf7\x1f\xe7\xda\x77\xda\xd8\x0d\x55\xdb\x10\x3b\xef\xbc\x8c\xc5\x07\x79\x3b\xfd\xca\x25\xdb\x9a\x07\xfe\x3a\x0a\x1d\x0d\xab\x99\x34\xc6\x18\xc4\x89\x30\x13\xa7\xfa\xdd\x94\x36\xe3\x69\xa2\x6b\xb6\x1f\xe9\xbe\x3b\xbd\xd7\xe5\x35\xd2\xef\xda\x7e\xdb\xdf\x31\xf1\x5b\xef\xbf\x25\xf9\xd2\xf1\xd6\x83\x5a\xe4\x98\x73\x5a\xa4\x68\xc7\x84\x99\x9d\x48\x31\x54\x31\x18\xd4\xe4\x52\xf3\xc7\x75\x41\x02\x17\x29\xf9\x4f\xf2\xec\x04\xcb\x87\xc5\x51\xec\x72\xff\x1e\xbc\x1c\xd3\x4c\x12\xf1\x59\x5f\x59\x00\x00\xa0\xc9\xf0\x43\xae\x63\xd5\xb0\xb9\x06\xa5\x43\xe3\x90\x75\x94\x63\x29\x8f\x4c\x24\xdd\x9d\xd4\xbb\xbb\x8c\x10\x75\x75\xe2\xb1\x91\xd8\x11\x40\x82\x7c\x94\x44\x2a\x94\x62\x6e\xc8\xa4\x98\xd7\x72\xf5\x9f\x16\xb8\x82\x97\xf8\x74\x9e\x62\xf7\xb2\x06\x1f\x43\x1d\xa8\xde\xf1\xcb\x87\x66\x9c\x47\xfd\x82\x8f\x20\x47\x4d\x31\x44\x93\x2f\x7d\xb7\x47\x09\x36\x79\xdf\x36\x18\x29\xc6\x69\x6c\x94\xa9\x16\xe3\x4e\x37\xc5\xe5\xb9\x61\x19\x96\xca\xa0\x68\x35\xea\x8d\x38\x9d\x93\x50\x19\xb3\x4f\x22\x4e\x57\x4f\x7e\x73\x21\xab\x46\x22\x29\x53\x54\xff\x1b\x19\xce\xfa\xe7\x1e\xa8\x70\x1b\x9d\xda\x4b\xd7\xf7\x68\x94\x72\xc6\x8e\xa4\xa3\x57\xb7\x4d\xc3\xb1\x4e\xd8\xd3\x74\xdf\xcf\x50\xcc\xc4\xaf\xe1\xfe\x0d\x00\x00\xff\xff\xe8\x42\x77\x9b\x97\x0b\x00\x00")
func _0001_appUpSqlBytes() ([]byte, error) {
return bindataRead(
__0001_appUpSql,
"0001_app.up.sql",
)
}
func _0001_appUpSql() (*asset, error) {
bytes, err := _0001_appUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0001_app.up.sql", size: 2967, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xf7, 0x3a, 0xa7, 0xf2, 0x8f, 0xfa, 0x82, 0x7c, 0xc5, 0x49, 0xac, 0xac, 0xf, 0xc, 0x77, 0xe2, 0xba, 0xe8, 0x4d, 0xe, 0x6f, 0x5d, 0x2c, 0x2c, 0x18, 0x80, 0xc2, 0x1d, 0xe, 0x25, 0xe, 0x18}}
return a, nil
}
var __0002_tokensDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\x09\xf2\x0f\x50\x08\x71\x74\xf2\x71\x55\x28\xc9\xcf\x4e\xcd\x2b\xb6\xe6\x02\x04\x00\x00\xff\xff\xf0\xdb\x32\xa7\x13\x00\x00\x00")
func _0002_tokensDownSqlBytes() ([]byte, error) {
return bindataRead(
__0002_tokensDownSql,
"0002_tokens.down.sql",
)
}
func _0002_tokensDownSql() (*asset, error) {
bytes, err := _0002_tokensDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0002_tokens.down.sql", size: 19, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xd1, 0x31, 0x2, 0xcc, 0x2f, 0x38, 0x90, 0xf7, 0x58, 0x37, 0x47, 0xf4, 0x18, 0xf7, 0x72, 0x74, 0x67, 0x14, 0x7e, 0xf3, 0xb1, 0xd6, 0x5f, 0xb0, 0xd5, 0xe7, 0x91, 0xf4, 0x26, 0x77, 0x8e, 0x68}}
return a, nil
}
var __0002_tokensUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x6c\x8e\xcd\x6a\x85\x30\x10\x46\xf7\x79\x8a\x6f\x79\x05\xdf\xa0\xab\xa8\xa9\x0e\xb5\xb1\xc4\xb1\xea\xaa\x58\x93\x85\xf8\x13\x30\x42\xe9\xdb\x17\x4b\x4b\x2b\xdc\xed\x37\x67\x0e\x27\x35\x4a\xb2\x02\xcb\xa4\x54\xa0\x47\xe8\x8a\xa1\x3a\xaa\xb9\xc6\xe1\x67\xb7\x05\xdc\x04\x30\x58\xbb\xbb\x10\xf0\x2a\x4d\x5a\x48\xf3\x4d\xe9\xa6\x2c\x63\x01\x6c\xee\xf8\xf0\xfb\xfc\x36\x59\x34\xba\xa6\x5c\xab\x0c\x09\xe5\xa4\xf9\x8a\x0d\xab\x03\xab\xee\xba\x86\xcf\xf5\xdd\x2f\x77\xbd\xd6\x8d\xd3\x3a\x2c\xe1\xcf\x4a\x9a\xcf\xc3\xe8\x17\xbf\xff\xbe\x9c\xc3\x8b\xa1\x67\x69\x7a\x3c\xa9\x1e\xb7\x9f\xd4\xf8\x5f\x57\x24\x22\xb4\xc4\x45\xd5\x30\x4c\xd5\x52\xf6\x20\xc4\x57\x00\x00\x00\xff\xff\x73\xf3\x87\xe5\xf8\x00\x00\x00")
func _0002_tokensUpSqlBytes() ([]byte, error) {
return bindataRead(
__0002_tokensUpSql,
"0002_tokens.up.sql",
)
}
func _0002_tokensUpSql() (*asset, error) {
bytes, err := _0002_tokensUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0002_tokens.up.sql", size: 248, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xcc, 0xd6, 0xde, 0xd3, 0x7b, 0xee, 0x92, 0x11, 0x38, 0xa4, 0xeb, 0x84, 0xca, 0xcb, 0x37, 0x75, 0x5, 0x77, 0x7f, 0x14, 0x39, 0xee, 0xa1, 0x8b, 0xd4, 0x5c, 0x6e, 0x55, 0x6, 0x50, 0x16, 0xd4}}
return a, nil
}
var __0003_settingsDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x3c\xca\xb1\x0a\xc2\x40\x0c\x06\xe0\x3d\x4f\xf1\x8f\x0a\xbe\x41\xa7\x5c\x1b\x69\xb0\x9a\x92\x46\x6b\x47\x87\x43\x04\x11\xe1\x4e\xc1\xb7\x77\x11\xd7\x8f\xaf\x73\x1b\x11\x9c\x06\x41\xc9\xb5\xde\x1e\xd7\xd2\x50\xeb\xc2\x21\x3f\xd6\x2d\x0e\x16\x90\xb3\x4e\x31\xfd\x13\x56\x04\xd4\xcf\x33\xe3\xc4\xde\xf6\xec\x18\x5d\xf7\xec\x0b\x76\xb2\x6c\x08\x78\x5f\xee\xaf\x8c\x34\x58\xa2\x35\x66\x8d\xde\x8e\x01\xb7\x59\xbb\x86\xe8\x1b\x00\x00\xff\xff\x49\x2e\x16\x6c\x76\x00\x00\x00")
func _0003_settingsDownSqlBytes() ([]byte, error) {
return bindataRead(
__0003_settingsDownSql,
"0003_settings.down.sql",
)
}
func _0003_settingsDownSql() (*asset, error) {
bytes, err := _0003_settingsDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0003_settings.down.sql", size: 118, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xe5, 0xa6, 0xf5, 0xc0, 0x60, 0x64, 0x77, 0xe2, 0xe7, 0x3c, 0x9b, 0xb1, 0x52, 0xa9, 0x95, 0x16, 0xf8, 0x60, 0x2f, 0xa5, 0xeb, 0x46, 0xb9, 0xb9, 0x8f, 0x4c, 0xf4, 0xfd, 0xbb, 0xe7, 0xe5, 0xe5}}
return a, nil
}
var __0003_settingsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x7c\x93\xbd\x6e\xdb\x30\x10\xc7\x77\x3f\x05\xb7\xb4\x40\x87\x66\x28\x50\x20\x93\x1c\xab\x89\x50\x57\x0a\x54\xb9\x41\xa6\x03\x4d\x9e\xad\x83\x29\x92\xe0\x51\x0e\xf4\xf6\x85\x1c\x45\x56\x53\xd9\x23\x79\xbf\xfb\xbe\xff\xaa\x2c\x9e\x44\x95\x2c\xd7\xa9\x60\x8c\x91\xec\x9e\xef\x16\xf7\x65\x9a\x54\xe9\x87\x6f\xf1\x69\x21\x84\xd4\x3a\x20\xb3\xf8\x93\x94\xf7\x8f\x49\x29\xf2\xa2\x12\xf9\x66\xbd\xfe\xb2\x10\x42\xd5\xd2\x31\x34\x4e\xa3\x58\x16\xc5\x3a\x4d\x72\xb1\x4a\x7f\x24\x9b\x75\x25\x76\xd2\x30\x9e\x98\x36\x04\xb4\xaa\x1b\x03\xbc\x13\x37\x2d\xeb\x9b\x33\x11\xc1\x62\x7c\x75\xe1\x30\x9f\xa9\xe5\xe8\x1a\xd8\x3a\x17\xad\xd3\xc8\x62\xb9\x2e\x96\x73\x06\x40\x2b\xb7\x06\xf5\x08\x68\xe9\x3d\xc3\xb5\x2e\x90\xfc\xed\xb7\xef\xb7\x1f\x99\xde\xb4\x33\x88\x71\xfa\x51\x93\x46\xa8\x5d\x83\x10\x9d\x33\x91\xfc\xe5\xc6\xc9\x72\x94\xc6\xc8\x48\xce\x02\xe9\xd9\xd4\x07\xec\xa0\xbd\x6c\x53\x32\x68\x38\xc5\xb1\x0a\xa7\xe0\xd4\xee\x25\x05\xd4\xe0\xac\xd8\xe4\xbf\xb3\x87\x3c\x5d\x89\x65\xf6\x90\xe5\xd5\x47\x88\xec\x7e\xea\x6f\x24\x47\x68\xbd\x96\x11\xf5\x9c\xab\x91\x11\x39\x82\xc6\x40\x47\xec\x23\xc4\xfa\x8c\x65\x79\x35\x76\xfc\xf5\x44\xbb\x3d\x18\x3c\xa2\x99\xa6\x68\x2c\x36\xce\x92\x9a\xfe\x59\xd9\xe0\x6c\xbf\xc3\xfa\xdf\x56\xfb\xaf\xc5\x69\x04\xe5\xec\x8e\xf6\xe3\x5a\xad\x8b\xb4\x23\x75\x9a\xee\x64\xe9\x97\x96\xe1\x6b\x17\xdd\x5b\x0f\xff\x85\xf7\x64\x2d\x6a\x68\x24\x19\xc6\x70\xc4\x70\xbe\x2e\x1f\x70\x87\xa1\x1f\xef\xb4\xec\xc1\x72\x24\x7c\x05\x1f\xe8\x28\x55\x77\x25\x73\xbb\x35\xa4\xe0\x80\xdd\x6c\xd7\x01\x1b\x6c\xb6\x18\x80\x3b\xab\xc8\xee\x41\xd5\x8e\xd4\x15\x3d\x31\xed\x6d\xcf\xf9\x3a\x48\x9e\x9f\x24\x47\x52\x07\x0c\x0c\x5e\xaa\x03\xc3\x70\x88\x13\x4d\x8c\x40\x40\xd5\x2b\xef\xfd\x7d\x06\x86\x62\x9c\x85\xc6\x6d\xc9\xe0\x28\xce\xcb\x75\x75\x36\xd6\x18\x49\x4d\x6f\x7d\xd4\x3a\xe9\x1b\xf1\x54\x66\xbf\x92\xf2\x45\xfc\x4c\x5f\x7a\x87\x96\x31\xf4\x53\x3d\x67\x7d\xed\xab\x8c\x10\x9c\x8b\x57\x05\x3b\x70\x8c\xfd\xfd\x82\x97\xcc\xd7\x56\x3f\xd0\x47\x62\xda\x9a\x5e\xb7\x07\xb4\x63\xdc\xc5\x67\xf1\x9c\x55\x8f\xc5\xa6\x12\x65\xf1\x9c\xad\xee\x16\x7f\x03\x00\x00\xff\xff\xa5\xa1\x7b\x78\x1f\x05\x00\x00")
func _0003_settingsUpSqlBytes() ([]byte, error) {
return bindataRead(
__0003_settingsUpSql,
"0003_settings.up.sql",
)
}
func _0003_settingsUpSql() (*asset, error) {
bytes, err := _0003_settingsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0003_settings.up.sql", size: 1311, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xea, 0x35, 0x0, 0xeb, 0xe2, 0x33, 0x68, 0xb9, 0xf4, 0xf6, 0x8e, 0x9e, 0x10, 0xe9, 0x58, 0x68, 0x28, 0xb, 0xcd, 0xec, 0x74, 0x71, 0xa7, 0x9a, 0x5a, 0x77, 0x59, 0xb1, 0x13, 0x1c, 0xa1, 0x5b}}
return a, nil
}
var __0004_pending_stickersDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x01\x00\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00")
func _0004_pending_stickersDownSqlBytes() ([]byte, error) {
return bindataRead(
__0004_pending_stickersDownSql,
"0004_pending_stickers.down.sql",
)
}
func _0004_pending_stickersDownSql() (*asset, error) {
bytes, err := _0004_pending_stickersDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0004_pending_stickers.down.sql", size: 0, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}}
return a, nil
}
var __0004_pending_stickersUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x70\x74\x71\x51\x70\xf6\xf7\x09\xf5\xf5\x53\x28\x2e\xc9\x4c\xce\x4e\x2d\x2a\x8e\x2f\x48\x4c\xce\x2e\x8e\x2f\x48\xcd\x4b\xc9\xcc\x4b\x57\x70\xf2\xf1\x77\xb2\xe6\x02\x04\x00\x00\xff\xff\xc9\xc1\xc2\xc6\x3d\x00\x00\x00")
func _0004_pending_stickersUpSqlBytes() ([]byte, error) {
return bindataRead(
__0004_pending_stickersUpSql,
"0004_pending_stickers.up.sql",
)
}
func _0004_pending_stickersUpSql() (*asset, error) {
bytes, err := _0004_pending_stickersUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0004_pending_stickers.up.sql", size: 61, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x3c, 0xed, 0x25, 0xdf, 0x75, 0x2, 0x6c, 0xf0, 0xa2, 0xa8, 0x37, 0x62, 0x65, 0xad, 0xfd, 0x98, 0xa0, 0x9d, 0x63, 0x94, 0xdf, 0x6b, 0x46, 0xe0, 0x68, 0xec, 0x9c, 0x7f, 0x77, 0xdd, 0xb3, 0x6}}
return a, nil
}
var __0005_waku_modeDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x01\x00\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00")
func _0005_waku_modeDownSqlBytes() ([]byte, error) {
return bindataRead(
__0005_waku_modeDownSql,
"0005_waku_mode.down.sql",
)
}
func _0005_waku_modeDownSql() (*asset, error) {
bytes, err := _0005_waku_modeDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0005_waku_mode.down.sql", size: 0, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}}
return a, nil
}
var __0005_waku_modeUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x70\x74\x71\x51\x70\xf6\xf7\x09\xf5\xf5\x53\x28\x4f\xcc\x2e\x8d\x4f\xcd\x4b\x4c\xca\x49\x4d\x51\x70\xf2\xf7\xf7\x71\x75\xf4\x53\x70\x71\x75\x73\x0c\xf5\x09\x51\x48\x4b\xcc\x29\x4e\xb5\xe6\x22\xca\x8c\xa4\x9c\xfc\xfc\xdc\xf8\xb4\xcc\x9c\x92\xd4\xa2\xf8\xdc\xfc\x94\x54\x5c\xa6\x01\x02\x00\x00\xff\xff\x00\x97\x79\x75\x92\x00\x00\x00")
func _0005_waku_modeUpSqlBytes() ([]byte, error) {
return bindataRead(
__0005_waku_modeUpSql,
"0005_waku_mode.up.sql",
)
}
func _0005_waku_modeUpSql() (*asset, error) {
bytes, err := _0005_waku_modeUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0005_waku_mode.up.sql", size: 146, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xa6, 0x91, 0xc, 0xd7, 0x89, 0x61, 0x2e, 0x4c, 0x5a, 0xb6, 0x67, 0xd1, 0xc1, 0x42, 0x24, 0x38, 0xd6, 0x1b, 0x75, 0x41, 0x9c, 0x23, 0xb0, 0xca, 0x5c, 0xf1, 0x5c, 0xd0, 0x13, 0x92, 0x3e, 0xe1}}
return a, nil
}
var __0006_appearanceUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x70\x74\x71\x51\x70\xf6\xf7\x09\xf5\xf5\x53\x48\x2c\x28\x48\x4d\x2c\x4a\xcc\x4b\x4e\x55\xf0\xf4\x0b\x51\xf0\xf3\x0f\x51\xf0\x0b\xf5\xf1\x51\x70\x71\x75\x73\x0c\xf5\x09\x51\x30\xb0\xe6\x02\x04\x00\x00\xff\xff\x0b\x18\x43\x21\x43\x00\x00\x00")
func _0006_appearanceUpSqlBytes() ([]byte, error) {
return bindataRead(
__0006_appearanceUpSql,
"0006_appearance.up.sql",
)
}
func _0006_appearanceUpSql() (*asset, error) {
bytes, err := _0006_appearanceUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0006_appearance.up.sql", size: 67, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xae, 0x6, 0x25, 0x6c, 0xe4, 0x9d, 0xa7, 0x72, 0xe8, 0xbc, 0xe4, 0x1f, 0x1e, 0x2d, 0x7c, 0xb7, 0xf6, 0xa3, 0xec, 0x3b, 0x4e, 0x93, 0x2e, 0xa4, 0xec, 0x6f, 0xe5, 0x95, 0x94, 0xe8, 0x4, 0xfb}}
return a, nil
}
var __0007_enable_waku_defaultUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x0a\x0d\x70\x71\x0c\x71\x55\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x08\x76\x0d\x51\x28\x4f\xcc\x2e\x8d\x4f\xcd\x4b\x4c\xca\x49\x4d\x51\xb0\x55\x30\xb4\xe6\x02\x04\x00\x00\xff\xff\xa7\x77\xcb\x82\x26\x00\x00\x00")
func _0007_enable_waku_defaultUpSqlBytes() ([]byte, error) {
return bindataRead(
__0007_enable_waku_defaultUpSql,
"0007_enable_waku_default.up.sql",
)
}
func _0007_enable_waku_defaultUpSql() (*asset, error) {
bytes, err := _0007_enable_waku_defaultUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0007_enable_waku_default.up.sql", size: 38, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xd4, 0x42, 0xb6, 0xe5, 0x48, 0x41, 0xeb, 0xc0, 0x7e, 0x3b, 0xe6, 0x8e, 0x96, 0x33, 0x20, 0x92, 0x24, 0x5a, 0x60, 0xfa, 0xa0, 0x3, 0x5e, 0x76, 0x4b, 0x89, 0xaa, 0x37, 0x66, 0xbc, 0x26, 0x11}}
return a, nil
}
var __0008_add_push_notificationsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xa4\xce\x51\x0e\x82\x30\x0c\x00\xd0\x7f\x4f\xd1\x7b\xf8\x35\x64\x7c\x55\x48\x70\x7c\x2f\x08\x45\x97\x40\x6b\xd6\x6a\xe2\xed\x3d\x80\x33\x9a\x78\x81\x97\xe7\x30\xf8\x1e\x82\xab\xd0\x83\x92\x59\xe2\x8b\x82\xab\x6b\x38\x74\x38\x1c\x5b\xc8\xb4\x89\x51\xbc\xdd\xf5\x1a\x59\x2c\x2d\x69\x1a\x2d\x09\x6b\x24\x1e\xcf\x2b\xcd\x50\x75\x1d\x7a\xd7\x42\xed\x1b\x37\x60\x80\xc6\xe1\xc9\xef\x77\xdf\x60\x25\x9e\x0b\xec\x1b\x17\xfa\xe1\x07\xad\xf0\x53\xca\x0f\xca\xff\x36\x0b\xf0\x92\x65\x8b\x93\xb0\x8d\x93\x69\x14\x5e\x9f\x9f\xf0\x57\x00\x00\x00\xff\xff\x30\xc0\x56\xbd\x5d\x01\x00\x00")
func _0008_add_push_notificationsUpSqlBytes() ([]byte, error) {
return bindataRead(
__0008_add_push_notificationsUpSql,
"0008_add_push_notifications.up.sql",
)
}
func _0008_add_push_notificationsUpSql() (*asset, error) {
bytes, err := _0008_add_push_notificationsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0008_add_push_notifications.up.sql", size: 349, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x5a, 0x0, 0xbf, 0xd0, 0xdd, 0xcd, 0x73, 0xe0, 0x7c, 0x56, 0xef, 0xdc, 0x57, 0x61, 0x94, 0x64, 0x70, 0xb9, 0xfa, 0xa1, 0x2a, 0x36, 0xc, 0x2f, 0xf8, 0x95, 0xa, 0x57, 0x3e, 0x7a, 0xd7, 0x12}}
return a, nil
}
var __0009_enable_sending_push_notificationsDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x0a\x0d\x70\x71\x0c\x71\x55\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x08\x76\x0d\x51\x28\x4e\xcd\x4b\x89\x2f\x28\x2d\xce\x88\xcf\xcb\x2f\xc9\x4c\xcb\x4c\x4e\x2c\xc9\xcc\xcf\x2b\x56\xb0\x55\x30\xb0\xe6\x02\x04\x00\x00\xff\xff\x57\x2d\xdb\x16\x31\x00\x00\x00")
func _0009_enable_sending_push_notificationsDownSqlBytes() ([]byte, error) {
return bindataRead(
__0009_enable_sending_push_notificationsDownSql,
"0009_enable_sending_push_notifications.down.sql",
)
}
func _0009_enable_sending_push_notificationsDownSql() (*asset, error) {
bytes, err := _0009_enable_sending_push_notificationsDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0009_enable_sending_push_notifications.down.sql", size: 49, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xe9, 0xae, 0x1b, 0x41, 0xcb, 0x9c, 0x2c, 0x93, 0xc6, 0x2a, 0x77, 0x3, 0xb9, 0x51, 0xe0, 0x68, 0x68, 0x0, 0xf7, 0x5b, 0xb3, 0x1e, 0x94, 0x44, 0xba, 0x9c, 0xd0, 0x3b, 0x80, 0x21, 0x6f, 0xb5}}
return a, nil
}
var __0009_enable_sending_push_notificationsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x0a\x0d\x70\x71\x0c\x71\x55\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x08\x76\x0d\x51\x28\x4e\xcd\x4b\x89\x2f\x28\x2d\xce\x88\xcf\xcb\x2f\xc9\x4c\xcb\x4c\x4e\x2c\xc9\xcc\xcf\x2b\x56\xb0\x55\x30\xb4\xe6\x02\x04\x00\x00\xff\xff\x60\x47\x19\x17\x31\x00\x00\x00")
func _0009_enable_sending_push_notificationsUpSqlBytes() ([]byte, error) {
return bindataRead(
__0009_enable_sending_push_notificationsUpSql,
"0009_enable_sending_push_notifications.up.sql",
)
}
func _0009_enable_sending_push_notificationsUpSql() (*asset, error) {
bytes, err := _0009_enable_sending_push_notificationsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0009_enable_sending_push_notifications.up.sql", size: 49, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x1b, 0x80, 0xe4, 0x9c, 0xc8, 0xb8, 0xd5, 0xef, 0xce, 0x74, 0x9b, 0x7b, 0xdd, 0xa, 0x99, 0x1e, 0xef, 0x7f, 0xb8, 0x99, 0x84, 0x4, 0x0, 0x6b, 0x1d, 0x2c, 0xa, 0xf8, 0x2c, 0x4f, 0xb5, 0x44}}
return a, nil
}
var __0010_add_block_mentionsDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x70\x74\x71\x51\x70\xf6\xf7\x09\xf5\xf5\x53\x28\x28\x2d\xce\x88\xcf\xcb\x2f\xc9\x4c\xcb\x4c\x4e\x2c\xc9\xcc\xcf\x2b\x8e\x4f\xca\xc9\x4f\xce\x8e\xcf\x4d\xcd\x03\x73\x15\x9c\xfc\xfd\x7d\x5c\x1d\xfd\x14\x5c\x5c\xdd\x1c\x43\x7d\x42\x14\xdc\x1c\x7d\x82\x5d\xad\xb9\x00\x01\x00\x00\xff\xff\xa8\x45\x75\x3b\x53\x00\x00\x00")
func _0010_add_block_mentionsDownSqlBytes() ([]byte, error) {
return bindataRead(
__0010_add_block_mentionsDownSql,
"0010_add_block_mentions.down.sql",
)
}
func _0010_add_block_mentionsDownSql() (*asset, error) {
bytes, err := _0010_add_block_mentionsDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0010_add_block_mentions.down.sql", size: 83, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x6d, 0x9e, 0x27, 0x1e, 0xba, 0x9f, 0xca, 0xae, 0x98, 0x2e, 0x6e, 0xe3, 0xdd, 0xac, 0x73, 0x34, 0x4e, 0x69, 0x92, 0xb5, 0xf6, 0x9, 0xab, 0x50, 0x35, 0xd, 0xee, 0xeb, 0x3e, 0xcc, 0x7e, 0xce}}
return a, nil
}
var __0010_add_block_mentionsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x1c\xc7\x31\x0a\x42\x31\x0c\x06\xe0\xdd\x53\xfc\xf7\x70\xca\xb3\x79\x53\x7c\x05\x6d\xe7\xa2\xa5\x6a\x50\x53\x21\xf1\xfe\x82\xe3\x47\x52\xf8\x84\x42\x8b\x30\x7c\x44\xa8\xdd\x1d\x94\x12\x0e\x59\xea\x71\xc3\xe7\xeb\x8f\x66\x33\xf4\xa6\xfd\x12\x3a\xcd\xdb\xf5\x35\xfb\xb3\xbd\x87\xfd\x89\x25\x67\x61\xda\x90\x78\xa5\x2a\x05\x2b\xc9\x99\xf7\xbb\x5f\x00\x00\x00\xff\xff\x2b\x4e\x3f\xc5\x59\x00\x00\x00")
func _0010_add_block_mentionsUpSqlBytes() ([]byte, error) {
return bindataRead(
__0010_add_block_mentionsUpSql,
"0010_add_block_mentions.up.sql",
)
}
func _0010_add_block_mentionsUpSql() (*asset, error) {
bytes, err := _0010_add_block_mentionsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0010_add_block_mentions.up.sql", size: 89, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xd7, 0x23, 0x85, 0xa2, 0xb5, 0xb6, 0xb4, 0x3f, 0xdc, 0x4e, 0xff, 0xe2, 0x6b, 0x66, 0x68, 0x5e, 0xb2, 0xb4, 0x14, 0xb2, 0x1b, 0x4d, 0xb1, 0xce, 0xf7, 0x6, 0x58, 0xa7, 0xaf, 0x93, 0x3f, 0x25}}
return a, nil
}
var __0011_allow_webview_permission_requestsDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x01\x00\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00")
func _0011_allow_webview_permission_requestsDownSqlBytes() ([]byte, error) {
return bindataRead(
__0011_allow_webview_permission_requestsDownSql,
"0011_allow_webview_permission_requests.down.sql",
)
}
func _0011_allow_webview_permission_requestsDownSql() (*asset, error) {
bytes, err := _0011_allow_webview_permission_requestsDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0011_allow_webview_permission_requests.down.sql", size: 0, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}}
return a, nil
}
var __0011_allow_webview_permission_requestsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x04\xc0\x41\x0e\x02\x21\x0c\x05\xd0\xab\xfc\x7b\xb8\xea\x48\x67\x55\x87\x44\x61\x4d\x34\x69\x4c\x13\x04\xa5\x28\xd7\xf7\x91\x24\xbe\x22\xd1\x26\x0c\xd7\x39\xad\x3d\x1d\x14\x02\xce\x51\xf2\xe5\xc0\xd2\xc7\xcf\x74\x95\x7b\xad\x7d\x95\xb7\x8e\x97\xb9\x5b\x6f\x65\xe8\xe7\xab\x3e\x1d\x5b\x8c\xc2\x74\x20\xf0\x4e\x59\x12\x76\x92\x1b\x9f\xfe\x01\x00\x00\xff\xff\xc8\x7d\x35\xfa\x58\x00\x00\x00")
func _0011_allow_webview_permission_requestsUpSqlBytes() ([]byte, error) {
return bindataRead(
__0011_allow_webview_permission_requestsUpSql,
"0011_allow_webview_permission_requests.up.sql",
)
}
func _0011_allow_webview_permission_requestsUpSql() (*asset, error) {
bytes, err := _0011_allow_webview_permission_requestsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0011_allow_webview_permission_requests.up.sql", size: 88, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x43, 0x5f, 0x22, 0x4c, 0x98, 0x1d, 0xc6, 0xf4, 0x89, 0xaf, 0xf4, 0x44, 0xba, 0xf8, 0x28, 0xa7, 0xb5, 0xb9, 0xf0, 0xf2, 0xcb, 0x5, 0x59, 0x7a, 0xc, 0xdf, 0xd3, 0x38, 0xa4, 0xb8, 0x98, 0xc2}}
return a, nil
}
var __0012_pending_transactionsDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\x09\xf2\x0f\x50\x08\x71\x74\xf2\x71\x55\x28\x48\xcd\x4b\xc9\xcc\x4b\x8f\x2f\x29\x4a\xcc\x2b\x4e\x4c\x2e\xc9\xcc\xcf\x2b\xb6\xe6\x02\x04\x00\x00\xff\xff\x62\x4b\x2a\x6c\x21\x00\x00\x00")
func _0012_pending_transactionsDownSqlBytes() ([]byte, error) {
return bindataRead(
__0012_pending_transactionsDownSql,
"0012_pending_transactions.down.sql",
)
}
func _0012_pending_transactionsDownSql() (*asset, error) {
bytes, err := _0012_pending_transactionsDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0012_pending_transactions.down.sql", size: 33, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x7e, 0x41, 0xfe, 0x5c, 0xd8, 0xc3, 0x29, 0xfd, 0x31, 0x78, 0x99, 0x7a, 0xeb, 0x17, 0x62, 0x88, 0x41, 0xb3, 0xe7, 0xb5, 0x5, 0x0, 0x90, 0xa1, 0x7, 0x1a, 0x23, 0x88, 0x81, 0xba, 0x56, 0x9d}}
return a, nil
}
var __0012_pending_transactionsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x7c\x8f\xc1\x4e\x02\x31\x14\x45\xf7\xfd\x8a\xbb\x84\x84\x3f\x70\x55\xa0\xc2\x8b\x63\xc7\x74\xde\x08\xac\x9a\x62\xab\x4c\x90\x76\xd2\xd6\x18\xff\xde\xe8\x46\x8d\x13\x96\x2f\xe7\x9d\x9b\x9c\x95\x51\x92\x15\x58\x2e\x1b\x05\xba\x85\x6e\x19\x6a\x4f\x1d\x77\x18\x43\xf4\x43\x7c\xb1\x35\xbb\x58\xdc\x53\x1d\x52\x2c\x98\x09\x20\x86\xfa\x9e\xf2\xd9\x0e\x1e\xbd\xee\x68\xa3\xd5\x1a\x4b\xda\x90\xe6\x6f\x5d\xf7\x4d\xb3\x10\xc0\x2f\xcf\x9e\x5c\x39\xe1\x51\x9a\xd5\x56\x9a\x3f\x4f\xc7\xd7\xb3\x8d\x6f\x97\x63\xc8\x53\x13\xcf\x39\x5d\xac\xf3\x3e\x87\x52\x26\xf5\x9a\xae\xe3\x8f\x31\x4c\x02\xef\xaa\x03\xab\x3d\x7f\x1d\x0f\x86\xee\xa5\x39\xe0\x4e\x1d\x30\xfb\x89\x5b\xfc\x2b\x98\x8b\x39\x76\xc4\xdb\xb6\x67\x98\x76\x47\xeb\x1b\x21\x3e\x03\x00\x00\xff\xff\x9c\xbd\x6e\xaa\x41\x01\x00\x00")
func _0012_pending_transactionsUpSqlBytes() ([]byte, error) {
return bindataRead(
__0012_pending_transactionsUpSql,
"0012_pending_transactions.up.sql",
)
}
func _0012_pending_transactionsUpSql() (*asset, error) {
bytes, err := _0012_pending_transactionsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0012_pending_transactions.up.sql", size: 321, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xd, 0x17, 0xff, 0xd7, 0xa7, 0x49, 0x1e, 0x7b, 0x34, 0x63, 0x7c, 0x53, 0xaa, 0x6b, 0x2d, 0xc8, 0xe0, 0x82, 0x21, 0x90, 0x3a, 0x94, 0xf1, 0xa6, 0xe4, 0x70, 0xe5, 0x85, 0x1a, 0x48, 0x25, 0xb}}
return a, nil
}
var __0013_favouritesDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\x09\xf2\x0f\x50\x08\x71\x74\xf2\x71\x55\x48\x4b\x2c\xcb\x2f\x2d\xca\x2c\x49\x2d\xb6\xe6\x02\x04\x00\x00\xff\xff\x76\xaf\x3d\x70\x17\x00\x00\x00")
func _0013_favouritesDownSqlBytes() ([]byte, error) {
return bindataRead(
__0013_favouritesDownSql,
"0013_favourites.down.sql",
)
}
func _0013_favouritesDownSql() (*asset, error) {
bytes, err := _0013_favouritesDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0013_favourites.down.sql", size: 23, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x32, 0xf8, 0x55, 0x13, 0x4f, 0x4a, 0x19, 0x83, 0x9c, 0xda, 0x34, 0xb8, 0x3, 0x54, 0x82, 0x1e, 0x99, 0x36, 0x6b, 0x42, 0x3, 0xf6, 0x43, 0xde, 0xe6, 0x32, 0xb6, 0xdf, 0xe2, 0x59, 0x8c, 0x84}}
return a, nil
}
var __0013_favouritesUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x54\xcb\xb1\xaa\xc2\x40\x10\x46\xe1\x7e\x9e\xe2\x2f\x13\xb8\x6f\x70\xab\x31\x8e\x64\x70\xcd\xca\x64\x62\x92\x72\x21\x2b\x58\xa8\x90\x55\x9f\x5f\x10\x1b\xdb\xc3\xf9\x1a\x13\x76\x81\xf3\x26\x08\x74\x87\x2e\x3a\x64\xd2\xde\x7b\x9c\xd3\xeb\xfe\x5c\x2f\x8f\x5c\x50\x11\x90\x96\x65\xcd\xa5\xe0\xc4\xd6\xb4\x6c\x9f\xb3\x1b\x42\xf8\x23\xe0\x96\xae\x19\x2e\x93\xff\xd4\xa3\xe9\x81\x6d\xc6\x5e\x66\x54\x5f\x5e\x53\x8d\x51\xbd\x8d\x83\xc3\xe2\xa8\xdb\x7f\xa2\x77\x00\x00\x00\xff\xff\xb6\x82\x8c\x1e\x84\x00\x00\x00")
func _0013_favouritesUpSqlBytes() ([]byte, error) {
return bindataRead(
__0013_favouritesUpSql,
"0013_favourites.up.sql",
)
}
func _0013_favouritesUpSql() (*asset, error) {
bytes, err := _0013_favouritesUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0013_favourites.up.sql", size: 132, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xbe, 0x1, 0x27, 0x38, 0x76, 0xf5, 0xcb, 0x61, 0xda, 0x5b, 0xce, 0xd9, 0x8b, 0x18, 0x77, 0x61, 0x84, 0xe7, 0x22, 0xe2, 0x13, 0x99, 0xab, 0x32, 0xbc, 0xbe, 0xed, 0x1f, 0x2f, 0xb0, 0xe4, 0x8d}}
return a, nil
}
var __0014_add_use_mailserversDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x01\x00\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00")
func _0014_add_use_mailserversDownSqlBytes() ([]byte, error) {
return bindataRead(
__0014_add_use_mailserversDownSql,
"0014_add_use_mailservers.down.sql",
)
}
func _0014_add_use_mailserversDownSql() (*asset, error) {
bytes, err := _0014_add_use_mailserversDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0014_add_use_mailservers.down.sql", size: 0, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}}
return a, nil
}
var __0014_add_use_mailserversUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x70\x74\x71\x51\x70\xf6\xf7\x09\xf5\xf5\x53\x28\x2d\x4e\x8d\xcf\x4d\xcc\xcc\x29\x4e\x2d\x2a\x4b\x2d\x2a\x56\x70\xf2\xf7\xf7\x71\x75\xf4\x53\x70\x71\x75\x73\x0c\xf5\x09\x51\x08\x09\x0a\x75\xb5\xe6\x0a\x0d\x70\x71\x0c\x41\x32\x20\xd8\x35\x04\x43\xa7\xad\x82\xa1\x35\x17\x20\x00\x00\xff\xff\x78\x22\xcb\x3c\x6f\x00\x00\x00")
func _0014_add_use_mailserversUpSqlBytes() ([]byte, error) {
return bindataRead(
__0014_add_use_mailserversUpSql,
"0014_add_use_mailservers.up.sql",
)
}
func _0014_add_use_mailserversUpSql() (*asset, error) {
bytes, err := _0014_add_use_mailserversUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0014_add_use_mailservers.up.sql", size: 111, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xc9, 0xba, 0x65, 0xbf, 0x1b, 0xc9, 0x6d, 0x45, 0xf2, 0xf5, 0x30, 0x7c, 0xc1, 0xde, 0xb8, 0xe3, 0x3f, 0xa9, 0x2f, 0x9f, 0xea, 0x1, 0x29, 0x29, 0x65, 0xe7, 0x38, 0xab, 0xa4, 0x62, 0xf, 0xd0}}
return a, nil
}
var __0015_link_previewsDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x01\x00\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00")
func _0015_link_previewsDownSqlBytes() ([]byte, error) {
return bindataRead(
__0015_link_previewsDownSql,
"0015_link_previews.down.sql",
)
}
func _0015_link_previewsDownSql() (*asset, error) {
bytes, err := _0015_link_previewsDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0015_link_previews.down.sql", size: 0, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}}
return a, nil
}
var __0015_link_previewsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x94\xce\x31\x0a\x42\x31\x0c\x80\xe1\xbd\xa7\xc8\x15\x9c\x8b\x43\x6a\xe3\x14\x5f\xe5\x99\xce\x45\x31\x48\xf1\x51\xb4\xa9\x7a\x7d\x27\xc1\x49\xf0\x00\xff\xc7\x8f\x2c\x34\x83\x60\x60\x02\xd3\x31\x6a\xbb\x18\x60\x8c\xb0\x49\x9c\x77\x13\x2c\xb5\x5d\xcb\xad\xeb\xb3\xea\xab\x74\xbd\x3f\xd4\x46\xd1\x76\x3c\x2d\x7a\x86\x90\x12\x13\x4e\x10\x69\x8b\x99\x05\x64\xce\xe4\xdd\x3f\xa4\x7d\xac\x62\x75\xa8\x41\xe0\x14\xbc\xcb\xfb\x88\xf2\x15\x1f\x48\x7e\x8f\xac\x61\xe5\xdd\x3b\x00\x00\xff\xff\xe6\xf3\x89\x88\xcb\x00\x00\x00")
func _0015_link_previewsUpSqlBytes() ([]byte, error) {
return bindataRead(
__0015_link_previewsUpSql,
"0015_link_previews.up.sql",
)
}
func _0015_link_previewsUpSql() (*asset, error) {
bytes, err := _0015_link_previewsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0015_link_previews.up.sql", size: 203, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xb1, 0xf7, 0x38, 0x25, 0xa6, 0xfc, 0x6b, 0x9, 0xe4, 0xd9, 0xbf, 0x58, 0x7b, 0x80, 0xd8, 0x48, 0x63, 0xde, 0xa5, 0x5e, 0x30, 0xa3, 0xeb, 0x68, 0x8e, 0x6a, 0x9f, 0xfd, 0xf4, 0x46, 0x41, 0x34}}
return a, nil
}
var __0016_local_notifications_preferencesDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\x09\xf2\x0f\x50\x08\x71\x74\xf2\x71\x55\xc8\xc9\x4f\x4e\xcc\x89\xcf\xcb\x2f\xc9\x4c\xcb\x4c\x4e\x2c\xc9\xcc\xcf\x2b\x8e\x2f\x28\x4a\x4d\x4b\x2d\x4a\xcd\x4b\x4e\x2d\xb6\x06\x04\x00\x00\xff\xff\xf0\xdb\xee\xaa\x2b\x00\x00\x00")
func _0016_local_notifications_preferencesDownSqlBytes() ([]byte, error) {
return bindataRead(
__0016_local_notifications_preferencesDownSql,
"0016_local_notifications_preferences.down.sql",
)
}
func _0016_local_notifications_preferencesDownSql() (*asset, error) {
bytes, err := _0016_local_notifications_preferencesDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0016_local_notifications_preferences.down.sql", size: 43, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xe0, 0x50, 0xc7, 0xdd, 0x53, 0x9c, 0x5d, 0x1e, 0xb5, 0x71, 0x25, 0x50, 0x58, 0xcf, 0x6d, 0xbe, 0x5a, 0x8, 0x12, 0xc9, 0x13, 0xd, 0x9a, 0x3d, 0x4b, 0x7a, 0x2f, 0x1b, 0xe5, 0x23, 0x52, 0x78}}
return a, nil
}
var __0016_local_notifications_preferencesUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x5c\x8d\x41\x0b\x82\x30\x1c\x47\xef\x7e\x8a\xdf\x51\xc1\x6f\xd0\x69\xea\x42\xc9\x5a\x8c\x99\x78\x92\x35\xff\xc2\x60\xcc\xd8\xc4\xcf\x1f\x59\x10\x75\x7d\x3c\xde\x2b\x25\x67\x8a\x43\xb1\xa2\xe5\x70\x8b\xd1\x6e\xf4\xcb\x6a\x67\x6b\xf4\x6a\x17\x1f\xc7\x47\xa0\x99\x02\x79\x43\x11\x69\x02\x00\x91\xc2\x66\x0d\xe1\xc6\x64\x59\x33\x99\xef\x90\x36\xf2\xeb\x2f\xb2\x13\xf9\x57\x89\xc2\x9f\xea\xf5\xdd\xd1\x84\x42\x88\x96\xb3\x0b\x2a\x7e\x64\x5d\xab\x30\x6b\x17\xe9\xad\x5c\x65\x73\x66\x72\xc0\x89\x0f\xe9\x67\x97\xef\x87\xfc\x1b\xcd\x92\x0c\x7d\xa3\x6a\xd1\x29\x48\xd1\x37\xd5\xe1\x19\x00\x00\xff\xff\x02\x6d\x5e\xec\xcc\x00\x00\x00")
func _0016_local_notifications_preferencesUpSqlBytes() ([]byte, error) {
return bindataRead(
__0016_local_notifications_preferencesUpSql,
"0016_local_notifications_preferences.up.sql",
)
}
func _0016_local_notifications_preferencesUpSql() (*asset, error) {
bytes, err := _0016_local_notifications_preferencesUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0016_local_notifications_preferences.up.sql", size: 204, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x3f, 0x3a, 0x16, 0x25, 0xdf, 0xba, 0x62, 0xd3, 0x81, 0x73, 0xc, 0x10, 0x85, 0xbc, 0x8d, 0xe, 0x1d, 0x62, 0xcb, 0xb, 0x6d, 0x8c, 0x4f, 0x63, 0x5f, 0xe2, 0xd, 0xc5, 0x46, 0xa8, 0x35, 0x5b}}
return a, nil
}
var __0017_bookmarksDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\x09\xf2\x0f\x50\x08\x71\x74\xf2\x71\x55\x48\xca\xcf\xcf\xce\x4d\x2c\xca\x2e\xb6\xe6\x02\x04\x00\x00\xff\xff\xfb\x69\xe4\xcd\x16\x00\x00\x00")
func _0017_bookmarksDownSqlBytes() ([]byte, error) {
return bindataRead(
__0017_bookmarksDownSql,
"0017_bookmarks.down.sql",
)
}
func _0017_bookmarksDownSql() (*asset, error) {
bytes, err := _0017_bookmarksDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0017_bookmarks.down.sql", size: 22, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x9a, 0x13, 0x2a, 0x44, 0xb0, 0x3, 0x18, 0x63, 0xb8, 0x33, 0xda, 0x3a, 0xeb, 0xb8, 0xcb, 0xd1, 0x98, 0x29, 0xa7, 0xf0, 0x6, 0x9d, 0xc9, 0x62, 0xe7, 0x89, 0x7f, 0x77, 0xaf, 0xec, 0x6b, 0x8f}}
return a, nil
}
var __0017_bookmarksUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x6c\x8b\x3d\xaa\xc2\x40\x10\xc7\xfb\x3d\xc5\xbf\x4c\xe0\xdd\xe0\x55\x63\x1c\xc9\xe0\x9a\x95\xc9\xc4\x98\x4a\x56\x08\x22\xc9\x1a\x58\xc9\xfd\x45\x2b\x0b\xdb\xdf\x47\xa5\x4c\xc6\x30\xda\x78\x86\xec\xd0\x04\x03\x9f\xa5\xb5\x16\xd7\x65\x99\x52\xcc\xd3\x13\x85\x03\xd6\x3c\xe3\x44\x5a\xd5\xa4\x9f\xa8\xe9\xbc\xff\x73\xc0\x23\xa6\xf1\xa7\xb8\xa7\x78\x1b\x2f\x5f\xdb\x1b\x1e\x55\x0e\xa4\x03\xf6\x3c\xa0\x58\xf3\x5c\xba\x12\xbd\x58\x1d\x3a\x83\x86\x5e\xb6\xff\xce\xbd\x02\x00\x00\xff\xff\x91\xa6\x3e\xcb\x93\x00\x00\x00")
func _0017_bookmarksUpSqlBytes() ([]byte, error) {
return bindataRead(
__0017_bookmarksUpSql,
"0017_bookmarks.up.sql",
)
}
func _0017_bookmarksUpSql() (*asset, error) {
bytes, err := _0017_bookmarksUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0017_bookmarks.up.sql", size: 147, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xbc, 0x47, 0xe1, 0xe3, 0xd8, 0xc6, 0x4, 0x6d, 0x5f, 0x2f, 0xa, 0x51, 0xa6, 0x8c, 0x6a, 0xe0, 0x3d, 0x8c, 0x91, 0x47, 0xbc, 0x1, 0x75, 0x46, 0x92, 0x2, 0x18, 0x6e, 0xe3, 0x4f, 0x18, 0x57}}
return a, nil
}
var __0018_profile_pictures_visibilityUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x04\xc0\x31\x0a\x42\x31\x0c\x06\xe0\xdd\x53\xfc\x57\x70\x76\x8a\xb6\x82\x10\x53\x90\x74\x2e\x28\x55\x02\x45\x4b\x13\x05\x6f\xff\x3e\x62\xcd\x37\x28\x1d\x39\xc3\x7b\x84\xbd\x5f\x0e\x4a\x09\xa7\xc2\xf5\x2a\x98\xeb\xf3\xb4\xd1\xdb\xb4\x47\x7c\x57\xf7\xf6\x33\xb7\xbb\x0d\x8b\x3f\x2e\xa2\x90\xa2\x90\xca\x8c\x94\xcf\x54\x59\xb1\x3f\xec\xb6\x00\x00\x00\xff\xff\xf9\x2a\x51\x1e\x54\x00\x00\x00")
func _0018_profile_pictures_visibilityUpSqlBytes() ([]byte, error) {
return bindataRead(
__0018_profile_pictures_visibilityUpSql,
"0018_profile_pictures_visibility.up.sql",
)
}
func _0018_profile_pictures_visibilityUpSql() (*asset, error) {
bytes, err := _0018_profile_pictures_visibilityUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0018_profile_pictures_visibility.up.sql", size: 84, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xc9, 0xe3, 0xc5, 0xec, 0x83, 0x55, 0x45, 0x57, 0x7a, 0xaa, 0xd2, 0xa7, 0x59, 0xa7, 0x87, 0xef, 0x63, 0x19, 0x9c, 0x46, 0x9c, 0xc5, 0x32, 0x89, 0xa4, 0x68, 0x70, 0xd8, 0x83, 0x43, 0xa4, 0x72}}
return a, nil
}
var __0019_blocks_ranges_extra_dataUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x48\xca\xc9\x4f\xce\x2e\x8e\x2f\x4a\xcc\x4b\x4f\x2d\x56\x70\x74\x71\x51\x48\x4a\xcc\x49\xcc\x4b\x4e\x55\x70\xf2\xf1\x77\xb2\xe6\xc2\xaf\x36\x2f\x1f\xa4\xd2\xd3\x2f\xc4\xd5\xdd\x35\xc8\x9a\x0b\x10\x00\x00\xff\xff\x83\x20\x4e\x94\x59\x00\x00\x00")
func _0019_blocks_ranges_extra_dataUpSqlBytes() ([]byte, error) {
return bindataRead(
__0019_blocks_ranges_extra_dataUpSql,
"0019_blocks_ranges_extra_data.up.sql",
)
}
func _0019_blocks_ranges_extra_dataUpSql() (*asset, error) {
bytes, err := _0019_blocks_ranges_extra_dataUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0019_blocks_ranges_extra_data.up.sql", size: 89, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xa3, 0x96, 0x32, 0x58, 0xf0, 0xb9, 0xe1, 0x70, 0x81, 0xca, 0x8d, 0x45, 0x57, 0x8a, 0x7, 0x5d, 0x9e, 0x2a, 0x30, 0xb, 0xad, 0x5f, 0xf8, 0xd4, 0x30, 0x94, 0x73, 0x37, 0x8d, 0xc1, 0x9a, 0xed}}
return a, nil
}
var __0020_metricsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x74\xcc\x4d\xaa\xc2\x30\x14\xc5\xf1\x79\x57\x71\x86\xef\x81\x9b\x88\xf5\x16\x0b\x69\x95\xf4\x56\x3a\x0b\xa1\x5e\xa4\x60\x3f\x48\x62\xc0\xdd\x8b\x8a\x4e\xa4\x67\xfa\x3b\xfc\x73\x43\x8a\x09\xac\xb6\x9a\x50\x16\xa8\x0f\x0c\xea\xca\x86\x1b\xb8\x65\xb1\xa3\x44\x3f\xf4\x01\x7f\x19\xde\x93\x24\x53\xc4\x49\x99\x7c\xaf\xcc\xeb\x5d\xb7\x5a\x6f\x3e\x9c\xdc\xf5\x26\x60\xea\xf8\xd7\x9e\xbd\x24\x3e\x0c\xf3\xb4\x1e\x98\x17\xf1\x2e\x0e\xd3\xc5\x86\x7b\x88\x32\xae\x3f\x7b\x2f\x2e\xca\xd9\xba\x08\x2e\x2b\x6a\x58\x55\x47\xec\xa8\x50\xad\x66\xe4\xad\x31\x54\xb3\xfd\x4a\xf6\x9f\x3d\x02\x00\x00\xff\xff\x95\xc5\x25\x15\xeb\x00\x00\x00")
func _0020_metricsUpSqlBytes() ([]byte, error) {
return bindataRead(
__0020_metricsUpSql,
"0020_metrics.up.sql",
)
}
func _0020_metricsUpSql() (*asset, error) {
bytes, err := _0020_metricsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0020_metrics.up.sql", size: 235, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xe8, 0x32, 0xbc, 0xb6, 0x9b, 0x5a, 0x8f, 0x9f, 0x4c, 0x90, 0x81, 0x3e, 0x2e, 0xd1, 0x23, 0xcd, 0xf1, 0x83, 0x35, 0xca, 0x66, 0x87, 0x52, 0x4e, 0x30, 0x3e, 0x4f, 0xa8, 0xfd, 0x30, 0x16, 0xbd}}
return a, nil
}
var __0021_add_session_id_to_metricsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x48\x2c\x28\x88\xcf\x4d\x2d\x29\xca\x4c\x2e\x56\x70\x74\x71\x51\x70\xf6\xf7\x09\xf5\xf5\x53\x28\x4e\x2d\x2e\xce\xcc\xcf\x8b\xcf\x4c\x51\x08\x73\x0c\x72\xf6\x70\x0c\xb2\xe6\x02\x04\x00\x00\xff\xff\x10\x56\x8d\x9e\x37\x00\x00\x00")
func _0021_add_session_id_to_metricsUpSqlBytes() ([]byte, error) {
return bindataRead(
__0021_add_session_id_to_metricsUpSql,
"0021_add_session_id_to_metrics.up.sql",
)
}
func _0021_add_session_id_to_metricsUpSql() (*asset, error) {
bytes, err := _0021_add_session_id_to_metricsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0021_add_session_id_to_metrics.up.sql", size: 55, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xb7, 0x81, 0xfc, 0x97, 0xd1, 0x8b, 0xea, 0x8e, 0xd7, 0xc2, 0x53, 0x62, 0xe9, 0xbc, 0xf, 0x8c, 0x46, 0x41, 0x41, 0xb7, 0x6, 0x35, 0xf5, 0xba, 0xbb, 0x28, 0x50, 0x48, 0xbf, 0x36, 0x90, 0x5c}}
return a, nil
}
var __0022_pending_transfersUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x8c\x92\x4f\x6f\xe2\x30\x10\xc5\xef\xfe\x14\x73\x04\xc9\x87\xbd\x73\x72\xc0\x80\xb5\xc1\x46\xce\xb0\xc0\xc9\xf2\xe2\x14\xac\xe6\x9f\x62\xb7\x15\xdf\xbe\x22\xb4\x25\x50\x8a\x7a\x7d\xcf\xf3\x7b\xa3\xe7\x61\x29\x72\x0d\xc8\x92\x94\x43\x93\x57\xce\x57\x7b\x13\x5b\x5b\x05\xbb\x8b\xbe\xae\x02\x68\x2e\xd9\x82\x03\xaa\xbb\xb6\xa9\x0b\x37\x22\x64\xac\x39\x43\xfe\x81\x11\x53\x90\x0a\x81\x6f\x44\x86\xd9\x7d\xe8\x80\x00\x54\x79\x7c\xab\xdb\x67\xe3\x1d\xac\x64\x26\x66\x92\x4f\x20\x11\x33\x21\xb1\x1b\x97\xab\x34\xa5\x04\xe0\x60\xc3\x01\xfe\x31\x3d\x9e\x33\x7d\x65\x44\x5f\xe6\x21\xda\xb2\x79\x38\xfe\xd4\xd6\xa5\xb1\xce\xb5\x79\x08\xf7\x31\xf5\xad\x7d\x52\xc3\xb1\xfc\x5f\x17\x7d\x65\x6f\x83\x69\x5a\xbf\xcb\x21\x49\x55\xf2\xa9\x14\xbe\xf4\xf1\x4b\x79\xb5\xc5\xcb\xc5\x77\x36\x5a\x40\xbe\xc1\x2e\xe6\xd8\xe4\x7d\x9c\x75\xce\x9f\xba\xb0\x85\xb9\x7a\xb7\xd4\x62\xc1\xf4\x16\xfe\xf2\x2d\x0c\x2e\x15\xd1\xae\x87\x21\x19\xc2\x5a\xe0\x5c\xad\x10\xb4\x5a\x8b\xc9\x88\x10\x21\x33\xae\x11\x84\xfc\xe1\x87\xbe\x41\xe8\x55\x27\xb4\x57\x00\xed\xb6\xa4\xb7\xbb\xd1\x4b\xd5\x43\x92\xf1\x94\x8f\x11\xfa\xd0\x5e\x9a\xf9\x5d\xc0\x99\xfa\x07\xa6\x5a\x2d\x1e\x9d\xd5\x44\xab\xe5\x83\xdb\x3c\xbf\x7a\x0f\x00\x00\xff\xff\x05\xa2\xfe\xa8\xc2\x02\x00\x00")
func _0022_pending_transfersUpSqlBytes() ([]byte, error) {
return bindataRead(
__0022_pending_transfersUpSql,
"0022_pending_transfers.up.sql",
)
}
func _0022_pending_transfersUpSql() (*asset, error) {
bytes, err := _0022_pending_transfersUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "0022_pending_transfers.up.sql", size: 706, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x6a, 0x9, 0xe6, 0x6, 0xae, 0x60, 0xdd, 0xbb, 0x76, 0xac, 0xe0, 0x57, 0x30, 0x67, 0x37, 0x93, 0x40, 0x13, 0xec, 0xf2, 0x6e, 0x61, 0xa, 0x14, 0xb2, 0xb1, 0xbd, 0x91, 0xf8, 0x89, 0xb3, 0xe3}}
return a, nil
}
var __1618237885_settings_anon_metrics_should_sendUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x04\xc0\x31\x0e\x42\x21\x0c\x06\xe0\xdd\x53\xfc\xf7\x70\xea\x93\x3a\x55\x48\x0c\xcc\x84\x48\x55\x12\x2c\x89\xc5\xfb\xfb\x91\x64\xbe\x23\xd3\x21\x0c\xd7\xbd\x87\xbd\x1c\x14\x02\x2e\x49\xca\x2d\xa2\xd9\xb2\xfa\xd1\xfd\x1d\x0f\xaf\xfe\x5e\xbf\xd9\xab\xab\x75\x1c\x29\x09\x53\x44\xe0\x2b\x15\xc9\x78\xb6\xe9\x7a\x3e\xfd\x03\x00\x00\xff\xff\x9a\x73\xdd\x50\x50\x00\x00\x00")
func _1618237885_settings_anon_metrics_should_sendUpSqlBytes() ([]byte, error) {
return bindataRead(
__1618237885_settings_anon_metrics_should_sendUpSql,
"1618237885_settings_anon_metrics_should_send.up.sql",
)
}
func _1618237885_settings_anon_metrics_should_sendUpSql() (*asset, error) {
bytes, err := _1618237885_settings_anon_metrics_should_sendUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1618237885_settings_anon_metrics_should_send.up.sql", size: 80, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xea, 0x6c, 0x1d, 0x1f, 0x54, 0x62, 0x18, 0x22, 0x5c, 0xa7, 0x8c, 0x59, 0x24, 0xd3, 0x4d, 0x55, 0xc4, 0x2a, 0x9e, 0x4c, 0x37, 0x6b, 0xfd, 0xac, 0xec, 0xb7, 0x68, 0x21, 0x26, 0x26, 0xf3, 0x92}}
return a, nil
}
var __1618395756_contacts_onlyUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x7c\xcc\x31\x0e\xc2\x30\x0c\x05\xd0\xbd\xa7\xf8\x47\x60\xaf\x18\x5c\xe2\x4e\xa6\x41\xd4\x99\xa3\xaa\x0a\x15\x12\x49\x24\xec\x85\xdb\xb3\x32\x71\x81\x47\xa2\x7c\x87\xd2\x24\x0c\x2b\xee\xcf\x76\x18\x28\x04\x5c\xa2\xa4\xeb\x82\x5a\xcc\xb6\xa3\x58\x7e\xbc\x7b\xcd\x7b\x6f\xbe\xed\x6e\xb9\xb7\xd7\x07\x53\x8c\xc2\xb4\x20\xf0\x4c\x49\x14\x33\xc9\xca\xe3\x90\x6e\x81\xf4\x47\x5b\x59\xff\x32\x67\x9c\xc6\xe1\x1b\x00\x00\xff\xff\x99\x20\xa4\x1d\x88\x00\x00\x00")
func _1618395756_contacts_onlyUpSqlBytes() ([]byte, error) {
return bindataRead(
__1618395756_contacts_onlyUpSql,
"1618395756_contacts_only.up.sql",
)
}
func _1618395756_contacts_onlyUpSql() (*asset, error) {
bytes, err := _1618395756_contacts_onlyUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1618395756_contacts_only.up.sql", size: 136, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x1, 0xe3, 0xd0, 0xe7, 0xf2, 0x6e, 0xbf, 0x27, 0xf6, 0xe2, 0x2e, 0x16, 0x4b, 0x52, 0x3b, 0xcf, 0x63, 0x52, 0xfc, 0x1d, 0x43, 0xba, 0x42, 0xf9, 0x1e, 0x1e, 0x39, 0x40, 0xed, 0x0, 0x20, 0xa8}}
return a, nil
}
var __1622184614_add_default_sync_periodUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x70\x74\x71\x51\x70\xf6\xf7\x09\xf5\xf5\x53\x48\x49\x4d\x4b\x2c\xcd\x29\x89\x2f\xae\xcc\x4b\x8e\x2f\x48\x2d\xca\xcc\x4f\x51\xf0\xf4\x0b\x71\x75\x77\x0d\x52\x70\x71\x75\x73\x0c\xf5\x09\x51\xb0\x30\x33\x31\x30\xb0\xe6\x0a\x0d\x70\x71\x0c\x41\x32\x25\xd8\x35\x04\xab\x76\x5b\x98\x06\x2e\x40\x00\x00\x00\xff\xff\xdd\x46\xb2\xc4\x7d\x00\x00\x00")
func _1622184614_add_default_sync_periodUpSqlBytes() ([]byte, error) {
return bindataRead(
__1622184614_add_default_sync_periodUpSql,
"1622184614_add_default_sync_period.up.sql",
)
}
func _1622184614_add_default_sync_periodUpSql() (*asset, error) {
bytes, err := _1622184614_add_default_sync_periodUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1622184614_add_default_sync_period.up.sql", size: 125, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x60, 0x39, 0xeb, 0x8f, 0xdc, 0x1, 0x56, 0xc1, 0x9b, 0xaa, 0xda, 0x44, 0xe0, 0xdb, 0xda, 0x2c, 0xe7, 0x71, 0x8d, 0xbc, 0xc1, 0x9a, 0x4f, 0x48, 0xe0, 0x5e, 0x81, 0x1e, 0x8e, 0x6a, 0x4d, 0x3}}
return a, nil
}
var __1625872445_user_statusUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x84\x8e\x41\x4b\xc3\x40\x10\x85\xef\xfb\x2b\x1e\x3d\x29\x78\xd0\x73\xf0\xb0\x49\xa6\x50\x9c\xee\x86\x75\x02\xf6\x14\x6a\xba\x48\x69\x4d\x43\x76\x16\xec\xbf\x97\x6a\x2b\x16\x04\xaf\x33\xdf\xf7\xde\xb3\x2c\x14\x20\xb6\x64\x42\x8a\xaa\xdb\xe1\x2d\xc1\xd6\x35\x2a\xcf\xed\xd2\xa1\xcf\xd3\x14\x07\xed\x72\x8a\x53\x97\x74\xad\x39\xa1\x64\x5f\x16\xe6\x3f\x33\xc5\x61\x73\x36\xba\x3c\x6e\xd6\x1a\x13\x4a\xef\x99\xac\x43\x4d\x73\xdb\xb2\x40\x42\x4b\x85\x69\x9b\xda\xca\xaf\x90\x67\x92\x3f\xed\x47\x3c\x14\xa6\x0a\x74\x82\xcf\xbd\xd7\xc0\x8d\x01\xc6\xfc\xba\xdf\xf6\xdd\x2e\x1e\x21\xf4\x22\x68\xc2\x62\x69\xc3\x0a\x4f\xb4\x82\x77\xa8\xbc\x9b\xf3\xa2\x12\x04\x6a\xd8\x56\x74\x67\x70\x49\xd1\xe3\x18\xb1\x70\x02\xe7\x05\xae\x65\xfe\x99\x79\x7f\xa2\xfa\xfd\xa1\xdf\x5d\xfd\xbf\xae\x39\xe9\xe1\xbd\xd3\xf8\xa1\xdf\x7d\x17\x67\x36\x33\xb7\x85\xf9\x0c\x00\x00\xff\xff\xa2\xed\xdb\xfc\x5f\x01\x00\x00")
func _1625872445_user_statusUpSqlBytes() ([]byte, error) {
return bindataRead(
__1625872445_user_statusUpSql,
"1625872445_user_status.up.sql",
)
}
func _1625872445_user_statusUpSql() (*asset, error) {
bytes, err := _1625872445_user_statusUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1625872445_user_status.up.sql", size: 351, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xf5, 0xa, 0xfe, 0x7a, 0xcc, 0x9e, 0x35, 0x26, 0xb, 0xc8, 0xf2, 0x7d, 0xfa, 0x4b, 0xcf, 0x53, 0x20, 0x76, 0xc7, 0xd, 0xbc, 0x78, 0x4f, 0x74, 0x2d, 0x2e, 0x2e, 0x7e, 0x62, 0xae, 0x78, 0x1f}}
return a, nil
}
var __1627983977_add_gif_to_settingsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x70\x74\x71\x51\x70\xf6\xf7\x09\xf5\xf5\x53\x48\xcf\x4c\x8b\x2f\x4a\x4d\x4e\xcd\x2b\x29\x56\x70\xf2\xf1\x77\xb2\xe6\x22\x46\x47\x5a\x62\x59\x7e\x51\x66\x49\x2a\x4c\x0f\x20\x00\x00\xff\xff\x41\xe4\x6a\x80\x66\x00\x00\x00")
func _1627983977_add_gif_to_settingsUpSqlBytes() ([]byte, error) {
return bindataRead(
__1627983977_add_gif_to_settingsUpSql,
"1627983977_add_gif_to_settings.up.sql",
)
}
func _1627983977_add_gif_to_settingsUpSql() (*asset, error) {
bytes, err := _1627983977_add_gif_to_settingsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1627983977_add_gif_to_settings.up.sql", size: 102, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x63, 0xe6, 0xe1, 0x97, 0x64, 0x4c, 0xe2, 0x14, 0xb1, 0x96, 0x3a, 0xb0, 0xb9, 0xb7, 0xb5, 0x78, 0x4a, 0x39, 0x69, 0x89, 0xb7, 0x89, 0x19, 0xb8, 0x89, 0x1, 0xc5, 0xc2, 0x85, 0x53, 0xe2, 0x83}}
return a, nil
}
var __1628580203_add_hidden_accountUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x04\xc0\xc1\x0d\x80\x20\x0c\x05\xd0\x55\xfe\x1e\x9e\x8a\x94\xd3\x97\x26\xda\x0e\x60\xc0\x44\x2f\x78\x50\xf7\xf7\x09\x5d\x57\xb8\x24\x2a\xf6\xd6\xee\x6f\xbc\x0f\x24\x67\xcc\xc6\x58\x2a\xce\xab\xf7\x63\x20\x99\x11\xd5\x1c\x35\x48\x64\x2d\x12\x74\x14\xe1\xa6\xd3\x1f\x00\x00\xff\xff\xd8\xcf\x59\x5c\x43\x00\x00\x00")
func _1628580203_add_hidden_accountUpSqlBytes() ([]byte, error) {
return bindataRead(
__1628580203_add_hidden_accountUpSql,
"1628580203_add_hidden_account.up.sql",
)
}
func _1628580203_add_hidden_accountUpSql() (*asset, error) {
bytes, err := _1628580203_add_hidden_accountUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1628580203_add_hidden_account.up.sql", size: 67, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xcb, 0x30, 0xf1, 0xd4, 0x60, 0xe2, 0x28, 0x14, 0xcb, 0x16, 0xb, 0x9, 0xea, 0x17, 0xa, 0x9e, 0x89, 0xa8, 0x32, 0x32, 0xf8, 0x4d, 0xa0, 0xe1, 0xe5, 0x79, 0xbd, 0x7d, 0x79, 0xe9, 0x4c, 0x9e}}
return a, nil
}
var __1629123384_add_id_to_app_metricsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xac\x51\xcb\x6e\xc2\x40\x0c\xbc\xe7\x2b\x7c\x04\x29\x7f\xc0\xc9\x04\xd3\x46\xdd\x07\x72\x9c\xaa\x9c\x56\x11\xb1\xaa\x48\x05\xa2\xec\x16\xa9\x7f\x5f\x41\x5a\x14\x15\xb8\xf5\xea\xf1\xcc\x78\xc6\x68\x84\x18\x04\x97\x86\xa0\xe9\xfb\xb0\xd7\x34\x74\xbb\x08\x4c\x0e\x2d\x81\x78\x48\xba\xef\xc3\x04\x5a\x64\x05\x13\x0a\xdd\x21\xcd\x32\x00\x80\xae\x85\xd2\x09\x3d\x11\xc3\x86\x4b\x8b\xbc\x85\x17\xda\x02\xd6\xe2\x4b\x57\x30\x59\x72\x92\x5f\x36\xf5\xa4\x87\x04\xaf\xc8\xc5\x33\x32\x38\x2f\xe0\x6a\x63\x46\xec\xd4\x7c\x7c\x2a\x08\xbd\xc9\x1f\xe0\x6c\x78\xd2\x21\x76\xc7\xc3\x03\xea\xb1\xd7\xa1\x49\xdd\xe1\x3d\xc4\xaf\x98\x74\xff\x60\x6d\x37\x68\x93\xb4\x0d\x4d\x02\x29\x2d\x55\x82\x76\x03\x2b\x5a\x63\x6d\x04\x8a\x9a\x99\x9c\x84\x2b\x32\x72\xa2\xc6\xb3\x71\xe8\xda\x5f\xd1\x71\xde\x0f\xc7\x9d\xc6\xa8\x2d\x2c\xbd\x37\x84\xee\xea\x75\x55\x5c\xa3\xa9\x68\xbe\xc8\x4a\x57\x11\xcb\xb9\x22\x3f\x2d\x6f\x76\x29\x23\x1f\x73\xe7\xd3\x94\xf9\x4d\xa0\x7c\x72\x7b\x3e\xb9\x69\x9e\x55\x64\xa8\x10\xf8\x07\xad\x6c\xcd\xde\xde\x79\xfe\x8a\xfd\xe6\xe7\xf5\x37\xe0\x77\x00\x00\x00\xff\xff\x7c\x73\x3b\xdc\x4d\x02\x00\x00")
func _1629123384_add_id_to_app_metricsUpSqlBytes() ([]byte, error) {
return bindataRead(
__1629123384_add_id_to_app_metricsUpSql,
"1629123384_add_id_to_app_metrics.up.sql",
)
}
func _1629123384_add_id_to_app_metricsUpSql() (*asset, error) {
bytes, err := _1629123384_add_id_to_app_metricsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1629123384_add_id_to_app_metrics.up.sql", size: 589, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xdf, 0x66, 0xc0, 0x69, 0xb, 0xad, 0x49, 0x7c, 0x8c, 0x67, 0xb8, 0xd6, 0x8d, 0x5d, 0x86, 0x1f, 0xa4, 0x53, 0xf5, 0x8, 0x1, 0xfd, 0x38, 0x49, 0xee, 0x84, 0xc0, 0xd8, 0x17, 0x72, 0x3, 0xb3}}
return a, nil
}
var __1630401853_add_opensea_enabled_to_settingsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x04\xc0\x51\x0a\x80\x20\x0c\x06\xe0\xab\xfc\xf7\xe8\x69\xe6\x7a\x5a\x0a\xa1\xcf\x61\xb4\x22\x90\x15\xcc\xfb\xd3\x47\x52\x78\x43\xa1\x20\x0c\xd7\x31\x1e\xbb\x1d\x14\x23\xe6\x2c\x75\x4d\x78\x3f\x35\xd7\xb6\xab\xb5\xa3\xeb\x89\x90\xb3\x30\x25\x44\x5e\xa8\x4a\xc1\xd5\xba\xeb\xf4\x07\x00\x00\xff\xff\x0e\x97\x7a\x0e\x46\x00\x00\x00")
func _1630401853_add_opensea_enabled_to_settingsUpSqlBytes() ([]byte, error) {
return bindataRead(
__1630401853_add_opensea_enabled_to_settingsUpSql,
"1630401853_add_opensea_enabled_to_settings.up.sql",
)
}
func _1630401853_add_opensea_enabled_to_settingsUpSql() (*asset, error) {
bytes, err := _1630401853_add_opensea_enabled_to_settingsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1630401853_add_opensea_enabled_to_settings.up.sql", size: 70, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x6, 0x91, 0x86, 0x15, 0xc8, 0x99, 0xe3, 0xae, 0xa, 0x6e, 0x94, 0x48, 0x51, 0x5b, 0x18, 0xe0, 0xbc, 0xaf, 0x34, 0x75, 0x55, 0x61, 0xd4, 0xc1, 0x85, 0xc7, 0x3d, 0x99, 0x9e, 0x1f, 0x37, 0x56}}
return a, nil
}
var __1630464455_createSaved_addressesTableDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\x09\xf2\x0f\x50\x08\x71\x74\xf2\x71\x55\x28\x4e\x2c\x4b\x4d\x89\x4f\x4c\x49\x29\x4a\x2d\x2e\x4e\x2d\xb6\xe6\x02\x04\x00\x00\xff\xff\xa5\x19\xd2\x59\x1c\x00\x00\x00")
func _1630464455_createSaved_addressesTableDownSqlBytes() ([]byte, error) {
return bindataRead(
__1630464455_createSaved_addressesTableDownSql,
"1630464455_create-saved_addresses-table.down.sql",
)
}
func _1630464455_createSaved_addressesTableDownSql() (*asset, error) {
bytes, err := _1630464455_createSaved_addressesTableDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1630464455_create-saved_addresses-table.down.sql", size: 28, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x23, 0x52, 0x39, 0xb5, 0x42, 0xac, 0xcb, 0xa1, 0x44, 0xb7, 0x94, 0x26, 0x24, 0xb2, 0x12, 0xc, 0xc5, 0xbf, 0x63, 0x13, 0x6f, 0x3c, 0x4, 0x7b, 0xf0, 0xd, 0xfa, 0x55, 0x9e, 0x51, 0xf9, 0x7a}}
return a, nil
}
var __1630464455_createSaved_addressesTableUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x54\xcd\xc1\xca\x82\x40\x14\x05\xe0\xfd\x3c\xc5\x59\x2a\xf8\x06\xff\x6a\xd4\xfb\xeb\x25\x1b\xe3\x7a\x4d\x5d\x89\x30\xb3\x88\xa8\xc0\x89\x7a\xfd\x20\x8a\x70\x79\x0e\x1f\xe7\x14\x42\x56\x09\x6a\xf3\x86\xc0\xff\x70\xad\x82\x46\xee\xb4\x43\x5c\x1e\xc1\xcf\x8b\xf7\x6b\x88\x31\x44\x24\x06\xf8\x24\x1c\xad\x14\xb5\x95\x37\x77\x7d\xd3\x64\x06\xb8\x86\xfb\xf3\xb6\x9e\xe7\x93\x47\xef\x3a\xae\x1c\x95\xc8\xb9\x62\xa7\x5b\xb6\x5c\x02\x94\xc6\x6d\x7b\x10\xde\x5b\x99\xb0\xa3\x09\xc9\x6f\x29\xfb\x3e\xa6\x26\xc5\xc0\x5a\xb7\xbd\x42\xda\x81\xcb\x3f\xf3\x0a\x00\x00\xff\xff\x02\x6b\x7f\x7c\xbb\x00\x00\x00")
func _1630464455_createSaved_addressesTableUpSqlBytes() ([]byte, error) {
return bindataRead(
__1630464455_createSaved_addressesTableUpSql,
"1630464455_create-saved_addresses-table.up.sql",
)
}
func _1630464455_createSaved_addressesTableUpSql() (*asset, error) {
bytes, err := _1630464455_createSaved_addressesTableUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1630464455_create-saved_addresses-table.up.sql", size: 187, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x32, 0xf, 0x56, 0x18, 0xeb, 0x4e, 0xac, 0xd8, 0xd6, 0x91, 0xae, 0x83, 0xcf, 0x91, 0x9e, 0x4, 0x4b, 0x2, 0x1f, 0x6d, 0xba, 0xf6, 0x3, 0xf2, 0x98, 0x72, 0xf6, 0x91, 0x29, 0x96, 0x0, 0x35}}
return a, nil
}
var __1630485153_networksDownSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\x09\xf2\x0f\x50\x08\x71\x74\xf2\x71\x55\xc8\x4b\x2d\x29\xcf\x2f\xca\x2e\xb6\xe6\x02\x04\x00\x00\xff\xff\xbd\xca\x6a\x0e\x15\x00\x00\x00")
func _1630485153_networksDownSqlBytes() ([]byte, error) {
return bindataRead(
__1630485153_networksDownSql,
"1630485153_networks.down.sql",
)
}
func _1630485153_networksDownSql() (*asset, error) {
bytes, err := _1630485153_networksDownSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1630485153_networks.down.sql", size: 21, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xbb, 0x3e, 0x57, 0xb7, 0xf7, 0x8, 0xbd, 0xb5, 0xc2, 0xea, 0xc, 0x45, 0xb7, 0x7, 0x9, 0xca, 0xe7, 0x48, 0x7e, 0x56, 0x4e, 0x44, 0x78, 0x8e, 0xe3, 0x87, 0x63, 0xaf, 0x16, 0x3f, 0xf9, 0x71}}
return a, nil
}
var __1630485153_networksUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x7c\x90\x4d\x4e\xc3\x30\x10\x85\xf7\x39\xc5\x2c\x5b\x89\x1b\xb0\x72\x5a\xd3\x8e\x08\x0e\x72\x1c\x4a\x57\x96\xe3\x8c\x84\x55\xc7\xae\xec\x14\xc8\xed\x51\x2a\xf1\x53\xa8\xd8\xbe\xef\x9b\x79\x9a\x59\x49\xce\x14\x07\xc5\xca\x8a\x03\xde\x81\xa8\x15\xf0\x67\x6c\x54\x03\x81\xc6\xb7\x98\x0e\x19\x16\x05\x80\x7d\x31\x2e\x68\xd7\x43\x2b\x1a\xdc\x08\xbe\x86\x12\x37\x28\xd4\x79\x40\xb4\x55\x75\xf3\x25\x05\x33\x10\x3c\x31\xb9\xda\x32\x79\x81\xd3\xd1\xea\x53\xf2\x57\x59\xe7\xa3\x3d\x68\x7a\x3f\xfa\x98\x28\xfd\xd4\x66\xea\x6c\x0c\xbf\xb3\x60\x46\xf7\x4a\xda\x9e\x52\xa2\x60\xa7\x8b\xda\x6b\x3c\x4f\x43\x17\xff\xdd\xd0\x93\x75\x83\xf1\xf9\xfb\x46\x14\xea\x5c\x9f\xf5\x48\x79\x84\xb2\xae\x2b\xce\xc4\x1c\x79\x33\x51\xfa\x23\x52\x30\x9d\xa7\x1e\xca\x18\x3d\x99\x30\x47\x8f\x12\x1f\x98\xdc\xc3\x3d\xdf\xc3\xe2\xf3\x8b\xcb\x62\x09\x3b\x54\xdb\xba\x55\x20\xeb\x1d\xae\x6f\x8b\xe2\x23\x00\x00\xff\xff\x47\xbc\x43\xc3\x8a\x01\x00\x00")
func _1630485153_networksUpSqlBytes() ([]byte, error) {
return bindataRead(
__1630485153_networksUpSql,
"1630485153_networks.up.sql",
)
}
func _1630485153_networksUpSql() (*asset, error) {
bytes, err := _1630485153_networksUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1630485153_networks.up.sql", size: 394, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xed, 0x9, 0x1d, 0x3, 0x86, 0xbd, 0xc5, 0xde, 0x3c, 0x1b, 0x40, 0x41, 0x7c, 0x61, 0x8, 0x80, 0x53, 0x87, 0x1b, 0x5a, 0x56, 0xd, 0x88, 0x1d, 0x60, 0x24, 0xce, 0x7b, 0x8f, 0xff, 0xaf, 0x36}}
return a, nil
}
var __1632262444_profile_pictures_show_toUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x72\xf4\x09\x71\x0d\x52\x08\x71\x74\xf2\x71\x55\x28\x4e\x2d\x29\xc9\xcc\x4b\x2f\x56\x70\x74\x71\x51\x70\xf6\xf7\x09\xf5\xf5\x53\x28\x28\xca\x4f\xcb\xcc\x49\x8d\x2f\xc8\x4c\x2e\x29\x2d\x4a\x2d\x8e\x2f\xce\xc8\x2f\x8f\x2f\xc9\x57\xf0\xf4\x0b\x51\xf0\xf3\x0f\x51\xf0\x0b\xf5\xf1\x51\x70\x71\x75\x73\x0c\xf5\x09\x51\x30\xb4\xe6\x02\x04\x00\x00\xff\xff\x2f\x7a\xa3\xb9\x51\x00\x00\x00")
func _1632262444_profile_pictures_show_toUpSqlBytes() ([]byte, error) {
return bindataRead(
__1632262444_profile_pictures_show_toUpSql,
"1632262444_profile_pictures_show_to.up.sql",
)
}
func _1632262444_profile_pictures_show_toUpSql() (*asset, error) {
bytes, err := _1632262444_profile_pictures_show_toUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1632262444_profile_pictures_show_to.up.sql", size: 81, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xc3, 0xa2, 0x5a, 0x94, 0xde, 0x86, 0x2a, 0x29, 0xf5, 0xb3, 0x36, 0xe7, 0x53, 0x81, 0x55, 0xc9, 0xb5, 0xc3, 0xf4, 0x8c, 0x65, 0x2c, 0x4c, 0x48, 0xfd, 0x3c, 0xb7, 0x14, 0xb4, 0xea, 0x7a, 0x13}}
return a, nil
}
var __1635942153_add_telemetry_server_url_to_settingsUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x6c\xcc\xb1\x0a\xc2\x40\x0c\x06\xe0\xdd\xa7\xf8\xe9\x6b\x88\x43\xec\x45\x1c\xe2\x55\xce\xc4\xb5\x53\x10\xa1\x3a\xe4\xa2\xe0\xdb\x8b\x9b\x43\xf7\x8f\x8f\x44\xb9\x41\x69\x2f\x8c\xee\x99\xf7\xe7\xad\x83\x4a\xc1\x38\x89\x9d\x2a\xd2\x17\x7f\x78\xc6\x67\xee\x1e\x6f\x8f\xf9\x15\x0b\xae\xd4\xc6\x23\x35\xd4\x49\x51\x4d\x04\x85\x0f\x64\xa2\x18\x86\xed\xc6\xce\x85\xf4\x2f\xbb\xb0\xae\x2f\xbb\x1f\xff\x06\x00\x00\xff\xff\xfb\x7a\x10\xdd\x80\x00\x00\x00")
func _1635942153_add_telemetry_server_url_to_settingsUpSqlBytes() ([]byte, error) {
return bindataRead(
__1635942153_add_telemetry_server_url_to_settingsUpSql,
"1635942153_add_telemetry_server_url_to_settings.up.sql",
)
}
func _1635942153_add_telemetry_server_url_to_settingsUpSql() (*asset, error) {
bytes, err := _1635942153_add_telemetry_server_url_to_settingsUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1635942153_add_telemetry_server_url_to_settings.up.sql", size: 128, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x6e, 0x9b, 0x1d, 0x39, 0x9c, 0x8d, 0x50, 0x86, 0xdf, 0xe5, 0x81, 0x55, 0xdc, 0x31, 0xcd, 0xb7, 0xc7, 0x5a, 0x67, 0x3b, 0x21, 0x99, 0xa5, 0x74, 0xb8, 0xd3, 0x58, 0xae, 0x29, 0x68, 0x2a, 0x8d}}
return a, nil
}
var __1635942154_add_backup_settingUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x84\xcf\xcb\x0a\xc2\x30\x10\x85\xe1\x7d\x9f\xe2\x3c\x82\xae\x4b\x17\x53\x33\x05\x61\x4c\xa4\x9d\xac\x4b\x5b\xe3\x05\x4b\x11\x12\xdf\x5f\xc4\x0b\x22\x62\xf7\xc3\x37\xff\x21\x51\xae\xa1\x54\x0a\x23\x86\x94\x4e\xd3\x21\x82\x8c\xc1\xca\x89\xdf\x58\xf4\xdd\x70\xbe\x5e\xda\x30\x75\xfd\x18\x76\x28\x9d\x13\x26\x0b\xc3\x15\x79\x51\x68\xed\x39\xcf\xe6\x90\xb1\x8b\xa9\x7d\x48\x58\x5b\x85\x75\x0a\xeb\x45\xde\xcc\x62\xde\x78\x86\xec\x43\x1a\x8e\x3f\x42\x2a\x92\x86\xf3\xcc\x6f\x0d\xe9\x07\xd0\xb0\x7e\x4f\x28\xb0\xfc\x7b\xf7\xfa\x50\xdc\xab\x6e\x01\x00\x00\xff\xff\x3f\xf3\xd1\x35\x1f\x01\x00\x00")
func _1635942154_add_backup_settingUpSqlBytes() ([]byte, error) {
return bindataRead(
__1635942154_add_backup_settingUpSql,
"1635942154_add_backup_setting.up.sql",
)
}
func _1635942154_add_backup_settingUpSql() (*asset, error) {
bytes, err := _1635942154_add_backup_settingUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1635942154_add_backup_setting.up.sql", size: 287, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xb7, 0xe7, 0xfb, 0x70, 0x80, 0x5, 0xb4, 0x7b, 0x67, 0x8, 0x6e, 0x5f, 0x45, 0x17, 0xd9, 0x5f, 0x18, 0x66, 0x2f, 0x8a, 0x4f, 0xd4, 0x15, 0xe5, 0x2b, 0xbb, 0x25, 0x7a, 0x30, 0xad, 0x4c, 0x1a}}
return a, nil
}
var __1637745568_add_auto_message_settingUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x6c\xcc\x31\x0e\xc2\x30\x0c\x05\xd0\xbd\xa7\xf8\x47\x60\xaf\x18\x5c\xec\x4e\xa6\x41\xd4\x99\xa3\x20\xac\x08\x09\xc2\xe0\x70\x7f\x56\x06\x0e\xf0\x1e\xa9\xc9\x15\x46\x8b\x0a\xc2\xc7\x78\xf4\x16\x20\x66\x9c\x92\xe6\xf3\x86\xfa\x19\xef\xf2\xf2\x88\xda\xbc\x78\xaf\xb7\xa7\xdf\xb1\xa4\xa4\x42\x1b\x58\x56\xca\x6a\x58\x49\x77\x99\xa7\x7c\x61\xb2\x9f\x66\x17\xfb\xef\x8f\x38\xcc\xd3\x37\x00\x00\xff\xff\x2f\xce\x8b\x0f\x7a\x00\x00\x00")
func _1637745568_add_auto_message_settingUpSqlBytes() ([]byte, error) {
return bindataRead(
__1637745568_add_auto_message_settingUpSql,
"1637745568_add_auto_message_setting.up.sql",
)
}
func _1637745568_add_auto_message_settingUpSql() (*asset, error) {
bytes, err := _1637745568_add_auto_message_settingUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1637745568_add_auto_message_setting.up.sql", size: 122, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x1d, 0xd8, 0xd2, 0xc2, 0x3a, 0xd7, 0xf1, 0x96, 0x6a, 0x35, 0xe5, 0x5c, 0xb9, 0xed, 0x4b, 0xf2, 0x5f, 0x80, 0x43, 0xca, 0x40, 0x57, 0x7e, 0xd7, 0x41, 0x9f, 0x70, 0x9f, 0xaf, 0x2a, 0xfc, 0x8f}}
return a, nil
}
var __1640111208_nodeconfigUpSql = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xbc\x59\x4f\x6f\xdb\xb8\x12\xbf\xe7\x53\x10\xbd\x34\x05\x7a\x78\xaf\x0f\xef\xb4\x27\xb7\x75\xdb\x60\xb3\xc9\x22\xeb\x6c\xd1\x13\x41\x91\x63\x79\x60\x8a\xe4\x92\x23\x39\xea\xa7\x5f\x50\x96\x6d\xc5\x92\x2d\xaa\x75\x7a\x33\xa0\x1f\xe7\x3f\x7f\x33\x43\x7f\x78\x98\xcf\x16\x73\xb6\x98\xbd\xbf\x9d\x33\x63\x15\x70\x69\xcd\x12\x73\x76\x7d\xc5\x98\x01\xda\x58\xbf\xe6\xa8\xd8\xe3\xdd\x5f\x37\x9f\xef\xe6\x1f\xd9\xcd\xdd\x82\xdd\xdd\x2f\xd8\xdd\xe3\xed\xed\xdb\x2b\xc6\x94\x20\xc1\x15\x7a\xf6\xf7\xec\xe1\xc3\x97\xd9\xc3\xb3\x8f\x6b\xa8\x03\x59\x0f\x27\x01\x8d\xc6\x35\xd4\xbd\x8f\xec\xe3\xfc\xd3\xec\xf1\x76\xc1\x5e\xbd\xda\xe2\xb8\xc2\x20\x6d\x05\xbe\x66\xef\xef\xef\x6f\xe7\xb3\xbb\x3d\x64\x29\x74\x80\x88\xf2\x60\x14\x7c\xaf\x6c\x19\x4e\x63\x34\x06\x02\xc3\x85\x52\x7d\x8b\x8e\x94\x0a\x55\x81\x27\x0c\x90\x84\x36\xa2\x80\x31\x4c\x05\x3e\xa0\x35\xa3\x8a\x1d\xf2\xc2\xaa\x52\x43\x18\x83\x92\x0e\x1c\x8c\xc8\x34\xa8\xd3\x3e\x17\xe2\x89\x3b\x00\x1f\x9e\xa5\xf1\xf0\xc5\x28\x34\xf9\x09\xc4\x56\x38\x0f\x24\xa8\x0c\x3c\x80\xaf\x50\xc2\x69\x55\x2d\xdc\x90\xe3\xa1\x36\xf2\x34\x70\x23\xd6\xe5\xb8\xe5\x11\xf5\xae\x0b\x3b\x81\xcb\x3c\xaa\x1c\x52\xe4\x69\x0d\x34\x8e\xd3\x56\x0a\xcd\x8d\x25\x5c\xa2\x14\x84\xd6\x24\x84\x39\xf3\x76\x13\xc0\x8f\x03\x1d\xf8\x02\x43\x48\x93\x5a\x08\xd4\x31\xec\xe0\x13\xc0\x61\x23\x7c\x31\x45\x26\xf7\x90\x63\x20\x5f\x37\x35\x0e\x61\xb4\xe0\x36\x90\xfd\xcf\x79\x5b\xa1\x4a\xf1\x14\x52\x3c\x0c\xb5\xa1\x15\x10\xca\x48\x34\x3b\xf5\x3b\xd4\x6b\x54\xaf\xd9\x9f\x0f\x37\x7f\xcc\x1e\xbe\xb1\xdf\xe7\xdf\xae\xde\xb0\xaf\x37\x8b\x2f\xf7\x8f\x0b\xf6\x70\xff\xf5\xe6\xe3\x6f\x57\x57\xcf\x38\x6c\x45\xe4\xba\x1c\x36\xaa\x7d\x65\x03\x8d\x39\xed\xac\xa7\xde\xcd\xb8\xbc\xd9\x15\x7a\x2a\x85\xe6\xd1\xa4\xd0\x58\x3f\x68\x5c\x92\xf2\x08\xea\xe8\xbf\x8e\x92\xde\x3e\x3b\xf6\x26\x31\x94\x7e\x6b\x4a\xf3\xe3\x22\xa6\x44\x49\x13\x4d\x41\x27\xf7\x49\x65\x2c\x25\xad\x4b\xd4\xa3\x74\x7c\xd1\x0c\x6a\x9b\x4f\xaa\xbb\xc2\x66\x18\x89\xb5\x0e\x04\xc5\x39\x26\xca\x07\xbb\xe7\x91\x2b\x29\xee\x46\x51\x1a\x2a\xd0\x67\x80\x37\x77\x9f\xee\x5f\xed\x3a\x43\x26\xe4\xba\x74\xc3\x5d\x23\xe0\x77\xe8\x7d\x90\xb6\x70\x91\x44\xb8\xb7\x24\xe8\x3c\xc1\xe6\x9c\x2c\x0f\xa4\xc0\xfb\x17\xa3\x06\xc6\x9e\xe7\xa8\x74\x81\x3c\x88\x62\x52\xa2\x4a\x7f\x2e\x60\x97\x2f\xa4\xdd\xd4\xd5\xb1\x51\xae\x04\x9a\x28\xbb\x1f\xf1\xe6\x4b\xca\xec\xe1\x9d\xe4\x09\xae\x64\xda\xca\x35\x87\x27\xa7\xad\x07\x9f\x72\x02\xa5\x35\x29\x38\x23\x08\x2b\xe0\xb2\xf4\x1e\x8c\xac\x93\x8c\x3e\x3e\x13\xea\x22\xb3\x93\x35\x29\x90\x58\x08\xdd\x2f\x64\x0c\x9c\x20\xd0\x99\x32\x15\x35\xf8\x13\x33\xd1\xcf\x74\xb4\x23\x4a\x64\xd7\xbb\x0c\x4f\xe4\x45\xa9\xcb\x40\xe0\x27\xd5\xf3\x52\x03\x8c\x76\xbc\xe9\x1e\xfc\x90\xdd\x71\xfe\xdf\xb6\x97\xf8\x6b\xb0\xbd\x50\xed\x80\x5d\xa4\xf1\xb0\xeb\xa8\xe4\x6d\x23\x71\x62\x9c\x35\xe6\x2b\xe2\x40\xab\x49\x91\x8e\xbb\x51\x26\x02\x70\x29\xe4\xaa\xcf\x96\x05\x1a\x4e\x3e\x06\x42\xf1\xa5\x17\x32\x8e\x99\x2f\x3b\x65\x1c\xdc\xd8\xe9\x3d\x24\xe0\x74\x06\xd2\x02\x3d\x14\xe9\x49\x31\xde\x4e\xa2\xe0\x39\x59\x87\x72\x6b\x54\xf3\xf3\x42\xc9\x6f\x64\x4d\xb6\xe9\x9f\x12\x3d\xa4\x99\x54\xa0\x19\xde\x93\xf7\x76\xfd\xa7\xed\x9e\x29\xb0\x97\x72\x90\x1d\x37\x45\x57\x86\xd5\xd1\xaa\xd3\xee\x06\x53\x6a\x1d\x15\x18\x42\x1a\xdd\xe4\x73\xeb\xa3\xbe\x97\x6f\xaa\xc7\x6e\x36\x0b\xe7\x14\x8f\xb6\x97\x45\x6a\x04\x73\xa6\x43\x2c\x4b\xad\x9b\x5b\x34\xba\x13\x1f\x96\xae\xf3\x8c\x91\x32\xee\x15\x68\xb0\x28\x0b\xee\xec\x86\x3d\xcc\x67\xb7\x47\x4b\x9d\x13\x21\x6c\xac\x57\xa3\x62\x3a\x7b\xa0\x20\xe0\x1a\x0b\xec\x2f\x3a\x1d\x54\x63\x9f\x07\x8a\xb9\x1e\x20\x2b\x22\x3d\x38\x2c\x16\x10\x82\xc8\x61\x78\x68\x6c\xc3\x73\x30\xe0\x5c\x80\x9c\x90\x6b\xa0\x0e\x98\xa3\xeb\x49\xec\x83\x1c\x80\x3f\x7e\xc6\x6a\xa6\x9d\x9a\x20\x8c\x48\xeb\x61\x4e\x09\xeb\x40\xc8\x6a\xf0\xc2\xc8\xbe\xb7\x99\xb6\xb6\xe0\x4b\xd4\x91\xe9\x8a\x94\xba\x69\x6a\xd6\x17\xdb\xab\xf9\x8b\xb6\xe8\xce\x65\xe1\x2a\xe3\x2e\x79\x54\xc6\x5f\x3a\x2a\x37\x66\x06\xbb\xa4\x4c\x0b\xb9\x6e\x9e\xf6\x54\xfb\x8e\x75\xdd\xbc\xb3\x6c\x13\x75\x99\xfe\xd1\x4a\x4b\x21\xd8\x21\xee\xa9\xde\xfd\x9a\x67\x89\x35\x80
func _1640111208_nodeconfigUpSqlBytes() ([]byte, error) {
return bindataRead(
__1640111208_nodeconfigUpSql,
"1640111208_nodeconfig.up.sql",
)
}
func _1640111208_nodeconfigUpSql() (*asset, error) {
bytes, err := _1640111208_nodeconfigUpSqlBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "1640111208_nodeconfig.up.sql", size: 7659, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0x8e, 0x5a, 0xc6, 0xed, 0x6, 0xcb, 0x51, 0x8b, 0x78, 0xe9, 0x10, 0x37, 0xd1, 0xad, 0x9b, 0x76, 0x9a, 0xb9, 0x72, 0x85, 0xe7, 0x8a, 0x7f, 0xf0, 0x81, 0xf8, 0x33, 0x59, 0x67, 0x8e, 0xeb, 0xb1}}
return a, nil
}
var _docGo = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\x2c\xca\xb1\x11\x85\x20\x0c\x06\xe0\x9e\x29\xb2\x00\x49\xff\xb6\xc9\x83\xf8\x1f\x87\x26\x08\x9c\xf3\xdb\xd8\x7f\x43\x4b\x57\x18\xad\xfb\x4c\x49\x04\xf1\x83\xb9\x4d\xdd\x46\x88\xfc\x6f\x5e\x75\x2b\xe5\xd1\x41\x57\xc3\xd4\xdd\xc2\xd7\x98\xf6\x78\x54\x2b\x07\x28\x07\x31\xcb\x07\x19\x41\x2c\xe9\x0d\x00\x00\xff\xff\x09\x8c\xb2\x2f\x55\x00\x00\x00")
func docGoBytes() ([]byte, error) {
return bindataRead(
_docGo,
"doc.go",
)
}
func docGo() (*asset, error) {
bytes, err := docGoBytes()
if err != nil {
return nil, err
}
URL unfurling (initial implementation) (#3471) This is the initial implementation for the new URL unfurling requirements. The most important one is that only the message sender will pay the privacy cost for unfurling and extracting metadata from websites. Once the message is sent, the unfurled data will be stored at the protocol level and receivers will just profit and happily decode the metadata to render it. Further development of this URL unfurling capability will be mostly guided by issues created on clients. For the moment in status-mobile: https://github.com/status-im/status-mobile/labels/url-preview - https://github.com/status-im/status-mobile/issues/15918 - https://github.com/status-im/status-mobile/issues/15917 - https://github.com/status-im/status-mobile/issues/15910 - https://github.com/status-im/status-mobile/issues/15909 - https://github.com/status-im/status-mobile/issues/15908 - https://github.com/status-im/status-mobile/issues/15906 - https://github.com/status-im/status-mobile/issues/15905 ### Terminology In the code, I've tried to stick to the word "unfurl URL" to really mean the process of extracting metadata from a website, sort of lower level. I use "link preview" to mean a higher level structure which is enriched by unfurled data. "link preview" is also how designers refer to it. ### User flows 1. Carol needs to see link previews while typing in the chat input field. Notice from the diagram nothing is persisted and that status-go endpoints are essentially stateless. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_getTextURLs Server-->>Client: Normalized URLs Client->>Client: Render cached unfurled URLs Client->>Server: Unfurl non-cached URLs.\nCall wakuext_unfurlURLs Server->>Website: Fetch metadata Website-->>Server: Metadata (thumbnail URL, title, etc) Server->>Website: Fetch thumbnail Server->>Website: Fetch favicon Website-->>Server: Favicon bytes Website-->>Server: Thumbnail bytes Server->>Server: Decode & process images Server-->>Client: Unfurled data (thumbnail data URI, etc) #+end_src ``` ``` ,------. ,------. ,-------. |Client| |Server| |Website| `--+---' `--+---' `---+---' | Call wakuext_getTextURLs | | | ---------------------------------------> | | | | | Normalized URLs | | | <- - - - - - - - - - - - - - - - - - - - | | | | |----. | | | | Render cached unfurled URLs | | |<---' | | | | | | Unfurl non-cached URLs. | | | Call wakuext_unfurlURLs | | | ---------------------------------------> | | | | | | Fetch metadata | | | ------------------------------------> | | | | | Metadata (thumbnail URL, title, etc)| | | <- - - - - - - - - - - - - - - - - - | | | | | Fetch thumbnail | | | ------------------------------------> | | | | | Fetch favicon | | | ------------------------------------> | | | | | Favicon bytes | | | <- - - - - - - - - - - - - - - - - - | | | | | Thumbnail bytes | | | <- - - - - - - - - - - - - - - - - - | | | | |----. | | | | Decode & process images | | |<---' | | | | | Unfurled data (thumbnail data URI, etc)| | | <- - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,---+---. |Client| |Server| |Website| `------' `------' `-------' ``` 2. Carol sends the text message with link previews in the RPC request wakuext_sendChatMessages. status-go assumes the link previews are good because it can't and shouldn't attempt to re-unfurl them. ``` #+begin_src plantuml :results verbatim Client->>Server: Call wakuext_sendChatMessages Server->>Server: Transform link previews to\nbe proto-marshalled Server->DB: Write link previews serialized as JSON Server-->>Client: Updated message response #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | Call wakuext_sendChatMessages| | | -----------------------------> | | | | | |----. | | | | Transform link previews to | | |<---' be proto-marshalled | | | | | | | | | Write link previews serialized as JSON| | | --------------------------------------> | | | | Updated message response | | | <- - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` 3. The message was sent over waku and persisted locally in Carol's device. She should now see the link previews in the chat history. There can be many link previews shared by other chat members, therefore it is important to serve the assets via the media server to avoid overloading the ReactNative bridge with lots of big JSON payloads containing base64 encoded data URIs (maybe this concern is meaningless for desktop). When a client is rendering messages with link previews, they will have the field linkPreviews, and the thumbnail URL will point to the local media server. ``` #+begin_src plantuml :results verbatim Client->>Server: GET /link-preview/thumbnail (media server) Server->>DB: Read from user_messages.unfurled_links Server->Server: Unmarshal JSON Server-->>Client: HTTP Content-Type: image/jpeg/etc #+end_src ``` ``` ,------. ,------. ,--. |Client| |Server| |DB| `--+---' `--+---' `+-' | GET /link-preview/thumbnail (media server)| | | ------------------------------------------> | | | | | | Read from user_messages.unfurled_links| | | --------------------------------------> | | | | |----. | | | | Unmarshal JSON | | |<---' | | | | | HTTP Content-Type: image/jpeg/etc | | | <- - - - - - - - - - - - - - - - - - - - - | ,--+---. ,--+---. ,+-. |Client| |Server| |DB| `------' `------' `--' ``` ### Some limitations of the current implementation The following points will become separate issues in status-go that I'll work on over the next couple weeks. In no order of importance: - Improve how multiple links are fetched; retries on failure and testing how unfurling behaves around the timeout limits (deterministically, not by making real HTTP calls as I did). https://github.com/status-im/status-go/issues/3498 - Unfurl favicons and store them in the protobuf too. - For this PR, I added unfurling support only for websites with OpenGraph https://ogp.me/ meta tags. Other unfurlers will be implemented on demand. The next one will probably be for oEmbed https://oembed.com/, the protocol supported by YouTube, for example. - Resize and/or compress thumbnails (and favicons). Often times, thumbnails are huge for the purposes of link previews. There is already support for compressing JPEGs in status-go, but I prefer to work with compression in a separate PR because I'd like to also solve the problem for PNGs (probably convert them to JPEGs, plus compress them). This would be a safe choice for thumbnails, favicons not so much because transparency is desirable. - Editing messages is not yet supported. - I haven't coded any artificial limit on the number of previews or on the size of the thumbnail payload. This will be done in a separate issue. I have heard the ideal solution may be to split messages into smaller chunks of ~125 KiB because of libp2p, but that might be too complicated at this stage of the product (?). - Link preview deletion. - For the moment, OpenGraph metadata is extracted by requesting data for the English language (and fallback to whatever is available). In the future, we'll want to unfurl by respecting the user's local device language. Some websites, like GoDaddy, are already localized based on the device's IP, but many aren't. - The website's description text should be limited by a certain number of characters, especially because it's outside our control. Exactly how much has not been decided yet, so it'll be done separately. - URL normalization can be tricky, so I implemented only the basics to help with caching. For example, the url https://status.im and HTTPS://status.im are considered identical. Also, a URL is considered valid for unfurling if its TLD exists according to publicsuffix.EffectiveTLDPlusOne. This was essential, otherwise the default Go url.Parse approach would consider many invalid URLs valid, and thus the server would waste resources trying to unfurl the unfurleable. ### Other requirements - If the message is edited, the link previews should reflect the edited text, not the original one. This has been aligned with the design team as well. - If the website's thumbnail or the favicon can't be fetched, just ignore them. The only mandatory piece of metadata is the website's title and URL. - Link previews in clients should be generated in near real-time, that is, as the user types, previews are updated. In mobile this performs very well, and it's what other clients like WhatsApp, Telegram, and Facebook do. ### Decisions - While the user typing in the input field, the client is constantly (debounced) asking status-go to parse the text and extract normalized URLs and then the client checks if they're already in its in-memory cache. If they are, no RPC call is made. I chose this approach to achieve the best possible performance in mobile and avoid the whole RPC overhead, since the chat experience is already not smooth enough. The mobile client uses URLs as cache keys in a hashmap, i.e. if the key is present, it means the preview is readily available (naive, but good enough for now). This decision also gave me more flexibility to find the best UX at this stage of the feature. - Due to the requirement that users should be able to see independent loading indicators for each link preview, when status-go can't unfurl a URL, it doesn't return it in the response. - As an initial implementation, I added the BLOB column unfurled_links to the user_messages table. The preview data is then serialized as JSON before being stored in this column. I felt that creating a separate table and the related code for this initial PR would be inconvenient. Is that reasonable to you? Once things stabilize I can create a proper table if we want to avoid this kind of solution with serialized columns.
2023-05-18 20:43:06 +02:00
info := bindataFileInfo{name: "doc.go", size: 85, mode: os.FileMode(0644), modTime: time.Unix(1662753054, 0)}
a := &asset{bytes: bytes, info: info, digest: [32]uint8{0xe5, 0xd2, 0xea, 0xc5, 0xd, 0xc4, 0x7f, 0x95, 0x8e, 0xd5, 0xf5, 0x96, 0xf2, 0x1b, 0xcb, 0xc7, 0xc2, 0x46, 0x1, 0x78, 0x1d, 0x5d, 0x59, 0x19, 0x99, 0xdd, 0x5b, 0xf5, 0x63, 0xa5, 0x25, 0xb8}}
return a, nil
}
// Asset loads and returns the asset for the given name.
// It returns an error if the asset could not be found or
// could not be loaded.
func Asset(name string) ([]byte, error) {
canonicalName := strings.Replace(name, "\\", "/", -1)
if f, ok := _bindata[canonicalName]; ok {
a, err := f()
if err != nil {
return nil, fmt.Errorf("Asset %s can't read by error: %v", name, err)
}
return a.bytes, nil
}
return nil, fmt.Errorf("Asset %s not found", name)
}
// AssetString returns the asset contents as a string (instead of a []byte).
func AssetString(name string) (string, error) {
data, err := Asset(name)
return string(data), err
}
// MustAsset is like Asset but panics when Asset would return an error.
// It simplifies safe initialization of global variables.
func MustAsset(name string) []byte {
a, err := Asset(name)
if err != nil {
panic("asset: Asset(" + name + "): " + err.Error())
}
return a
}
// MustAssetString is like AssetString but panics when Asset would return an
// error. It simplifies safe initialization of global variables.
func MustAssetString(name string) string {
return string(MustAsset(name))
}
// AssetInfo loads and returns the asset info for the given name.
// It returns an error if the asset could not be found or
// could not be loaded.
func AssetInfo(name string) (os.FileInfo, error) {
canonicalName := strings.Replace(name, "\\", "/", -1)
if f, ok := _bindata[canonicalName]; ok {
a, err := f()
if err != nil {
return nil, fmt.Errorf("AssetInfo %s can't read by error: %v", name, err)
}
return a.info, nil
}
return nil, fmt.Errorf("AssetInfo %s not found", name)
}
// AssetDigest returns the digest of the file with the given name. It returns an
// error if the asset could not be found or the digest could not be loaded.
func AssetDigest(name string) ([sha256.Size]byte, error) {
canonicalName := strings.Replace(name, "\\", "/", -1)
if f, ok := _bindata[canonicalName]; ok {
a, err := f()
if err != nil {
return [sha256.Size]byte{}, fmt.Errorf("AssetDigest %s can't read by error: %v", name, err)
}
return a.digest, nil
}
return [sha256.Size]byte{}, fmt.Errorf("AssetDigest %s not found", name)
}
// Digests returns a map of all known files and their checksums.
func Digests() (map[string][sha256.Size]byte, error) {
mp := make(map[string][sha256.Size]byte, len(_bindata))
for name := range _bindata {
a, err := _bindata[name]()
if err != nil {
return nil, err
}
mp[name] = a.digest
}
return mp, nil
}
// AssetNames returns the names of the assets.
func AssetNames() []string {
names := make([]string, 0, len(_bindata))
for name := range _bindata {
names = append(names, name)
}
return names
}
// _bindata is a table, holding each asset generator, mapped to its name.
var _bindata = map[string]func() (*asset, error){
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0001_app.down.sql": _0001_appDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0001_app.up.sql": _0001_appUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0002_tokens.down.sql": _0002_tokensDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0002_tokens.up.sql": _0002_tokensUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0003_settings.down.sql": _0003_settingsDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0003_settings.up.sql": _0003_settingsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0004_pending_stickers.down.sql": _0004_pending_stickersDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0004_pending_stickers.up.sql": _0004_pending_stickersUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0005_waku_mode.down.sql": _0005_waku_modeDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0005_waku_mode.up.sql": _0005_waku_modeUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0006_appearance.up.sql": _0006_appearanceUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0007_enable_waku_default.up.sql": _0007_enable_waku_defaultUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0008_add_push_notifications.up.sql": _0008_add_push_notificationsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0009_enable_sending_push_notifications.down.sql": _0009_enable_sending_push_notificationsDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0009_enable_sending_push_notifications.up.sql": _0009_enable_sending_push_notificationsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0010_add_block_mentions.down.sql": _0010_add_block_mentionsDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0010_add_block_mentions.up.sql": _0010_add_block_mentionsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0011_allow_webview_permission_requests.down.sql": _0011_allow_webview_permission_requestsDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0011_allow_webview_permission_requests.up.sql": _0011_allow_webview_permission_requestsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0012_pending_transactions.down.sql": _0012_pending_transactionsDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0012_pending_transactions.up.sql": _0012_pending_transactionsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0013_favourites.down.sql": _0013_favouritesDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0013_favourites.up.sql": _0013_favouritesUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0014_add_use_mailservers.down.sql": _0014_add_use_mailserversDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0014_add_use_mailservers.up.sql": _0014_add_use_mailserversUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0015_link_previews.down.sql": _0015_link_previewsDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0015_link_previews.up.sql": _0015_link_previewsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0016_local_notifications_preferences.down.sql": _0016_local_notifications_preferencesDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0016_local_notifications_preferences.up.sql": _0016_local_notifications_preferencesUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0017_bookmarks.down.sql": _0017_bookmarksDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0017_bookmarks.up.sql": _0017_bookmarksUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0018_profile_pictures_visibility.up.sql": _0018_profile_pictures_visibilityUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0019_blocks_ranges_extra_data.up.sql": _0019_blocks_ranges_extra_dataUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0020_metrics.up.sql": _0020_metricsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0021_add_session_id_to_metrics.up.sql": _0021_add_session_id_to_metricsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"0022_pending_transfers.up.sql": _0022_pending_transfersUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1618237885_settings_anon_metrics_should_send.up.sql": _1618237885_settings_anon_metrics_should_sendUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1618395756_contacts_only.up.sql": _1618395756_contacts_onlyUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1622184614_add_default_sync_period.up.sql": _1622184614_add_default_sync_periodUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1625872445_user_status.up.sql": _1625872445_user_statusUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1627983977_add_gif_to_settings.up.sql": _1627983977_add_gif_to_settingsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1628580203_add_hidden_account.up.sql": _1628580203_add_hidden_accountUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1629123384_add_id_to_app_metrics.up.sql": _1629123384_add_id_to_app_metricsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1630401853_add_opensea_enabled_to_settings.up.sql": _1630401853_add_opensea_enabled_to_settingsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1630464455_create-saved_addresses-table.down.sql": _1630464455_createSaved_addressesTableDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1630464455_create-saved_addresses-table.up.sql": _1630464455_createSaved_addressesTableUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1630485153_networks.down.sql": _1630485153_networksDownSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1630485153_networks.up.sql": _1630485153_networksUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1632262444_profile_pictures_show_to.up.sql": _1632262444_profile_pictures_show_toUpSql,
"1635942153_add_telemetry_server_url_to_settings.up.sql": _1635942153_add_telemetry_server_url_to_settingsUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1635942154_add_backup_setting.up.sql": _1635942154_add_backup_settingUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1637745568_add_auto_message_setting.up.sql": _1637745568_add_auto_message_settingUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"1640111208_nodeconfig.up.sql": _1640111208_nodeconfigUpSql,
feat: introduce messenger APIs to extract discord channels As part of the new Discord <-> Status Community Import functionality, we're adding an API that extracts all discord categories and channels from a previously exported discord export file. These APIs can be used in clients to show the user what categories and channels will be imported later on. There are two APIs: 1. `Messenger.ExtractDiscordCategoriesAndChannels(filesToimport []string) (*MessengerResponse, map[string]*discord.ImportError)` This takes a list of exported discord export (JSON) files (typically one per channel), reads them, and extracts the categories and channels into dedicated data structures (`[]DiscordChannel` and `[]DiscordCategory`) It also returns the oldest message timestamp found in all extracted channels. The API is synchronous and returns the extracted data as a `*MessengerResponse`. This allows to make the API available status-go's RPC interface. The error case is a `map[string]*discord.ImportError` where each key is a file path of a JSON file that we tried to extract data from, and the value a `discord.ImportError` which holds an error message and an error code, allowing for distinguishing between "critical" errors and "non-critical" errors. 2. `Messenger.RequestExtractDiscordCategoriesAndChannels(filesToImport []string)` This is the asynchronous counterpart to `ExtractDiscordCategoriesAndChannels`. The reason this API has been added is because discord servers can have a lot of message and channel data, which causes `ExtractDiscordCategoriesAndChannels` to block the thread for too long, making apps potentially feel like they are stuck. This API runs inside a go routine, eventually calls `ExtractDiscordCategoriesAndChannels`, and then emits a newly introduced `DiscordCategoriesAndChannelsExtractedSignal` that clients can react to. Failure of extraction has to be determined by the `discord.ImportErrors` emitted by the signal. **A note about exported discord history files** We expect users to export their discord histories via the [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter/wiki/GUI%2C-CLI-and-Formats-explained#exportguild) tool. The tool allows to export the data in different formats, such as JSON, HTML and CSV. We expect users to have their data exported as JSON. Closes: https://github.com/status-im/status-desktop/issues/6690
2022-07-13 11:33:53 +02:00
"doc.go": docGo,
}
// AssetDir returns the file names below a certain
// directory embedded in the file by go-bindata.
// For example if you run go-bindata on data/... and data contains the
// following hierarchy:
// data/
// foo.txt
// img/
// a.png
// b.png
// then AssetDir("data") would return []string{"foo.txt", "img"},
// AssetDir("data/img") would return []string{"a.png", "b.png"},
// AssetDir("foo.txt") and AssetDir("notexist") would return an error, and
// AssetDir("") will return []string{"data"}.
func AssetDir(name string) ([]string, error) {
node := _bintree
if len(name) != 0 {
canonicalName := strings.Replace(name, "\\", "/", -1)
pathList := strings.Split(canonicalName, "/")
for _, p := range pathList {
node = node.Children[p]
if node == nil {
return nil, fmt.Errorf("Asset %s not found", name)
}
}
}
if node.Func != nil {
return nil, fmt.Errorf("Asset %s not found", name)
}
rv := make([]string, 0, len(node.Children))
for childName := range node.Children {
rv = append(rv, childName)
}
return rv, nil
}
type bintree struct {
Func func() (*asset, error)
Children map[string]*bintree
}
var _bintree = &bintree{nil, map[string]*bintree{
"0001_app.down.sql": &bintree{_0001_appDownSql, map[string]*bintree{}},
"0001_app.up.sql": &bintree{_0001_appUpSql, map[string]*bintree{}},
"0002_tokens.down.sql": &bintree{_0002_tokensDownSql, map[string]*bintree{}},
"0002_tokens.up.sql": &bintree{_0002_tokensUpSql, map[string]*bintree{}},
"0003_settings.down.sql": &bintree{_0003_settingsDownSql, map[string]*bintree{}},
"0003_settings.up.sql": &bintree{_0003_settingsUpSql, map[string]*bintree{}},
"0004_pending_stickers.down.sql": &bintree{_0004_pending_stickersDownSql, map[string]*bintree{}},
"0004_pending_stickers.up.sql": &bintree{_0004_pending_stickersUpSql, map[string]*bintree{}},
"0005_waku_mode.down.sql": &bintree{_0005_waku_modeDownSql, map[string]*bintree{}},
"0005_waku_mode.up.sql": &bintree{_0005_waku_modeUpSql, map[string]*bintree{}},
"0006_appearance.up.sql": &bintree{_0006_appearanceUpSql, map[string]*bintree{}},
"0007_enable_waku_default.up.sql": &bintree{_0007_enable_waku_defaultUpSql, map[string]*bintree{}},
"0008_add_push_notifications.up.sql": &bintree{_0008_add_push_notificationsUpSql, map[string]*bintree{}},
"0009_enable_sending_push_notifications.down.sql": &bintree{_0009_enable_sending_push_notificationsDownSql, map[string]*bintree{}},
"0009_enable_sending_push_notifications.up.sql": &bintree{_0009_enable_sending_push_notificationsUpSql, map[string]*bintree{}},
"0010_add_block_mentions.down.sql": &bintree{_0010_add_block_mentionsDownSql, map[string]*bintree{}},
"0010_add_block_mentions.up.sql": &bintree{_0010_add_block_mentionsUpSql, map[string]*bintree{}},
"0011_allow_webview_permission_requests.down.sql": &bintree{_0011_allow_webview_permission_requestsDownSql, map[string]*bintree{}},
"0011_allow_webview_permission_requests.up.sql": &bintree{_0011_allow_webview_permission_requestsUpSql, map[string]*bintree{}},
"0012_pending_transactions.down.sql": &bintree{_0012_pending_transactionsDownSql, map[string]*bintree{}},
"0012_pending_transactions.up.sql": &bintree{_0012_pending_transactionsUpSql, map[string]*bintree{}},
"0013_favourites.down.sql": &bintree{_0013_favouritesDownSql, map[string]*bintree{}},
"0013_favourites.up.sql": &bintree{_0013_favouritesUpSql, map[string]*bintree{}},
"0014_add_use_mailservers.down.sql": &bintree{_0014_add_use_mailserversDownSql, map[string]*bintree{}},
"0014_add_use_mailservers.up.sql": &bintree{_0014_add_use_mailserversUpSql, map[string]*bintree{}},
"0015_link_previews.down.sql": &bintree{_0015_link_previewsDownSql, map[string]*bintree{}},
"0015_link_previews.up.sql": &bintree{_0015_link_previewsUpSql, map[string]*bintree{}},
"0016_local_notifications_preferences.down.sql": &bintree{_0016_local_notifications_preferencesDownSql, map[string]*bintree{}},
"0016_local_notifications_preferences.up.sql": &bintree{_0016_local_notifications_preferencesUpSql, map[string]*bintree{}},
"0017_bookmarks.down.sql": &bintree{_0017_bookmarksDownSql, map[string]*bintree{}},
"0017_bookmarks.up.sql": &bintree{_0017_bookmarksUpSql, map[string]*bintree{}},
"0018_profile_pictures_visibility.up.sql": &bintree{_0018_profile_pictures_visibilityUpSql, map[string]*bintree{}},
"0019_blocks_ranges_extra_data.up.sql": &bintree{_0019_blocks_ranges_extra_dataUpSql, map[string]*bintree{}},
"0020_metrics.up.sql": &bintree{_0020_metricsUpSql, map[string]*bintree{}},
"0021_add_session_id_to_metrics.up.sql": &bintree{_0021_add_session_id_to_metricsUpSql, map[string]*bintree{}},
"0022_pending_transfers.up.sql": &bintree{_0022_pending_transfersUpSql, map[string]*bintree{}},
"1618237885_settings_anon_metrics_should_send.up.sql": &bintree{_1618237885_settings_anon_metrics_should_sendUpSql, map[string]*bintree{}},
"1618395756_contacts_only.up.sql": &bintree{_1618395756_contacts_onlyUpSql, map[string]*bintree{}},
"1622184614_add_default_sync_period.up.sql": &bintree{_1622184614_add_default_sync_periodUpSql, map[string]*bintree{}},
"1625872445_user_status.up.sql": &bintree{_1625872445_user_statusUpSql, map[string]*bintree{}},
"1627983977_add_gif_to_settings.up.sql": &bintree{_1627983977_add_gif_to_settingsUpSql, map[string]*bintree{}},
"1628580203_add_hidden_account.up.sql": &bintree{_1628580203_add_hidden_accountUpSql, map[string]*bintree{}},
"1629123384_add_id_to_app_metrics.up.sql": &bintree{_1629123384_add_id_to_app_metricsUpSql, map[string]*bintree{}},
"1630401853_add_opensea_enabled_to_settings.up.sql": &bintree{_1630401853_add_opensea_enabled_to_settingsUpSql, map[string]*bintree{}},
"1630464455_create-saved_addresses-table.down.sql": &bintree{_1630464455_createSaved_addressesTableDownSql, map[string]*bintree{}},
"1630464455_create-saved_addresses-table.up.sql": &bintree{_1630464455_createSaved_addressesTableUpSql, map[string]*bintree{}},
"1630485153_networks.down.sql": &bintree{_1630485153_networksDownSql, map[string]*bintree{}},
"1630485153_networks.up.sql": &bintree{_1630485153_networksUpSql, map[string]*bintree{}},
"1632262444_profile_pictures_show_to.up.sql": &bintree{_1632262444_profile_pictures_show_toUpSql, map[string]*bintree{}},
"1635942153_add_telemetry_server_url_to_settings.up.sql": &bintree{_1635942153_add_telemetry_server_url_to_settingsUpSql, map[string]*bintree{}},
"1635942154_add_backup_setting.up.sql": &bintree{_1635942154_add_backup_settingUpSql, map[string]*bintree{}},
"1637745568_add_auto_message_setting.up.sql": &bintree{_1637745568_add_auto_message_settingUpSql, map[string]*bintree{}},
"1640111208_nodeconfig.up.sql": &bintree{_1640111208_nodeconfigUpSql, map[string]*bintree{}},
"doc.go": &bintree{docGo, map[string]*bintree{}},
}}
// RestoreAsset restores an asset under the given directory.
func RestoreAsset(dir, name string) error {
data, err := Asset(name)
if err != nil {
return err
}
info, err := AssetInfo(name)
if err != nil {
return err
}
err = os.MkdirAll(_filePath(dir, filepath.Dir(name)), os.FileMode(0755))
if err != nil {
return err
}
err = ioutil.WriteFile(_filePath(dir, name), data, info.Mode())
if err != nil {
return err
}
return os.Chtimes(_filePath(dir, name), info.ModTime(), info.ModTime())
}
// RestoreAssets restores an asset under the given directory recursively.
func RestoreAssets(dir, name string) error {
children, err := AssetDir(name)
// File
if err != nil {
return RestoreAsset(dir, name)
}
// Dir
for _, child := range children {
err = RestoreAssets(dir, filepath.Join(name, child))
if err != nil {
return err
}
}
return nil
}
func _filePath(dir, name string) string {
canonicalName := strings.Replace(name, "\\", "/", -1)
return filepath.Join(append([]string{dir}, strings.Split(canonicalName, "/")...)...)
}