CardDAV: workaround for Yahoo! Contacts encoding bug

A GET of a vCard returns data that seems to have gone through an
encoding step for transmission as part of HTML or XML: special
characters are replaced by entity codes. Worse, that step seems to be
done multiple times, leading to:
   backslash &amp;#92; single quote &#39; double quote &quot;
instead of:
   backslash \ single quote ' double quote "

Note the double encoding of \ as &amp;#92; (first &#92;, then the & itself
encoded again as &amp;) - really broken.

This patch works around that by unconditionally and repeatedly
replacing XML entities with the corresponding characters until no
entities are left in the item data. It would be nice to only do that
when such a broken server is detected, but detecting that is hard and
not implemented.

The downside obviously is that entities that the user (for whatever
reason) wants to have in his contact data will be replaced.
This commit is contained in:
Patrick Ohly 2011-01-26 14:40:12 +01:00
parent 14a5612134
commit 8d908f6f00
5 changed files with 200 additions and 0 deletions

View file

@ -27,6 +27,27 @@ std::string CardDAVSource::getDescription(const string &luid)
return "";
}
// Reads the item through WebDAVSource::readItem() and then replaces
// HTML/XML entities in the returned data (see workaround note below).
// The replacement is applied to the result regardless of the raw flag.
void CardDAVSource::readItem(const std::string &luid, std::string &item, bool raw)
{
WebDAVSource::readItem(luid, item, raw);
// Workaround for Yahoo! Contacts: it encodes
// backslash \ single quote ' double quote "
// as
// NOTE;CHARSET=utf-8;ENCODING=QUOTED-PRINTABLE: =
// backslash &amp;#92; single quote &#39; double quote &quot;
// (NOTE(review): the exact entity spellings used for the two quote
// characters are presumed - confirm against real server output.)
//
// This is just plain wrong. The backslash even seems to be
// encoded twice: \ -> &#92; -> &amp;#92;
//
// I don't see any way to detect this broken encoding reliably
// at runtime. In the meantime deal with it by always replacing
// HTML entities until none are left. Obviously that means that
// it is impossible to put HTML entities into a contact value.
// TODO: better detection of this server bug.
replaceHTMLEntities(item);
}
bool CardDAVSource::typeMatches(const StringMap &props) const
{
StringMap::const_iterator it = props.find("DAV::resourcetype");

View file

@ -33,6 +33,9 @@ class CardDAVSource : public WebDAVSource,
// implementation of SyncSourceLogging callback
virtual std::string getDescription(const string &luid);
// implements vCard specific conversions on top of generic WebDAV readItem():
// the item returned by WebDAVSource::readItem() is post-processed with
// replaceHTMLEntities() before it is handed back in 'item'
void readItem(const std::string &luid, std::string &item, bool raw);
protected:
// implementation of WebDAVSource callbacks
// service type string of this source: always "carddav"
virtual std::string serviceType() const { return "carddav"; }

View file

@ -136,6 +136,120 @@ WebDAVSource::WebDAVSource(const SyncSourceParams &params,
}
}
/**
 * Appends the UTF-8 encoding of a Unicode code point to a string.
 *
 * @param out   string that receives the encoded bytes
 * @param cp    code point; the caller guarantees 0 < cp <= 0x10FFFF
 */
static void appendCodePointAsUTF8(std::string &out, unsigned long cp)
{
    if (cp < 0x80) {
        out.append(1, static_cast<char>(cp));
    } else if (cp < 0x800) {
        out.append(1, static_cast<char>(0xC0 | (cp >> 6)));
        out.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
    } else if (cp < 0x10000) {
        out.append(1, static_cast<char>(0xE0 | (cp >> 12)));
        out.append(1, static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
    } else {
        out.append(1, static_cast<char>(0xF0 | (cp >> 18)));
        out.append(1, static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
        out.append(1, static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.append(1, static_cast<char>(0x80 | (cp & 0x3F)));
    }
}

/**
 * Replaces HTML/XML entities in the given text, in place, and keeps
 * doing so until a full pass finds nothing left to replace. Repeating
 * the pass is what undoes multiple layers of encoding, for example
 * "&amp;#92;" -> "&#92;" -> "\".
 *
 * Recognized are:
 * - the named entities quot, amp, apos, lt and gt
 * - decimal character references ("&#38;")
 * - hexadecimal character references ("&#x26;", "&#X26;")
 *
 * Numeric references must contain at least one digit and must denote
 * a valid Unicode scalar value (not 0, not a surrogate, at most
 * 0x10FFFF). The decoded character is stored as UTF-8, so references
 * above 127 expand to multiple bytes instead of being truncated to a
 * single byte. Everything that is not a well-formed, known entity
 * (unknown names, "&;", "&#;", missing ';', out-of-range numbers) is
 * left untouched.
 *
 * @param item   text to decode; modified only if at least one entity
 *               was replaced
 */
void WebDAVSource::replaceHTMLEntities(std::string &item)
{
    // core entries, extend as needed...
    static const struct {
        const char *m_name;
        char m_character;
    } entities[] = {
        { "quot", '"' },
        { "amp", '&' },
        { "apos", '\'' },
        { "lt", '<' },
        { "gt", '>' }
    };
    static const size_t numEntities = sizeof(entities) / sizeof(entities[0]);

    while (true) {
        bool found = false;   // at least one entity replaced in this pass
        std::string decoded;  // result of this pass, only built once something was found
        size_t last = 0;      // first character of item not yet copied into decoded
        size_t next = 0;      // where to continue searching for '&'

        while (true) {
            const size_t start = item.find('&', next);
            if (start == item.npos) {
                // finish decoding: copy the tail after the last entity
                if (found) {
                    decoded.append(item, last, item.npos);
                }
                break;
            }

            // Scan the candidate entity body: letters, digits and '#'.
            size_t end = start + 1;
            while (end != item.size()) {
                const char ch = item[end];
                if ((ch >= 'a' && ch <= 'z') ||
                    (ch >= 'A' && ch <= 'Z') ||
                    (ch >= '0' && ch <= '9') ||
                    ch == '#') {
                    end++;
                } else {
                    break;
                }
            }
            if (end == item.size() || item[end] != ';') {
                // Invalid character between & and ; or no proper
                // termination: no entity. Resume at the offending
                // character, which may itself be the next '&'.
                next = end;
                continue;
            }

            unsigned long codepoint = 0;
            bool valid = false;
            size_t pos = start + 1;
            if (pos < end && item[pos] == '#') {
                // decimal or hexadecimal character reference
                pos++;
                unsigned long base = 10;
                if (pos < end && (item[pos] == 'x' || item[pos] == 'X')) {
                    base = 16;
                    pos++;
                }
                size_t digits = 0;
                while (pos < end) {
                    const char ch = item[pos];
                    unsigned long digit;
                    if (ch >= '0' && ch <= '9') {
                        digit = ch - '0';
                    } else if (base == 16 && ch >= 'a' && ch <= 'f') {
                        digit = ch - 'a' + 10;
                    } else if (base == 16 && ch >= 'A' && ch <= 'F') {
                        digit = ch - 'A' + 10;
                    } else {
                        // invalid character, abort scanning of this entity
                        break;
                    }
                    codepoint = codepoint * base + digit;
                    if (codepoint > 0x10FFFF) {
                        // Outside of Unicode; stopping here also keeps
                        // the accumulator from ever overflowing.
                        break;
                    }
                    digits++;
                    pos++;
                }
                // All characters must have been consumed, there must
                // have been at least one digit ("&#;" and "&#x;" are
                // not entities) and the value must be a scalar value.
                valid = pos == end &&
                    digits > 0 &&
                    codepoint != 0 &&
                    !(codepoint >= 0xD800 && codepoint <= 0xDFFF);
            } else if (pos < end) {
                // named entity: the whole body must match a known name
                for (size_t i = 0; i < numEntities; i++) {
                    if (!item.compare(pos, end - pos, entities[i].m_name)) {
                        codepoint = static_cast<unsigned char>(entities[i].m_character);
                        valid = true;
                        break;
                    }
                }
            }

            if (valid) {
                // copy all uncopied characters plus the decoded one
                found = true;
                decoded.reserve(item.size());
                decoded.append(item, last, start - last);
                appendCodePointAsUTF8(decoded, codepoint);
                last = end + 1;
            }
            next = end + 1;
        }

        if (!found) {
            break;
        }
        // Every replacement is shorter than the entity it replaces,
        // so the text shrinks with each pass and the loop terminates.
        item.swap(decoded);
    }
}
void WebDAVSource::open()
{
SE_LOG_DEBUG(NULL, NULL, "using libneon %s with %s",

View file

@ -34,6 +34,12 @@ class WebDAVSource : public TrackingSyncSource, private boost::noncopyable
WebDAVSource(const SyncSourceParams &params,
const boost::shared_ptr<Neon::Settings> &settings);
/**
 * Utility function: replace HTML entities until none are left
 * in the decoded string - for Yahoo! Contacts bug.
 *
 * Decoding is applied again to its own output, so multiply-encoded
 * input such as "&amp;lt;" ends up as "<". Handled are the named
 * entities quot, amp, apos, lt and gt plus decimal ("&#38;") and
 * hexadecimal ("&#x26;") character references; unknown entity names
 * are left as they are.
 *
 * @param item   text to decode, modified in place
 */
static void replaceHTMLEntities(std::string &item);
protected:
/* implementation of SyncSource interface */
virtual void open();

View file

@ -73,6 +73,7 @@ static RegisterSyncSource registerMe("DAV",
class WebDAVTest : public CppUnit::TestFixture {
CPPUNIT_TEST_SUITE(WebDAVTest);
CPPUNIT_TEST(testInstantiate);
CPPUNIT_TEST(testHTMLEntities);
CPPUNIT_TEST_SUITE_END();
protected:
@ -85,6 +86,61 @@ protected:
source.reset((TestingSyncSource *)SyncSource::createTestingSource("CardDAV", "CardDAV:text/vcard", true));
source.reset((TestingSyncSource *)SyncSource::createTestingSource("CardDAV", "CardDAV:text/x-vcard", true));
}
/** Test helper: returns a copy of item with all HTML entities replaced. */
std::string decode(const char *item) {
    std::string result(item);
    CardDAVSource::replaceHTMLEntities(result);
    return result;
}
/**
 * Checks decode() against a table of inputs: valid named and numeric
 * entities, multiply-encoded text, and malformed sequences which must
 * come back unchanged.
 */
void testHTMLEntities() {
    static const struct {
        const char *m_expected;
        const char *m_input;
    } cases[] = {
        // named entities
        { "\" & ' < >", "&quot; &amp; &apos; &lt; &gt;" },
        // decimal and hex, encoded in different ways
        { "\" & ' < >", "&#x22; &#0038; &#x0027; &#x3C; &#x3e;" },
        // no translation needed
        { "hello world", "hello world" },
        // entity at start
        { "< ", "&lt; " },
        // entity at end
        { " <", " &lt;" },
        // double quotation
        { "\\", "&amp;#92;" },
        { "ampersand entity & less-than entity <",
          "ampersand entity &amp; less-than entity &amp;lt;" },
        // invalid entities: lone or empty ampersand sequences
        { " &", " &" },
        { "&", "&" },
        { "& ", "& " },
        { "&;", "&;" },
        { "&; ", "&; " },
        { " &; ", " &; " },
        { " &;", " &;" },
        // invalid entities: unknown name
        { "&xyz;", "&xyz;" },
        // invalid entities: hex digit in a decimal reference (checked twice)
        { "&#1f;", "&#1f;" },
        { "&#1f;", "&#1f;" },
        // invalid entities: space before the terminating semicolon
        { "&#x1f ;", "&#x1f ;" },
        { "&#quot ;", "&#quot ;" }
    };
    const size_t count = sizeof(cases) / sizeof(cases[0]);
    for (size_t i = 0; i < count; i++) {
        CPPUNIT_ASSERT_EQUAL(std::string(cases[i].m_expected),
                             decode(cases[i].m_input));
    }
}
};
SYNCEVOLUTION_TEST_SUITE_REGISTRATION(WebDAVTest);