PIM: relax phone number matching

Previously, the current default country was used to turn phone numbers
without an explicit country code into full E164 numbers, which then
had to match the search term when doing a caller ID lookup.

This was inconsistent with EDS, where a weaker
EQUALS_NATIONAL_PHONE_NUMBER was done. The difference is that a
comparison between a number with country code matches one without if
the national number of the same, regardless of the current default
country. This is better because it reduces the influence of the hard
to guess default country on matching.

SyncEvolution also differed from EDS by doing a prefix comparison,
which in theory might have also ignored differences caused by
extensions. It is uncertain whether that was useful, so for the sake
of consistency, a full number comparison of the national number is now
done.

Another advantage of this change is the lower memory consumption and
faster comparison, because strings are now stored in 4 + 8 byte
numbers instead of strings of varying length.
This commit is contained in:
Patrick Ohly 2013-09-30 13:01:38 +02:00
parent c472fd0af6
commit ed1f1f4aa2
4 changed files with 132 additions and 56 deletions

View File

@ -29,6 +29,7 @@
#include <phonenumbers/phonenumberutil.h>
#include <phonenumbers/logger.h>
#include <boost/locale.hpp>
#include <boost/lexical_cast.hpp>
#include <unicode/unistr.h>
#include <unicode/translit.h>
@ -775,10 +776,16 @@ public:
/**
* Search value must be a valid caller ID. The telephone numbers
* in the contacts may or may not be valid; only valid ones
* will match. The user is expected to clean up that data to get
* exact matches for the others.
* Search value must be a valid caller ID (with or without a country
* code). The telephone numbers in the contacts may or may not be
* valid; only valid ones will match. The user is expected to clean up
* that data to get exact matches for the others.
*
* The matching uses the same semantic as EQUALS_NATIONAL_PHONE_NUMBER:
* - If both numbers have an explicit country code, that code must be
* the same for a match.
* - If one or both numbers have no country code, matching the national
* part is enough for a match.
*/
class PhoneStartsWith : public IndividualFilter
{
@ -811,19 +818,19 @@ public:
break;
}
// Search based on full internal format, without formatting.
// For example: +41446681800
//
// A prefix match is good enough. That way a caller ID
// with suppressed extension still matches a contact with
// extension.
m_phoneNumberUtil.Format(number, i18n::phonenumbers::PhoneNumberUtil::E164, &m_tel);
m_number.m_countryCode = number.country_code();
m_number.m_nationalNumber = number.national_number();
}
virtual bool matches(const IndividualData &data) const
{
BOOST_FOREACH(const std::string &tel, data.m_precomputed.m_phoneNumbers) {
if (boost::starts_with(tel, m_tel)) {
BOOST_FOREACH(const SimpleE164 &number, data.m_precomputed.m_phoneNumbers) {
// National part must always match, country code only if
// set explicitly in both (NSN_MATCH in libphonenumber,
// EQUALS_NATIONAL_PHONE_NUMBER in EDS).
if (number.m_nationalNumber == m_number.m_nationalNumber &&
(!number.m_countryCode || !m_number.m_countryCode ||
number.m_countryCode == m_number.m_countryCode)) {
return true;
}
}
@ -832,12 +839,13 @@ public:
virtual std::string getEBookFilter() const
{
size_t len = std::min((size_t)4, m_tel.size());
std::string tel = m_number.toString();
size_t len = std::min((size_t)4, tel.size());
EBookQueryCXX query(m_simpleEDSSearch ?
// A suffix match with a limited number of digits is most
// likely to find the right contacts.
e_book_query_field_test(E_CONTACT_TEL, E_BOOK_QUERY_ENDS_WITH,
m_tel.substr(m_tel.size() - len, len).c_str()) :
tel.substr(tel.size() - len, len).c_str()) :
// We use EQUALS_NATIONAL_PHONE_NUMBER
// instead of EQUALS_PHONE_NUMBER here,
// because it will also match contacts
@ -851,19 +859,19 @@ public:
// check that and not return a false match
// if the country code is different.
//
// At the moment, we pass the E164
// formatted search term with a country
// code here. The country code is the
// current default one. We could think
// about passing the original search term
// instead, to allow matches where contact
// and search term have no country code,
// but it is uncertain whether EDS
// currently works that way. It looks like
// it always adds the default country code
// to the search term.
// We try to pass the E164 string here. If
// the search term had no country code,
// that's a bit difficult because we can't
// just add the default country code.
// That would break the
// NATIONAL_PHONE_NUMBER semantic because
// EDS wouldn't know that the search term
// had no country code. We resort to the
// format of SimpleE164.toString(), which
// is passing the national number
// formatted as string.
e_book_query_field_test(E_CONTACT_TEL, E_BOOK_QUERY_EQUALS_NATIONAL_PHONE_NUMBER,
m_tel.c_str()),
tel.c_str()),
TRANSFER_REF);
PlainGStr filter(e_book_query_to_string(query.get()));
return filter.get();
@ -873,7 +881,7 @@ private:
const i18n::phonenumbers::PhoneNumberUtil &m_phoneNumberUtil;
bool m_simpleEDSSearch;
std::string m_country;
std::string m_tel;
SimpleE164 m_number;
};
class PhoneNumberLogger : public i18n::phonenumbers::Logger
@ -1116,7 +1124,8 @@ public:
reinterpret_cast<const gchar *>(folks_abstract_field_details_get_value(phone));
if (value) {
if (m_edsSupportsPhoneNumbers) {
// TODO: check X-EVOLUTION-E164 (made lowercase by folks!).
// Check X-EVOLUTION-E164 (made lowercase by folks!).
//
// It has the format <local number>,<country code>,
// where <country code> happens to be in quotation marks.
// This ends up being split into individual values which
@ -1133,21 +1142,32 @@ public:
components.reserve(2);
BOOST_FOREACH (const gchar *component, GeeStringCollection(coll)) {
// Empty component represents an unset
// country code. Replace with the current
// country code to form the full number.
// Note that it is not certain whether we
// get to see the empty component. At the
// moment (EDS 3.7, folks 0.9.1), someone
// swallows it.
components.push_back(component[0] ? component : m_defaultCountryCode);
// country code. Note that it is not
// certain whether we get to see the empty
// component. At the moment (EDS 3.7,
// folks 0.9.1), someone swallows it.
components.push_back(component);
}
// Only one component? We must still miss the country code.
if (components.size() == 1) {
components.push_back(m_defaultCountryCode);
if (!components.empty()) {
// Only one component? We must still miss the country code.
if (components.size() == 1) {
components.push_back("");
}
std::sort(components.begin(), components.end());
try {
SimpleE164 number;
number.m_countryCode = components[0].empty() ?
0 :
boost::lexical_cast<SimpleE164::CountryCode_t>(components[0]);
number.m_nationalNumber = components[1].empty() ?
0 :
boost::lexical_cast<SimpleE164::NationalNumber_t>(components[1]);
precomputed.m_phoneNumbers.push_back(number);
} catch (const boost::bad_lexical_cast &ex) {
SE_LOG_WARNING(NULL, "ignoring malformed X-EVOLUTION-E164 (sorted): %s",
boost::join(components, ", ").c_str());
}
}
std::sort(components.begin(), components.end());
std::string normal = boost::join(components, "");
precomputed.m_phoneNumbers.push_back(normal);
}
// Either EDS had a normalized value or there is none because
// the value is not a phone number. No need to try parsing again.
@ -1158,9 +1178,10 @@ public:
i18n::phonenumbers::PhoneNumberUtil::ErrorType error =
m_phoneNumberUtil.Parse(value, m_country, &number);
if (error == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
std::string tel;
m_phoneNumberUtil.Format(number, i18n::phonenumbers::PhoneNumberUtil::E164, &tel);
precomputed.m_phoneNumbers.push_back(tel);
SimpleE164 e164;
e164.m_countryCode = number.country_code();
e164.m_nationalNumber = number.national_number();
precomputed.m_phoneNumbers.push_back(e164);
}
}
}

View File

@ -29,6 +29,18 @@
SE_BEGIN_CXX
std::string SimpleE164::toString() const
{
std::ostringstream out;
if (m_countryCode) {
out << "+" << m_countryCode;
}
if (m_nationalNumber) {
out << m_nationalNumber;
}
return out.str();
}
class Filter2StringVisitor : public boost::static_visitor<void>
{
std::ostringstream m_out;

View File

@ -39,6 +39,39 @@ SE_BEGIN_CXX
class IndividualCompare;
class IndividualFilter;
/**
* Normalized phone numbers (E164). A subset of the full
* i18n::phonenumbers::PhoneNumber data.
*/
class SimpleE164
{
public:
SimpleE164() : m_countryCode(0), m_nationalNumber(0) {}
typedef int32_t CountryCode_t;
typedef uint64_t NationalNumber_t;
/**
* Country code (for example, 49 for the +49 prefix of Germany),
* 0 if unknown/unset.
*/
CountryCode_t m_countryCode;
/**
* Phone number after the country code as it would appear in E.164
* after the +<country> prefix, again given as binary value,
* 0 if unknown/unset.
*/
NationalNumber_t m_nationalNumber;
/**
* +<country><national> if country code and national number are set,
* +<country> if only country code is set,
* <national> if only national number is set,
* empty string if both are unset.
*/
std::string toString() const;
};
/**
* Factory for everything related to the current locale: sorting and
@ -114,11 +147,7 @@ class LocaleFactory
*/
struct Precomputed
{
/**
* Normalized phone numbers (E164). Contains only + and digits.
* TODO (?): store in more compact format.
*/
std::vector<std::string> m_phoneNumbers;
std::vector<SimpleE164> m_phoneNumbers;
};
/**

View File

@ -2397,26 +2397,40 @@ END:VCARD''']):
# Find Abraham via caller ID for 089/7888-99 (country is Germany).
view = ContactsView(self.manager)
view.search([['phone', '+49897888']])
self.runUntil('"+49897888" search results',
view.search([['phone', '+4989788899']])
self.runUntil('"+4989788899" search results',
check=lambda: self.assertEqual([], view.errors),
until=lambda: view.quiescentCount > 0)
self.assertEqual(1, len(view.contacts))
view.read(0, 1)
self.runUntil('+49897888 data',
self.runUntil('+4989788899 data',
check=lambda: self.assertEqual([], view.errors),
until=lambda: view.haveData(0))
self.assertEqual(u'Abraham', view.contacts[0]['structured-name']['given'])
# Find Abraham via caller ID for +44 89 7888-99 (Abraham has no country code
# set and matches, whereas Benjamin has +1 as country code and does not match).
view = ContactsView(self.manager)
view.search([['phone', '+4489788899']])
self.runUntil('"+4489788899" search results',
check=lambda: self.assertEqual([], view.errors),
until=lambda: view.quiescentCount > 0)
self.assertEqual(1, len(view.contacts))
view.read(0, 1)
self.runUntil('+4489788899 data',
check=lambda: self.assertEqual([], view.errors),
until=lambda: view.haveData(0))
self.assertEqual(u'Abraham', view.contacts[0]['structured-name']['given'])
# Find Abraham via 089/7888-99 (not a full caller ID, but at least a valid phone number).
view = ContactsView(self.manager)
view.search([['phone', '0897888']])
self.runUntil('"0897888" search results',
view.search([['phone', '089788899']])
self.runUntil('"089788899" search results',
check=lambda: self.assertEqual([], view.errors),
until=lambda: view.quiescentCount > 0)
self.assertEqual(1, len(view.contacts))
view.read(0, 1)
self.runUntil('0897888 data',
self.runUntil('089788899 data',
check=lambda: self.assertEqual([], view.errors),
until=lambda: view.haveData(0))
self.assertEqual(u'Abraham', view.contacts[0]['structured-name']['given'])