PIM: Pinyin sorting for zh languages (part of FDO #64173)

Full interleaving of Pinyin transliterations of Chinese names with
Western names can be done by doing an explicit Pinyin transliteration
as part of computing the sort keys.

This is done using ICU's Transliteration("Han-Latin"), which we have
to call directly because boost::locale does not expose that API.

We hard-code this behavior for all "zh" languages (as identified by
boost::locale), because by default, ICU would sort Pinyin separately
from Western names when using the "pinyin" collation.
This commit is contained in:
Patrick Ohly 2013-05-14 17:37:44 +02:00
parent 542236bb5e
commit bab3906e12
3 changed files with 47 additions and 4 deletions

View File

@ -555,6 +555,8 @@ if test $enable_dbus_service = "yes"; then
# to go to the start of the link line).
DBUS_PIM_PLUGIN_LIBS='$(BOOST_LDFLAGS) $(BOOST_LOCALE_LIB)'
DBUS_PIM_PLUGIN_LDFLAGS=
# We need to call ICU directly for the Han->Latin transformation.
PKG_CHECK_MODULES(ICU, [icu-uc])
;;
esac
AC_SUBST(DBUS_PIM_PLUGIN_CFLAGS)

View File

@ -31,6 +31,10 @@
#include <boost/locale.hpp>
#include <boost/lexical_cast.hpp>
#include <unicode/unistr.h>
#include <unicode/translit.h>
#include <unicode/bytestream.h>
SE_GLIB_TYPE(EBookQuery, e_book_query)
SE_BEGIN_CXX
@ -51,9 +55,10 @@ SE_BEGIN_CXX
*/
static const boost::locale::collator_base::level_type DEFAULT_COLLATION_LEVEL = boost::locale::collator_base::secondary;
class CompareBoost : public IndividualCompare {
class CompareBoost : public IndividualCompare, private boost::noncopyable {
std::locale m_locale;
const boost::locale::collator<char> &m_collator;
std::auto_ptr<icu::Transliterator> m_trans;
public:
CompareBoost(const std::locale &locale);
@ -66,6 +71,31 @@ CompareBoost::CompareBoost(const std::locale &locale) :
m_locale(locale),
m_collator(std::use_facet< boost::locale::collator<char> >(m_locale))
{
std::string language = std::use_facet<boost::locale::info>(m_locale).language();
if (language == "zh") {
// Hard-code Pinyin sorting for all Chinese countries.
//
// There are three different ways of sorting Chinese and Western names:
// 1. Sort Chinese characters in pinyin order, but separate from Latin
// 2. Sort them interleaved with Latin, by the first character.
// 3. Sort them fully interleaved with Latin.
// Source: Mark Davis, ICU, http://sourceforge.net/mailarchive/forum.php?thread_name=CAJ2xs_GEnN-u3%3D%2B7P5puaF1%2BU__fX-4tuA-kEybThN9xsw577Q%40mail.gmail.com&forum_name=icu-support
//
// Either 2 or 3 is what apparently more people expect. Implementing 2 is
// harder, whereas 3 fits into the "generate keys, compare keys" concept
// of IndividualCompare, so we kind of arbitrarily implement that.
SE_LOG_DEBUG(NULL, "enabling Pinyin");
UErrorCode status = U_ZERO_ERROR;
icu::Transliterator *trans = icu::Transliterator::createInstance("Han-Latin", UTRANS_FORWARD, status);
m_trans.reset(trans);
if (U_FAILURE(status)) {
SE_LOG_WARNING(NULL, "creating ICU Han-Latin Transliterator for Pinyin failed, error code %s; falling back to normal collation", u_errorName(status));
m_trans.reset();
} else if (!trans) {
SE_LOG_WARNING(NULL, "creating ICU Han-Latin Transliterator for Pinyin failed, no error code; falling back to normal collation");
}
}
}
std::string CompareBoost::transform(const char *string) const
@ -78,7 +108,18 @@ std::string CompareBoost::transform(const char *string) const
std::string CompareBoost::transform(const std::string &string) const
{
return m_collator.transform(DEFAULT_COLLATION_LEVEL, string);
if (m_trans.get()) {
// std::string result;
// m_trans->transliterate(icu::StringPiece(string), icu::StringByteSink<std::string>(&result));
icu::UnicodeString buffer(string.c_str());
m_trans->transliterate(buffer);
std::string result;
buffer.toUTF8String(result);
result = m_collator.transform(DEFAULT_COLLATION_LEVEL, result);
return result;
} else {
return m_collator.transform(DEFAULT_COLLATION_LEVEL, string);
}
}
class CompareFirstLastBoost : public CompareBoost {

View File

@ -57,9 +57,9 @@ nodist_src_dbus_server_libsyncevodbusserver_la_SOURCES =
dist_pkgdata_DATA += src/dbus/server/bluetooth_products.ini
src_dbus_server_libsyncevodbusserver_la_LDFLAGS =
src_dbus_server_libsyncevodbusserver_la_LIBADD = $(LIBNOTIFY_LIBS) $(MLITE_LIBS) $(DBUS_LIBS) $(PCRECPP_LIBS)
src_dbus_server_libsyncevodbusserver_la_LIBADD = $(LIBNOTIFY_LIBS) $(MLITE_LIBS) $(DBUS_LIBS) $(PCRECPP_LIBS) $(ICU_LIBS)
src_dbus_server_libsyncevodbusserver_la_CPPFLAGS = -DHAVE_CONFIG_H -DSYNCEVOLUTION_LOCALEDIR=\"${SYNCEVOLUTION_LOCALEDIR}\" -I$(top_srcdir)/src -I$(top_srcdir)/test -I$(top_srcdir) -I$(gdbus_dir) $(BACKEND_CPPFLAGS)
src_dbus_server_libsyncevodbusserver_la_CXXFLAGS = $(SYNCEVOLUTION_CXXFLAGS) $(CORE_CXXFLAGS) $(SYNTHESIS_CFLAGS) $(GLIB_CFLAGS) $(DBUS_CFLAGS) $(LIBNOTIFY_CFLAGS) $(MLITE_CFLAGS) $(SYNCEVO_WFLAGS)
src_dbus_server_libsyncevodbusserver_la_CXXFLAGS = $(SYNCEVOLUTION_CXXFLAGS) $(CORE_CXXFLAGS) $(SYNTHESIS_CFLAGS) $(GLIB_CFLAGS) $(DBUS_CFLAGS) $(LIBNOTIFY_CFLAGS) $(MLITE_CFLAGS) $(SYNCEVO_WFLAGS) $(ICU_CFLAGS)
if COND_DBUS_PIM
src_dbus_server_server_cpp_files += \