sync sources: added support for avoiding listAllItems()

If a sync source can quickly determine that nothing has changed,
then SyncSourceRevisions::detectChanges() can use a shortcut and
simply copy the list of known uids => CHANGES_NONE mode.

Such a change detection will be possible in the WebDAV backends (using
the Calendar Collection Entity Tag (CTag) as "database revision"
string) and perhaps in the future also in Evolution Data Server (after
adding a new API).

This commit also adds a CHANGES_SLOW mode. This is meant as a hint that
detecting changes is not necessary. Right now, this mode is the same
as CHANGES_FULL because the code changes should be minimized at this
time (in preparation for 1.2). More work will probably be needed
to distinguish between unit testing and real slow syncs.

Finally, a way to pass information about the cached item list is added
with the SyncSourceRevisions::setAllItems() call. setAllItems() and
listAllItems() are mutually exclusive: either the backend delivers
the information, or it receives it. The CalDAV backend depends on this
because it needs to maintain a cache with information about all items.
This commit is contained in:
Patrick Ohly 2011-06-22 11:53:53 +02:00
parent f0d45ac3a4
commit 415e33bada
6 changed files with 221 additions and 14 deletions

View File

@ -183,6 +183,22 @@ void MapSyncSource::listAllItems(SyncSourceRevisions::RevisionMap_t &revisions)
}
}
void MapSyncSource::setAllItems(const SyncSourceRevisions::RevisionMap_t &revisions)
{
    // Counterpart of listAllItems(): the flat luid => revision list
    // is regrouped by the first half of each split LUID and then
    // handed to the wrapped sub source in one call.
    SubSyncSource::SubRevisionMap_t grouped;
    BOOST_FOREACH(const SyncSourceRevisions::RevisionMap_t::value_type &item,
                  revisions) {
        StringPair parts = splitLUID(item.first);
        pair <string, set<string> > &slot = grouped[parts.first];
        // All entries sharing parts.first write into the same slot,
        // so the revision of the entry visited last is the one kept.
        slot.first = item.second;
        slot.second.insert(parts.second);
    }
    m_sub->setAllSubItems(grouped);
}
SyncSourceRaw::InsertItemResult MapSyncSource::insertItem(const std::string &luid, const std::string &item, bool raw)
{
StringPair ids = splitLUID(luid);

View File

@ -35,8 +35,8 @@ class MapSyncSource;
* be wrapped by MapSyncSource.
*
* The original interface will only be used in "raw" mode, which
* should bypass any kind of cache used by the implementation and are
* guaranteed to be passed merged items.
* should bypass any kind of cache used by the implementation.
* They are guaranteed to be passed merged items.
*
* The new methods with uid and subid are used during a sync
* and should use the cache. They work on single items but modify
@ -100,7 +100,31 @@ class SubSyncSource : virtual public SyncSourceBase
/** called after a sync */
virtual void endSubSync(bool success) = 0;
/**
* A unique identifier for the current state of the complete database.
* The semantic is the following:
* - empty string implies "state unknown" or "identifier not supported"
* - id not empty and ID1 == ID2 implies "nothing has changed";
* the inverse is not true (ids may be different although nothing has changed)
*
* Matches TrackingSyncSource::databaseRevision().
*/
virtual std::string subDatabaseRevision() { return ""; }
/**
* Either listAllSubItems() or setAllSubItems() will be called after begin().
* In the first case, the sub source is expected to provide a full list
* of its items. In the second case, the caller was able to determine
* that its cached copy of that list is still correct and provides it
* to the source.
*/
virtual void listAllSubItems(SubRevisionMap_t &revisions) = 0;
/**
* Called instead of listAllSubItems().
*/
virtual void setAllSubItems(const SubRevisionMap_t &revisions) = 0;
virtual SubItemResult insertSubItem(const std::string &uid, const std::string &subid,
const std::string &item) = 0;
virtual void readSubItem(const std::string &uid, const std::string &subid, std::string &item) = 0;
@ -187,6 +211,8 @@ class MapSyncSource : public TrackingSyncSource,
virtual std::string endSync(bool success) { m_sub->endSubSync(success); return TrackingSyncSource::endSync(success); }
virtual bool isEmpty() { return dynamic_cast<SyncSource &>(*m_sub).getOperations().m_isEmpty(); }
virtual void listAllItems(SyncSourceRevisions::RevisionMap_t &revisions);
virtual void setAllItems(const SyncSourceRevisions::RevisionMap_t &revisions);
virtual std::string databaseRevision() { return m_sub->subDatabaseRevision(); }
virtual InsertItemResult insertItem(const std::string &luid, const std::string &item, bool raw);
virtual void readItem(const std::string &luid, std::string &item, bool raw);
virtual void removeItem(const string &luid);

View File

@ -918,8 +918,24 @@ void SyncSourceRevisions::restoreData(const SyncSource::Operations::ConstBackupI
}
}
void SyncSourceRevisions::detectChanges(ConfigNode &trackingNode)
void SyncSourceRevisions::detectChanges(ConfigNode &trackingNode, ChangeMode mode)
{
if (mode == CHANGES_NONE) {
// shortcut because nothing changed: just copy our known item list
ConfigProps props;
trackingNode.readProperties(props);
RevisionMap_t revisions;
BOOST_FOREACH(const StringPair &mapping, props) {
const string &uid = mapping.first;
const string &revision = mapping.second;
addItem(uid);
revisions[uid] = revision;
}
setAllItems(revisions);
return;
}
initRevisions();
// Delay setProperty calls until after checking all uids.
@ -936,6 +952,8 @@ void SyncSourceRevisions::detectChanges(ConfigNode &trackingNode)
// always remember the item, need full list
addItem(uid);
// TODO: avoid unnecessary work in CHANGES_SLOW mode
// Not done yet to avoid introducing bugs.
string serverRevision(trackingNode.readProperty(uid));
if (!serverRevision.size()) {
addItem(uid, NEW);

View File

@ -1666,8 +1666,8 @@ class SyncSourceRevisions : virtual public SyncSourceChanges, virtual public Syn
typedef map<string, string> RevisionMap_t;
/**
* fills the complete mapping from UID to revision string of all
* currently existing items
* Fills the complete mapping from UID to revision string of all
* currently existing items.
*
* Usually both UID and revision string must be non-empty. The
* only exception is a refresh-from-client: in that case the
@ -1678,11 +1678,66 @@ class SyncSourceRevisions : virtual public SyncSourceChanges, virtual public Syn
* a non-empty string is necessary and none was provided.
*
* This call is typically invoked only once during the
* lifetime of a source. The result returned in that invocation is
* lifetime of a source, at the time when detectChanges() needs
* the information. The result returned in that invocation is
* used throughout the session.
*
* When detectChanges() is called with CHANGES_NONE, listAllItems()
* is avoided. Instead the cached information is used. Sources
* may need to know that information, so in this case setAllItems()
* is called as part of detectChanges().
*/
virtual void listAllItems(RevisionMap_t &revisions) = 0;
/**
* Called by SyncSourceRevisions::detectChanges() to tell
* the derived class about the cached information if (and only
* if) listAllItems() was not called. The derived class
* might not need this information, so the default implementation
* simply ignores it.
*
* A more complex API could have been defined to only prepare the
* information when needed, but that seemed unnecessarily complex.
*/
virtual void setAllItems(const RevisionMap_t &revisions) {}
/**
* Tells detectChanges() how to do its job. The mode is chosen
* by the caller of detectChanges().
*/
enum ChangeMode {
/**
* Call listAllItems() and compare the result against the
* list of previous items to calculate which items were
* added, updated or deleted.
*/
CHANGES_FULL,
/**
* Don't rely on previous information. Will call
* listAllItems() and generate a full list of items based on
* the result.
*
* TODO: Added/updated/deleted information is still getting
* calculated based on the previous items although it is not
* needed. In other words, CHANGES_SLOW == CHANGES_FULL at the
* moment. Once we are sure that slow sync detection works,
* calculating changes in this mode can be removed.
*/
CHANGES_SLOW,
/**
* Caller has already determined that a) no items have changed
* and that b) the list of previous items is valid. For example,
* some backends have a way of getting a revision string for
* the whole database and can compare that against the value
* from the end of the previous sync.
*
* In this mode, listAllItems() is not called. The list of
* all items is copied from the previously stored uid/revision
* pairs, with no items marked as added/updated/deleted, and
* is passed on to setAllItems().
*/
CHANGES_NONE
};
/**
* calculate changes, call when sync source is ready for
* listAllItems() and before changes are needed
@ -1692,8 +1747,12 @@ class SyncSourceRevisions : virtual public SyncSourceChanges, virtual public Syn
* the caller.
*
* @param trackingNode a config node for exclusive use by this class
* @param mode determines how changes are detected; if unsure,
* use CHANGES_FULL, which will always produce
* the required information, albeit more slowly
* than the other modes
*/
void detectChanges(ConfigNode &trackingNode);
void detectChanges(ConfigNode &trackingNode, ChangeMode mode);
/**
* record that an item was added or updated

View File

@ -33,10 +33,11 @@ TrackingSyncSource::TrackingSyncSource(const SyncSourceParams &params,
TestingSyncSource(params),
m_trackingNode(trackingNode)
{
boost::shared_ptr<ConfigNode> safeNode(new SafeConfigNode(params.m_nodes.getTrackingNode()));
if (!m_trackingNode) {
m_trackingNode.reset(new PrefixConfigNode("item-",
boost::shared_ptr<ConfigNode>(new SafeConfigNode(params.m_nodes.getTrackingNode()))));
m_trackingNode.reset(new PrefixConfigNode("item-", safeNode));
}
m_metaNode = safeNode;
m_operations.m_checkStatus = boost::bind(&TrackingSyncSource::checkStatus, this, _1);
m_operations.m_isEmpty = boost::bind(&TrackingSyncSource::isEmpty, this);
SyncSourceRevisions::init(this, this, granularitySeconds, m_operations);
@ -44,7 +45,28 @@ TrackingSyncSource::TrackingSyncSource(const SyncSourceParams &params,
void TrackingSyncSource::checkStatus(SyncSourceReport &changes)
{
detectChanges(*m_trackingNode);
// use the most reliable (and most expensive) method by default
ChangeMode mode = CHANGES_FULL;
// assume that we do a regular sync, with reusing stored information
// if possible
string oldRevision = m_metaNode->readProperty("databaseRevision");
if (!oldRevision.empty()) {
string newRevision = databaseRevision();
SE_LOG_DEBUG(this, NULL, "old database revision '%s', new revision '%s'",
oldRevision.c_str(),
newRevision.c_str());
if (newRevision == oldRevision) {
SE_LOG_DEBUG(this, NULL, "revisions match, no item changes");
mode = CHANGES_NONE;
}
}
if (mode == CHANGES_FULL) {
SE_LOG_DEBUG(this, NULL, "using full item scan to detect changes");
}
detectChanges(*m_trackingNode, mode);
// copy our item counts into the report
changes.setItemStat(ITEM_LOCAL, ITEM_ADDED, ITEM_TOTAL, getNewItems().size());
changes.setItemStat(ITEM_LOCAL, ITEM_UPDATED, ITEM_TOTAL, getUpdatedItems().size());
@ -54,7 +76,45 @@ void TrackingSyncSource::checkStatus(SyncSourceReport &changes)
void TrackingSyncSource::beginSync(const std::string &lastToken, const std::string &resumeToken)
{
detectChanges(*m_trackingNode);
// use the most reliable (and most expensive) method by default
ChangeMode mode = CHANGES_FULL;
// resume token overrides the normal token; safe to ignore in most
// cases and this detectChanges() is done independently of the
// token, but let's do it right here anyway
string token;
if (!resumeToken.empty()) {
token = resumeToken;
} else {
token = lastToken;
}
// slow sync if token is empty
if (token.empty()) {
SE_LOG_DEBUG(this, NULL, "slow sync or testing, do full item scan to detect changes");
mode = CHANGES_SLOW;
} else {
string oldRevision = m_metaNode->readProperty("databaseRevision");
if (!oldRevision.empty()) {
string newRevision = databaseRevision();
SE_LOG_DEBUG(this, NULL, "old database revision '%s', new revision '%s'",
oldRevision.c_str(),
newRevision.c_str());
if (newRevision == oldRevision) {
SE_LOG_DEBUG(this, NULL, "revisions match, no item changes");
mode = CHANGES_NONE;
}
// Reset old revision. If anything goes wrong, then we
// don't want to rely on a possibly incorrect optimization.
m_metaNode->setProperty("databaseRevision", "");
m_metaNode->flush();
}
}
if (mode == CHANGES_FULL) {
SE_LOG_DEBUG(this, NULL, "using full item scan to detect changes");
}
detectChanges(*m_trackingNode, mode);
}
std::string TrackingSyncSource::endSync(bool success)
@ -63,15 +123,22 @@ std::string TrackingSyncSource::endSync(bool success)
flush();
if (success) {
string updatedRevision = databaseRevision();
m_metaNode->setProperty("databaseRevision", updatedRevision);
// flush both nodes, just in case; in practice, the properties
// end up in the same file and only get flushed once
m_trackingNode->flush();
m_metaNode->flush();
} else {
// The Synthesis docs say that we should rollback in case of
// failure. Cannot do that for data, so lets at least keep
// the revision map unchanged.
}
// no token handling at the moment (not needed for clients)
return "";
// no token handling at the moment (not needed for clients):
// return a non-empty token to distinguish an incremental
// sync from a slow sync in beginSync()
return "1";
}
TrackingSyncSource::InsertItemResult TrackingSyncSource::insertItem(const std::string &luid, const std::string &item)

View File

@ -80,7 +80,9 @@ class TrackingSyncSource : public TestingSyncSource,
* @param trackingNode a ConfigNode instance which will be used to store
* luid/revision string pairs; if not set, TrackingSyncSource
* will create its own node with the tracking node
* in params as storage
* in params as storage; used by MapSyncSource to
* add its own semantic (revision string shared between
* items with the same uid)
*/
TrackingSyncSource(const SyncSourceParams &params,
int granularitySeconds = 1,
@ -125,6 +127,15 @@ class TrackingSyncSource : public TestingSyncSource,
*/
virtual bool isEmpty() = 0;
/**
* A unique identifier for the current state of the complete database.
* The semantic is the following:
* - empty string implies "state unknown" or "identifier not supported" (the default implementation)
* - id not empty and id_1 == id_2 implies "nothing has changed";
* the inverse is not true (ids may be different although nothing has changed)
*/
virtual std::string databaseRevision() { return ""; }
/**
* fills the complete mapping from LUID to revision string of all
* currently existing items
@ -218,6 +229,16 @@ class TrackingSyncSource : public TestingSyncSource,
void checkStatus(SyncSourceReport &changes);
boost::shared_ptr<ConfigNode> m_trackingNode;
/**
* Stores meta information besides the item list:
* - "databaseRevision" = result of databaseRevision() at end of last sync
*
* Shares the same key/value store as m_trackingNode,
* which uses the "item-" prefix in its keys to
* avoid name clashes.
*/
boost::shared_ptr<ConfigNode> m_metaNode;
protected:
/* implementations of SyncSource callbacks */
virtual void beginSync(const std::string &lastToken, const std::string &resumeToken);