--- a/toolkit/components/places/PlacesSyncUtils.jsm
+++ b/toolkit/components/places/PlacesSyncUtils.jsm
@@ -572,16 +572,75 @@ const BookmarkSyncUtils = PlacesSyncUtil
// Drop stale tombstones.
yield db.executeCached("DELETE FROM moz_bookmarks_deleted");
});
}
);
}),
/**
+ * Deduping looks for local siblings with the same attributes (title, URL,
+ * separator index, smart bookmark anno), and removes them if there are
+ * matching remote siblings. Only items that haven't been uploaded yet are
+ * candidates for deduping; we'd corrupt the server if we tried to dedupe
+ * items that already exist on other clients.
+ *
+ * In most cases, deduping won't be necessary. Item GUIDs are preserved in
+ * backups, so syncing a device with restored bookmarks should trigger Sync's
+ * `update` logic. While it's possible to create explicit dupes with the same
+ * parent and attributes, most users don't organize their bookmarks this way,
+ * and starring moves existing bookmarks instead of creating dupes. The only
+ * time we expect deduping to be necessary is on the first sync, where other
+ * devices might have similar, manually-added bookmarks with different GUIDs.
+ *
+ * During the download phase, we apply all incoming items, including dupes.
+ * Before uploading, we scan the tree and collapse similar `NEW` and `UNKNOWN`
+ * items.
+ *
+ * How it works:
+ *
+ * 1. Walk all syncable roots to find folders with the same name at the same
+ * level. We group by the level instead of the parent because items in
+ * local subtrees are still congruent with remote subtrees, but won't have
+ * the same parents.
+ * 2. Group the dupe folders by level and title.
+ * 3. Each group may have a different number of local (`NEW` and `UNKNOWN`)
+ * dupes and remote (`NORMAL`) matches. For example, if we have two local
+ * folders (A, AA) and two remote folders (B, BB) with the same name and
+ * level, we'll merge A into B and AA into BB. If we have (A, AA) locally,
+ * and (B) remotely, we'll merge A into B, and leave AA as a dupe.
+ * Similarly, if we have (A) locally, and (B, BB) remotely, we'll merge A
+ * into BB and take BB as a dupe.
+ * 4. Sort the local and remote dupes by position, then zip the two
+ * lists into a list of (local, remote) tuples. Sorting helps us pick the
+ * correct remote match if there are explicit dupes.
+ * 5. Move the contents of each local folder into the matching remote folder.
+ * For local bookmarks that don't exist on the server, this will preserve
+ * their order relative to other local bookmarks, but *not* any matches
+ * that exist on the server. This is a similar issue to bug 1352233.
+ * 6. Delete the (now-empty) local folder. If the user added new items to the
+ * folder in the meantime, this will fail, and we'll keep the old folder
+ * around so that they can manually move the items.
+ * 7. At this point, we have a single subtree with duplicate non-folder
+ * descendants: bookmarks, smart bookmarks, livemarks, and separators.
+ * 8. Group all items with similar attributes and the same parent.
+ * 9. For each group, remove as many duplicate local items as we have remote
+ * matches.
+ *
+ * And that's it. If the wind in my sail on the sea stays behind me, one day
+ * I'll know how far I'll go to dedupe all these bookmarks.
+ */
+ async dedupe(remoteIndices) {
+ let db = await PlacesUtils.promiseDBConnection();
+ await dedupeFolders(db, remoteIndices);
+ await dedupeItems(db, remoteIndices);
+ return pullSyncChanges(db);
+ },
+
+ /**
* Updates a bookmark with synced properties. Only Sync should call this
* method; other callers should use `Bookmarks.update`.
*
* The following properties are supported:
* - kind: Optional.
* - guid: Required.
* - parentGuid: Optional; reparents the bookmark if specified.
* - title: Optional.
@@ -1859,8 +1918,278 @@ function markChangesAsSyncing(db, change
var removeTombstones = Task.async(function* (db, guids) {
if (!guids.length) {
return Promise.resolve();
}
return db.execute(`
DELETE FROM moz_bookmarks_deleted
WHERE guid IN (${guids.map(guid => JSON.stringify(guid)).join(",")})`);
});
+
+// Merges local duplicate folders into matching remote folders.
+async function dedupeFolders(db, remoteIndices) {
+ let rows = await findDupeFolders(db);
+
+ let guidsToRemove = [];
+ for (let row of rows) {
+ let dupeGroup = row.getResultByName("dupeGroup");
+ let matches = matchDupeFolders(dupeGroup, remoteIndices);
+ for (let { oldGuid, newGuid } of matches) {
+ let childGuids = await fetchChildGuids(db, oldGuid);
+ for (let childGuid of childGuids) {
+ await PlacesUtils.bookmarks.update({
+ guid: childGuid,
+ parentGuid: newGuid,
+ index: PlacesUtils.bookmarks.DEFAULT_INDEX,
+ source: PlacesUtils.bookmarks.SOURCES.DEFAULT,
+ });
+ }
+ guidsToRemove.push(oldGuid);
+ }
+ }
+
+ BookmarkSyncLog.debug(`dedupeFolders: Removing deduped folders`,
+ guidsToRemove);
+
+ for (let guid of guidsToRemove) {
+ try {
+ await PlacesUtils.bookmarks.remove(guid, {
+ preventRemovalOfNonEmptyFolders: true,
+ // TODO: Consider a different source. We shouldn't write
+ // tombstones in any case, since the folder we're deleting
+ // is `NEW`.
+ source: PlacesUtils.bookmarks.SOURCES.DEFAULT,
+ });
+ } catch (ex) {
+ BookmarkSyncLog.debug(`dedupeFolders: Error removing deduped folder ` +
+ guid, ex);
+ }
+ }
+}
+
+// Removes all local duplicate bookmarks, queries, livemarks, and separators
+// that have remote matches.
+async function dedupeItems(db, remoteIndices) {
+ let rows = await findDupeItems(db);
+ for (let row of rows) {
+ let dupeGroup = row.getResultByName("dupeGroup");
+ let oldGuids = matchDupeItemGuids(dupeGroup, remoteIndices);
+ for (let guid of oldGuids) {
+ try {
+ await PlacesUtils.bookmarks.remove(guid, {
+ preventRemovalOfNonEmptyFolders: true,
+ source: PlacesUtils.bookmarks.SOURCES.DEFAULT,
+ });
+ } catch (ex) {
+ BookmarkSyncLog.debug(`dedupeItems: Error removing deduped item ` +
+ guid, ex);
+ }
+ }
+ }
+}
+
+// Returns an array of rows containing folder dupe group keys. Instead of
+// selecting the duplicate rows and re-grouping in JS, we encode the dupe
+// info into a string "dupe key", and then abuse SQLite's `GROUP_CONCAT` to join
+// the dupe keys into a "dupe group key". The dupe key is `{level} |
+// {position} | {guid} | {syncStatus}`, and the dupe group key is multiple dupe
+// keys joined with a comma.
+async function findDupeFolders(db) {
+ return db.executeCached(`
+ WITH RECURSIVE
+ descendants(id, guid, syncStatus, level, position, title) AS (
+ SELECT b.id, b.guid, b.syncStatus, 0, b.position, b.title
+ FROM moz_bookmarks b
+ WHERE b.guid IN ('menu________', 'toolbar_____', 'unfiled_____',
+ 'mobile______')
+ UNION ALL
+ SELECT b.id, b.guid, b.syncStatus, d.level + 1, b.position, b.title
+ FROM moz_bookmarks b
+ JOIN descendants d ON d.id = b.parent
+ WHERE b.type = :type AND NOT EXISTS (
+ SELECT 1
+ FROM moz_items_annos a
+ JOIN moz_anno_attributes n ON n.id = a.anno_attribute_id
+ WHERE a.item_id = b.id AND
+ n.name = :livemarksAnno
+ )
+ )
+ SELECT GROUP_CONCAT(d.position || '|' ||
+ d.guid || '|' ||
+ d.syncStatus, ',') AS dupeGroup
+ FROM descendants d
+ GROUP BY d.level, d.title
+ HAVING COUNT(*) > 1
+ `, { type: PlacesUtils.bookmarks.TYPE_FOLDER,
+ livemarksAnno: PlacesUtils.LMANNO_FEEDURI });
+}
+
+// Returns an array of rows containing smart bookmark, bookmark, livemark, and
+// separator dupe group keys. See `findDupeFolders` for an explanation of how
+// dupe keys and dupe group keys work. The dupe key for these items is just
+// `{guid} | {syncStatus}`; unlike folders, we don't need the level or position.
+async function findDupeItems(db) {
+ return db.executeCached(`
+ WITH
+ smartBookmarks(id, parent, smartBookmarkName, position, guid,
+ syncStatus) AS (
+ SELECT b.id, b.parent, a.content, b.position, b.guid,
+ b.syncStatus
+ FROM moz_bookmarks b
+ JOIN moz_items_annos a ON a.item_id = b.id
+ JOIN moz_anno_attributes n ON n.id = a.anno_attribute_id
+ WHERE b.type = :bookmarkType AND
+ n.name = :smartBookmarksAnno
+ )
+
+ /* Smart bookmarks: parent, query name. */
+ SELECT GROUP_CONCAT(s.position || '|' ||
+ s.guid || '|' ||
+ s.syncStatus, ',') as dupeGroup
+ FROM smartBookmarks s
+ GROUP BY s.parent, s.smartBookmarkName
+ HAVING COUNT(*) > 1
+ UNION ALL
+
+ /* Bookmarks: parent, title, URL. */
+ SELECT GROUP_CONCAT(b.position || '|' ||
+ b.guid || '|' ||
+ b.syncStatus, ',') as dupeGroup
+ FROM moz_bookmarks b
+ JOIN moz_places h ON h.id = b.fk
+ LEFT JOIN smartBookmarks s ON s.id = b.id
+ WHERE b.type = :bookmarkType AND
+ s.id IS NULL
+ GROUP BY b.parent, b.title, h.url_hash, h.url
+ HAVING COUNT(*) > 1
+ UNION ALL
+
+ /* Livemarks: parent, title. TODO: Feed URL? */
+ SELECT GROUP_CONCAT(b.position || '|' ||
+ b.guid || '|' ||
+ b.syncStatus, ',') as dupeGroup
+ FROM moz_bookmarks b
+ JOIN moz_items_annos a ON a.item_id = b.id
+ JOIN moz_anno_attributes n ON n.id = a.anno_attribute_id
+ WHERE b.type = :folderType AND
+ n.name = :livemarksAnno
+ GROUP BY b.parent, b.title
+ HAVING COUNT(*) > 1
+ UNION ALL
+
+ /* Separators: parent. TODO: Only dupe immediate siblings? */
+ SELECT GROUP_CONCAT(b.position || '|' ||
+ b.guid || '|' ||
+ b.syncStatus, ',') as dupeGroup
+ FROM moz_bookmarks b
+ WHERE b.type = :separatorType
+ GROUP BY b.parent
+ HAVING COUNT(*) > 1
+ `, { bookmarkType: PlacesUtils.bookmarks.TYPE_BOOKMARK,
+ smartBookmarksAnno: BookmarkSyncUtils.SMART_BOOKMARKS_ANNO,
+ folderType: PlacesUtils.bookmarks.TYPE_FOLDER,
+ livemarksAnno: PlacesUtils.LMANNO_FEEDURI,
+ separatorType: PlacesUtils.bookmarks.TYPE_SEPARATOR });
+}
+
+function dupeGroupKeyToInfos(dupeGroup, remoteIndices) {
+ let infos = { local: [], remote: [] };
+
+ // Extract the position, GUID, and sync status of every dupe in the group,
+ // and determine whether the item is local or remote.
+ let dupes = dupeGroup.split(",");
+ for (let dupe of dupes) {
+ let [index, guid, syncStatus] = dupe.split("|");
+ let info = {
+ index: +index,
+ guid,
+ syncStatus: +syncStatus,
+ };
+ switch (info.syncStatus) {
+ // `NEW` and `UNKNOWN` items are candidates for deduping. `NEW` items
+ // aren't on the server yet, so we won't write tombstones or update their
+ // change counters if we have matching `NORMAL` bookmarks. `UNKNOWN`
+ // bookmarks may be on the server; we'll still dedupe them, but we'll need
+ // to bump their change counters (TODO: Actually do this).
+ case PlacesUtils.bookmarks.SYNC_STATUS.NEW:
+ case PlacesUtils.bookmarks.SYNC_STATUS.UNKNOWN:
+ infos.local.push(info);
+ break;
+
+ // `NORMAL` bookmarks are on the server, so they're possible matches for
+ // local dupes. We'll always match local to remote; never local to local
+ // or remote to remote.
+ case PlacesUtils.bookmarks.SYNC_STATUS.NORMAL:
+ infos.remote.push(info);
+ break;
+
+ default:
+ BookmarkSyncLog.debug(`dupeGroupKeyToInfos: Item ${info.guid} not ` +
+ `duped because its sync status is ` +
+ info.syncStatus);
+ }
+ }
+
+ // Sort the local dupes and remote matches by their indices. This makes it
+ // more likely that we'll correctly match up explicit dupes. We may dedupe
+ // incorrectly if the matches are in a different order, but there's not much
+ // we can do in that situation.
+ infos.local.sort((a, b) => b.index - a.index);
+ infos.remote.sort((a, b) => {
+ let aSyncId = BookmarkSyncUtils.syncIdToGuid(a.guid);
+ let bSyncId = BookmarkSyncUtils.syncIdToGuid(b.guid);
+ if (!remoteIndices.has(aSyncId) && !remoteIndices.has(bSyncId)) {
+ return 0;
+ }
+ if (!remoteIndices.has(aSyncId)) {
+ return 1;
+ }
+ if (!remoteIndices.has(bSyncId)) {
+ return -1;
+ }
+ return remoteIndices.get(b.guid) - remoteIndices.get(a.guid);
+ });
+
+ return infos;
+}
+
+// Returns an array of matching `{ oldGuid, newGuid }` tuples for the dupe
+// group. We'll need to move the contents of `oldGuid` into `newGuid`, then
+// remove `oldGuid`.
+function matchDupeFolders(dupeGroup, remoteIndices) {
+ let infos = dupeGroupKeyToInfos(dupeGroup, remoteIndices);
+ BookmarkSyncLog.debug(`matchDupeFolders: Found candidates for deduping`,
+ infos);
+
+ // Map local folders to their matching remote folders. We can run out of local
+ // dupes or remote matches if there are explicit dupes. In that case, we'll
+ // leave the remaining candidates alone.
+ let matches = [];
+ let localIndex = 0;
+ let remoteIndex = 0;
+ while (localIndex < infos.local.length &&
+ remoteIndex < infos.remote.length) {
+ let oldGuid = infos.local[localIndex].guid;
+ let newGuid = infos.remote[remoteIndex].guid;
+ matches.push({ oldGuid, newGuid });
+ localIndex++;
+ remoteIndex++;
+ }
+
+ BookmarkSyncLog.debug(`matchDupeFolders: Deduping folders`, matches);
+ return matches;
+}
+
+// Returns an array of local non-folder dupe GUIDs that we should remove.
+function matchDupeItemGuids(dupeGroup, remoteIndices) {
+ let infos = dupeGroupKeyToInfos(dupeGroup, remoteIndices);
+ BookmarkSyncLog.debug(`matchDupeItemGuids: Found candidates for deduping`,
+ infos);
+
+ // The number of local dupes to remove. We can run out of matches if there
+ // are explicit dupes; in that case, we'll leave the remaining local dupes
+ // alone.
+ let maxLocalItemsToDedupe = Math.min(infos.local.length, infos.remote.length);
+ let guidsToRemove = infos.local.slice(0, maxLocalItemsToDedupe);
+
+ BookmarkSyncLog.debug(`matchDupeItemGuids: Deduping items`, guidsToRemove);
+ return guidsToRemove;
+}