Bug 1317223 (part 3) - A bookmark repair requestor. r=markh,tcsc draft
authorMark Hammond <mhammond@skippinet.com.au>
Mon, 20 Feb 2017 12:36:08 +1100
changeset 486825 92ccfc8be356153b78675e1ac156ce09f00f038d
parent 486824 2c5a78995d0dc65cad8afb82b43c89c3f079ccc6
child 486826 b3ffee0fc9f0f3fd64dfca68cb550a0732f35cf7
push id46067
push usermhammond@skippinet.com.au
push dateMon, 20 Feb 2017 02:02:46 +0000
reviewersmarkh, tcsc
bugs1317223
milestone54.0a1
Bug 1317223 (part 3) - A bookmark repair requestor. r=markh,tcsc This is where the fun actually starts :) The bookmark repair requestor takes the validation results, and if those results include missing children records (when the parent exists but a child doesn't) and orphans (when the child exists but the parent doesn't) it kicks off a repair request by sending a "repairRequest" command to other clients in a controlled manner (effectivly a round-robin of all suitable other devices, waiting for a period of time before giving up on that client and moving on to another) until either all requested IDs have been found or no other clients are suitable. The main TODOs in this patch are: * See if there are other obvious validation results which mean a simple "record is missing from the server". * Decide if we really do need to attempt each client twice given that part 1 changes the engine implementation such that the repairRequest command is not actually removed from the client record until a repairResponse has been written. (See XXX comment in _continueRepairs) * Possibly more tests of edge and error cases. MozReview-Commit-ID: 7rRNbBx8Vo3
services/sync/modules/bookmark_repair.js
services/sync/modules/collection_repair.js
services/sync/moz.build
services/sync/tests/unit/test_bookmark_repair_requestor.js
services/sync/tests/unit/xpcshell.ini
tools/lint/eslint/modules.json
new file mode 100644
--- /dev/null
+++ b/services/sync/modules/bookmark_repair.js
@@ -0,0 +1,432 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+"use strict";
+
+const Cu = Components.utils;
+
+this.EXPORTED_SYMBOLS = ["BookmarkRepairRequestor"];
+
+Cu.import("resource://gre/modules/Services.jsm");
+Cu.import("resource://gre/modules/Preferences.jsm");
+Cu.import("resource://gre/modules/Log.jsm");
+Cu.import("resource://services-sync/collection_repair.js");
+Cu.import("resource://services-sync/constants.js");
+
+const log = Log.repository.getLogger("Sync.Engine.Bookmarks.Repair");
+
+const PREF_BRANCH = "services.sync.repairs.bookmarks.";
+
+// How long should we wait after sending a repair request before we give up?
+const RESPONSE_INTERVAL_TIMEOUT = 60 * 60 * 24 * 3; // 3 days
+
+class AbortRepairError extends Error {
+  constructor(reason) {
+    super();
+    this.reason = reason;
+  }
+}
+
+// The states we can be in.
+const STATE = Object.freeze({
+  NOT_REPAIRING: "",
+  // We need to try and find another client to use.
+  NEED_NEW_CLIENT: "repair.need-new-client",
+  // We've sent the first request to a client.
+  SENT_REQUEST: "repair.sent",
+  // We've retried a request to a client.
+  SENT_SECOND_REQUEST: "repair.sent-again",
+  // There were no problems, but we've gone as far as we can
+  FINISHED: "repair.finished",
+  // We've found an error that forces us to abort this entire repair cycle.
+  ABORTED: "repair.aborted",
+});
+
+// The preferences we use to hold our state.
+const PREF = Object.freeze({
+  // If a repair is in progress, this is the generated GUID for the "flow ID"
+  REPAIR_ID: "flowID",
+  // The IDs we are currently trying to obtain via the repair process, space sep'd.
+  REPAIR_MISSING_IDS: "ids",
+  // The IDs of the clients we've tried to get the missing items from, space sep'd.
+  // The final element in the array if the client we are currently expecting to
+  // see stuff from.
+  REPAIR_CLIENTS: "clients",
+  // The time, in seconds, when we initiated the most recent client request.
+  REPAIR_WHEN: "when",
+  // Our current state.
+  CURRENT_STATE: "state",
+});
+
+class BookmarkRepairRequestor extends CollectionRepairRequestor {
+  constructor(service = null) {
+    super(service);
+    this.prefs = new Preferences(PREF_BRANCH);
+  }
+
+  // Exposed incase another module needs to understand our state
+  get STATE() {
+    return STATE;
+  }
+
+  /* See if the repairer is willing and able to begin a repair process given
+     the specified validation information.
+     Returns true if a repair was started and false otherwise.
+  */
+  startRepairs(validationInfo, flowID) {
+    if (this._currentState != STATE.NOT_REPAIRING) {
+      log.info(`Can't start a repair - repair with ID ${this._flowID} is already in progress`);
+      return false;
+    }
+
+    // Set of ids of "known bad records". Many of the validation issues will
+    // report duplicates -- if the server is missing a record, it is unlikely
+    // to cause only a single problem.
+    let ids = new Set();
+
+    // Note that we allow any of the validation problem fields to be missing so
+    // that tests have a slightly easier time, hence the `|| []` in each loop.
+
+    // missing children records when the parent exists but a child doesn't.
+    for (let { child } of validationInfo.problems.missingChildren || []) {
+      ids.add(child);
+    }
+
+    // orphans are when the child exists but the parent doesn't.
+    // This could either be a problem in the child (it's wrong about the node
+    // that should be it's parent), or the parent could simply be missing.
+    for (let { parent, id } of validationInfo.problems.orphans || []) {
+      // Request both, to handle both cases
+      ids.add(id);
+      ids.add(parent);
+    }
+
+    // Entries where we have the parent but know for certain that the child was
+    // deleted.
+    for (let { parent } of validationInfo.problems.deletedChildren || []) {
+      ids.add(parent);
+    }
+
+    // Entries where the child references a parent that we don't have, but we
+    // know why: the parent was deleted.
+    for (let { child } of validationInfo.problems.deletedParents || []) {
+      ids.add(child);
+    }
+
+    // Cases where the parent and child disagree about who the parent is.
+    for (let { parent, child } of validationInfo.problems.parentChildMismatches || []) {
+      // Request both, since we don't know which is right.
+      ids.add(parent);
+      ids.add(child);
+    }
+
+    // Cases where multiple parents reference a child. We re-request both the
+    // child, and all the parents that think they own it. This may be more than
+    // we need, but I don't think we can safely make the assumption that the
+    // child is right.
+    for (let { parents, child } of validationInfo.problems.multipleParents || []) {
+      for (let parent of parents) {
+        ids.add(parent);
+      }
+      ids.add(child);
+    }
+
+    // XXX - any others we should consider?
+    if (ids.size == 0) {
+      log.info("Not starting a repair as there are no problems");
+      return false;
+    }
+
+    log.info(`Starting a repair, looking for ${ids.size} missing item(s)`);
+    // setup our prefs to indicate we are on our way.
+    this._flowID = flowID;
+    this._currentIDs = Array.from(ids);
+    this._currentState = STATE.NEED_NEW_CLIENT;
+    this.service.recordTelemetryEvent("repair", "started", undefined, { flowID, numIDs: ids.size.toString() });
+    return this.continueRepairs();
+  }
+
+  /* Work out what state our current repair request is in, and whether it can
+     proceed to a new state.
+     Returns true if we could continue the repair - even if the state didn't
+     actually move. Returns false if we aren't actually repairing.
+  */
+  continueRepairs(response = null) {
+    // Note that "ABORTED" and "FINISHED" should never be current when this
+    // function returns - the end up this function resets to NOT_REPAIRING
+    // in those cases.
+    if (this._currentState == STATE.NOT_REPAIRING) {
+      return false;
+    }
+
+    let state, newState;
+    // we loop until the state doesn't change - but enforce a max of 10 times
+    // to prevent errors causing infinite loops.
+    for (let i = 0; i < 10; i++) {
+      state = this._currentState;
+      log.info("continueRepairs starting with state", state);
+      try {
+        newState = this._continueRepairs(state, response);
+        log.info("continueRepairs has next state", newState);
+      } catch (ex) {
+        if (!(ex instanceof AbortRepairError)) {
+          throw ex;
+        }
+        log.info(`Repair has been aborted: ${ex.reason}`);
+        newState = STATE.ABORTED;
+      }
+
+      if (newState == STATE.ABORTED) {
+        break;
+      }
+
+      this._currentState = newState;
+      Services.prefs.savePrefFile(null); // flush prefs.
+      if (state == newState) {
+        break;
+      }
+    }
+    if (state != newState) {
+      log.error("continueRepairs spun without getting a new state");
+    }
+    if (newState == STATE.FINISHED || newState == STATE.ABORTED) {
+      let object = newState == STATE.FINISHED ? "finished" : "aborted";
+      let extra = { flowID: this._flowID, numIDs: this._currentIDs.length.toString() };
+      this.service.recordTelemetryEvent("repair", object, undefined, extra);
+      // reset our state and flush our prefs.
+      this.prefs.resetBranch();
+      Services.prefs.savePrefFile(null); // flush prefs.
+    }
+    return true;
+  }
+
+  _continueRepairs(state, response = null) {
+    switch (state) {
+      case STATE.SENT_REQUEST:
+      case STATE.SENT_SECOND_REQUEST:
+        let flowID = this._flowID;
+        let clientsUsed = this._clientsUsed;
+        if (!clientsUsed.length) {
+          throw new AbortRepairError(`In state ${state} but have no client IDs listed`);
+        }
+        let clientID = clientsUsed[clientsUsed.length - 1];
+        if (response) {
+          // We got an explicit response - let's see how we went.
+          if (response.flowID != flowID || response.clientID != clientID ||
+              response.request != "upload") {
+            log.info("got a response to a different repair request", response);
+            // hopefully just a stale request that finally came in (either from
+            // an entirely different repair flow, or from a client we've since
+            // given up on.) It doesn't mean we need to abort though...
+            break;
+          }
+          // Pull apart the response and see if it provided everything we asked for.
+          let fixedIDs = new Set(response.ids);
+          let remainingIDs = [];
+          for (let id of this._currentIDs) {
+            if (!fixedIDs.has(id)) {
+              remainingIDs.push(id);
+            }
+          }
+          log.info(`repair response from ${clientID} provided "${response.ids}", remaining now "${remainingIDs}"`);
+          this._currentIDs = remainingIDs;
+          if (remainingIDs.length) {
+            // try a new client for the remaining ones.
+            state = STATE.NEED_NEW_CLIENT;
+          } else {
+            state = STATE.FINISHED;
+          }
+          // record telemetry about this
+          let extra = {
+            deviceID: this.service.identity.hashedDeviceID(clientID),
+            flowID,
+            numIDs: response.ids.length.toString(),
+          }
+          this.service.recordTelemetryEvent("repair", "response", "upload", extra);
+          break;
+        }
+        // So we've sent a request - and don't yet have a response. See if the
+        // client we sent it to has removed it from its list (ie, whether it
+        // has synced since we wrote the request)
+        let client = this.service.clientsEngine.remoteClient(clientID);
+        if (!client) {
+          // hrmph - the client has disappeared.
+          log.info(`previously requested client "${clientID}" has vanished - moving to next step`);
+          state = STATE.NEED_NEW_CLIENT;
+          let extra = {
+            deviceID: this.service.identity.hashedDeviceID(clientID),
+            flowID,
+          }
+          this.service.recordTelemetryEvent("repair", "abandon", "missing", extra);
+         break;
+        }
+        if (this._isCommandPending(clientID, flowID)) {
+          // So the command we previously sent is still queued for the client
+          // (ie, that client is yet to have synced). Let's see if we should
+          // give up on that client.
+          let lastRequestSent = this.prefs.get(PREF.REPAIR_WHEN);
+          let timeLeft = lastRequestSent + RESPONSE_INTERVAL_TIMEOUT - this._now() / 1000;
+          if (timeLeft <= 0) {
+            log.info(`previous request to client ${clientID} is pending, but has taken too long`);
+            state = STATE.NEED_NEW_CLIENT;
+            // XXX - should we remove the command?
+            let extra = {
+              deviceID: this.service.identity.hashedDeviceID(clientID),
+              flowID,
+            }
+            this.service.recordTelemetryEvent("repair", "abandon", "silent", extra);
+            break;
+          }
+          // Let's continue to wait for that client to respond.
+          log.trace(`previous request to client ${clientID} has ${timeLeft} seconds before we give up on it`);
+          break;
+        }
+        // The command isn't pending - if this was the first request, we give
+        // it another go (as that client may have cleared the command but is yet
+        // to complete the sync)
+        // XXX - note that this is no longer true - the responders don't remove
+        // their command until they have written a response. This might mean
+        // we could drop the entire STATE.SENT_SECOND_REQUEST concept???
+        if (state == STATE.SENT_REQUEST) {
+          log.info(`previous request to client ${clientID} was removed - trying a second time`);
+          state = STATE.SENT_SECOND_REQUEST;
+          this._writeRequest(clientID);
+        } else {
+          // this was the second time around, so give up on this client
+          log.info(`previous 2 requests to client ${clientID} were removed - need a new client`);
+          state = STATE.NEED_NEW_CLIENT;
+        }
+        break;
+
+      case STATE.NEED_NEW_CLIENT:
+        // We need to try and find a new client to request.
+        let newClientID = this._findNextClient();
+        if (!newClientID) {
+          state = STATE.FINISHED;
+          break;
+        }
+        let newClientsUsed = this._clientsUsed;
+        newClientsUsed.push(newClientID);
+        this._clientsUsed = newClientsUsed;
+        this._writeRequest(newClientID);
+        state = STATE.SENT_REQUEST;
+        break;
+
+      case STATE.ABORTED:
+        break; // our caller will take the abort action.
+
+      case STATE.FINISHED:
+        break;
+
+      case NOT_REPAIRING:
+        // No repair is in progress. This is a common case, so only log trace.
+        log.trace("continue repairs called but no repair in progress.");
+        break;
+
+      default:
+        log.error(`continue repairs finds itself in an unknown state ${state}`);
+        state = STATE.ABORTED;
+        break;
+
+    }
+    return state;
+  }
+
+  // Issue a repair request to a specific client.
+  _writeRequest(clientID) {
+    log.trace("writing repair request to client", clientID);
+    let ids = this._currentIDs;
+    if (!ids) {
+      throw new AbortRepairError("Attempting to write a request, but there are no IDs");
+    }
+    let flowID = this._flowID;
+    // Post a command to that client.
+    let request = {
+      collection: "bookmarks",
+      request: "upload",
+      requestor: this.service.clientsEngine.localID,
+      ids,
+      flowID,
+    }
+    this.service.clientsEngine.sendCommand("repairRequest", [request], clientID, { flowID });
+    this.prefs.set(PREF.REPAIR_WHEN, Math.floor(this._now() / 1000));
+    // record telemetry about this
+    let extra = {
+      deviceID: this.service.identity.hashedDeviceID(clientID),
+      flowID,
+      numIDs: ids.length.toString(),
+    }
+    this.service.recordTelemetryEvent("repair", "request", "upload", extra);
+  }
+
+  _findNextClient() {
+    let alreadyDone = this._clientsUsed;
+    for (let client of this.service.clientsEngine.remoteClients) {
+      log.trace("findNextClient considering", client);
+      if (alreadyDone.indexOf(client.id) == -1 && this._isSuitableClient(client)) {
+        return client.id;
+      }
+    }
+    log.trace("findNextClient found no client");
+    return null;
+  }
+
+  // Is the passed client record suitable as a repair responder?
+  _isSuitableClient(client) {
+    // filter only desktop firefox running 53+
+    return (client.type == DEVICE_TYPE_DESKTOP &&
+            Services.vc.compare(client.version, 52) > 0);
+  }
+
+  // Is our command still in the "commands" queue for the specific client?
+  _isCommandPending(clientID, flowID) {
+    let commands = this.service.clientsEngine.getClientCommands(clientID);
+    for (let command of commands) {
+      if (command.command != "repairRequest" || command.args.length != 1) {
+        continue;
+      }
+      let arg = command.args[0];
+      if (arg.collection == "bookmarks" && arg.request == "upload" &&
+          arg.flowID == flowID) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // accessors for our prefs.
+  get _currentState() {
+    return this.prefs.get(PREF.CURRENT_STATE, STATE.NOT_REPAIRING);
+  }
+  set _currentState(newState) {
+    this.prefs.set(PREF.CURRENT_STATE, newState);
+  }
+
+  get _currentIDs() {
+    let ids = this.prefs.get(PREF.REPAIR_MISSING_IDS, "");
+    return ids.length ? ids.split(" ") : [];
+  }
+  set _currentIDs(arrayOfIDs) {
+    this.prefs.set(PREF.REPAIR_MISSING_IDS, arrayOfIDs.join(" "));
+  }
+
+  get _clientsUsed() {
+    let alreadyDone = this.prefs.get(PREF.REPAIR_CLIENTS, "");
+    return alreadyDone.length ? alreadyDone.split(" ") : [];
+  }
+  set _clientsUsed(arrayOfClientIDs) {
+    this.prefs.set(PREF.REPAIR_CLIENTS, arrayOfClientIDs.join(" "));
+  }
+
+  get _flowID() {
+    return this.prefs.get(PREF.REPAIR_ID);
+  }
+  set _flowID(val) {
+    this.prefs.set(PREF.REPAIR_ID, val);
+  }
+
+  // Used for test mocks.
+  _now() {
+    return Date.now();
+  }
+}
--- a/services/sync/modules/collection_repair.js
+++ b/services/sync/modules/collection_repair.js
@@ -3,23 +3,27 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 "use strict";
 
 const Cu = Components.utils;
 
 Cu.import("resource://gre/modules/XPCOMUtils.jsm");
 Cu.import("resource://services-sync/main.js");
 
+XPCOMUtils.defineLazyModuleGetter(this, "BookmarkRepairRequestor",
+  "resource://services-sync/bookmark_repair.js");
+
 this.EXPORTED_SYMBOLS = ["getRepairRequestor", "getAllRepairRequestors",
                          "CollectionRepairRequestor",
                          "getRepairResponder",
                          "CollectionRepairResponder"];
 
 // The individual requestors/responders, lazily loaded.
 const REQUESTORS = {
+  bookmarks: ["bookmark_repair.js", "BookmarkRepairRequestor"],
 }
 
 const RESPONDERS = {
 }
 
 // Should we maybe enforce the requestors being a singleton?
 function _getRepairConstructor(which, collection) {
   if (!(collection in which)) {
--- a/services/sync/moz.build
+++ b/services/sync/moz.build
@@ -14,16 +14,17 @@ XPCSHELL_TESTS_MANIFESTS += ['tests/unit
 EXTRA_COMPONENTS += [
     'SyncComponents.manifest',
     'Weave.js',
 ]
 
 EXTRA_JS_MODULES['services-sync'] += [
     'modules/addonsreconciler.js',
     'modules/addonutils.js',
+    'modules/bookmark_repair.js',
     'modules/bookmark_validator.js',
     'modules/browserid_identity.js',
     'modules/collection_repair.js',
     'modules/collection_validator.js',
     'modules/doctor.js',
     'modules/engines.js',
     'modules/keys.js',
     'modules/main.js',
new file mode 100644
--- /dev/null
+++ b/services/sync/tests/unit/test_bookmark_repair_requestor.js
@@ -0,0 +1,397 @@
+/* Any copyright is dedicated to the Public Domain.
+   http://creativecommons.org/publicdomain/zero/1.0/ */
+Cu.import("resource://services-sync/bookmark_repair.js");
+
+initTestLogging("Trace");
+
+function makeClientRecord(id, fields = {}) {
+  return {
+    id,
+    version: fields.version || "53.0a1",
+    type: fields.type || "desktop",
+    stale: fields.stale || false,
+  }
+}
+
+class MockClientsEngine {
+  constructor(clientList) {
+    this._clientList = clientList;
+    this._sentCommands = {}
+  }
+
+  get remoteClients() {
+    return Object.values(this._clientList);
+  }
+
+  remoteClient(id) {
+    return this._clientList[id];
+  }
+
+  sendCommand(command, args, clientID) {
+    let cc = this._sentCommands[clientID] || [];
+    cc.push({ command, args });
+    this._sentCommands[clientID] = cc;
+  }
+
+  getClientCommands(clientID) {
+    return this._sentCommands[clientID] || [];
+  }
+}
+
+class MockIdentity {
+  hashedDeviceID(did) {
+    return did; // don't hash it to make testing easier.
+  }
+}
+
+class MockService {
+  constructor(clientList) {
+    this.clientsEngine = new MockClientsEngine(clientList);
+    this.identity = new MockIdentity();
+    this._recordedEvents = [];
+  }
+
+  recordTelemetryEvent(object, method, value, extra = undefined) {
+    this._recordedEvents.push({ method, object, value, extra });
+  }
+}
+
+function checkState(expected) {
+  equal(Services.prefs.getCharPref("services.sync.repairs.bookmarks.state"), expected);
+}
+
+function checkRepairFinished() {
+  try {
+    let state = Services.prefs.getCharPref("services.sync.repairs.bookmarks.state");
+    ok(false, state);
+  } catch (ex) {
+    ok(true, "no repair preference exists");
+  }
+}
+
+function checkOutgoingCommand(service, clientID) {
+  let sent = service.clientsEngine._sentCommands;
+  equal(Object.values(sent).length, 1);
+  equal(sent[clientID].length, 1);
+  equal(sent[clientID][0].command, "repairRequest");
+}
+
+add_task(async function test_requestor_no_clients() {
+  let mockService = new MockService({ });
+  let requestor = new BookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+
+  requestor.startRepairs(validationInfo, flowID);
+  // there are no clients, so we should end up in "finished" (which we need to
+  // check via telemetry)
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_one_client_no_response() {
+  let mockService = new MockService({ "client-a": makeClientRecord("client-a") });
+  let requestor = new BookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+  // asking it to continue stays in that state until we timeout or the command
+  // is removed.
+  requestor.continueRepairs();
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  // now pretend that client synced.
+  mockService.clientsEngine._sentCommands = {};
+  requestor.continueRepairs();
+  checkState(requestor.STATE.SENT_SECOND_REQUEST);
+  // the command should be outgoing again.
+  checkOutgoingCommand(mockService, "client-a");
+
+  // pretend that client synced again without writing a command.
+  mockService.clientsEngine._sentCommands = {};
+  requestor.continueRepairs();
+  // There are no more clients, so we've given up.
+
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_one_client_no_sync() {
+  let mockService = new MockService({ "client-a": makeClientRecord("client-a") });
+  let requestor = new BookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  // pretend we are now in the future.
+  let theFuture = Date.now() + 300000000;
+  requestor._now = () => theFuture;
+
+  requestor.continueRepairs();
+
+  // We should be finished as we gave up in disgust.
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "abandon",
+      value: "silent",
+      extra: { flowID, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_client_vanishes() {
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": makeClientRecord("client-b"),
+  });
+  let requestor = new BookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  mockService.clientsEngine._sentCommands = {};
+  // Now let's pretend the client vanished.
+  delete mockService.clientsEngine._clientList["client-a"];
+
+  requestor.continueRepairs();
+  // We should have moved on to client-b.
+  checkState(requestor.STATE.SENT_REQUEST);
+  checkOutgoingCommand(mockService, "client-b");
+
+  // Now let's pretend client B wrote all missing IDs.
+  let response = {
+    collection: "bookmarks",
+    request: "upload",
+    flowID: requestor._flowID,
+    clientID: "client-b",
+    ids: ["a", "b", "c"],
+  }
+  requestor.continueRepairs(response);
+
+  // We should be finished as we got all our IDs.
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "abandon",
+      value: "missing",
+      extra: { flowID, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-b" },
+    },
+    { object: "repair",
+      method: "response",
+      value: "upload",
+      extra: { flowID, deviceID: "client-b", numIDs: 3 },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 0 },
+    }
+  ]);
+});
+
+add_task(async function test_requestor_success_responses() {
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": makeClientRecord("client-b"),
+  });
+  let requestor = new BookmarkRepairRequestor(mockService);
+  let validationInfo = {
+    problems: {
+      missingChildren: [
+        {parent: "x", child: "a"},
+        {parent: "x", child: "b"},
+        {parent: "x", child: "c"}
+      ],
+      orphans: [],
+    }
+  }
+  let flowID = Utils.makeGUID();
+  requestor.startRepairs(validationInfo, flowID);
+  // the command should now be outgoing.
+  checkOutgoingCommand(mockService, "client-a");
+
+  checkState(requestor.STATE.SENT_REQUEST);
+
+  mockService.clientsEngine._sentCommands = {};
+  // Now let's pretend the client wrote a response.
+  let response = {
+    collection: "bookmarks",
+    request: "upload",
+    clientID: "client-a",
+    flowID: requestor._flowID,
+    ids: ["a", "b"],
+  }
+  requestor.continueRepairs(response);
+  // We should have moved on to client 2.
+  checkState(requestor.STATE.SENT_REQUEST);
+  checkOutgoingCommand(mockService, "client-b");
+
+  // Now let's pretend client B write the missing ID.
+  response = {
+    collection: "bookmarks",
+    request: "upload",
+    clientID: "client-b",
+    flowID: requestor._flowID,
+    ids: ["c"],
+  }
+  requestor.continueRepairs(response);
+
+  // We should be finished as we got all our IDs.
+  checkRepairFinished();
+  deepEqual(mockService._recordedEvents, [
+    { object: "repair",
+      method: "started",
+      value: undefined,
+      extra: { flowID, numIDs: 3 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 3, deviceID: "client-a" },
+    },
+    { object: "repair",
+      method: "response",
+      value: "upload",
+      extra: { flowID, deviceID: "client-a", numIDs: 2 },
+    },
+    { object: "repair",
+      method: "request",
+      value: "upload",
+      extra: { flowID, numIDs: 1, deviceID: "client-b" },
+    },
+    { object: "repair",
+      method: "response",
+      value: "upload",
+      extra: { flowID, deviceID: "client-b", numIDs: 1 },
+    },
+    { object: "repair",
+      method: "finished",
+      value: undefined,
+      extra: { flowID, numIDs: 0 },
+    }
+  ]);
+});
+
+add_task(async function test_client_suitability() {
+  let mockService = new MockService({
+    "client-a": makeClientRecord("client-a"),
+    "client-b": makeClientRecord("client-b", { type: "mobile" }),
+    "client-c": makeClientRecord("client-c", { version: "52.0a1" }),
+    "client-d": makeClientRecord("client-c", { version: "54.0a1" }),
+  });
+  let requestor = new BookmarkRepairRequestor(mockService);
+  ok(requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-a")));
+  ok(!requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-b")));
+  ok(!requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-c")));
+  ok(requestor._isSuitableClient(mockService.clientsEngine.remoteClient("client-d")));
+});
--- a/services/sync/tests/unit/xpcshell.ini
+++ b/services/sync/tests/unit/xpcshell.ini
@@ -131,16 +131,17 @@ tags = addons
 [test_bookmark_duping.js]
 [test_bookmark_engine.js]
 [test_bookmark_invalid.js]
 [test_bookmark_legacy_microsummaries_support.js]
 [test_bookmark_livemarks.js]
 [test_bookmark_order.js]
 [test_bookmark_places_query_rewriting.js]
 [test_bookmark_record.js]
+[test_bookmark_repair_requestor.js]
 [test_bookmark_smart_bookmarks.js]
 [test_bookmark_store.js]
 # Too many intermittent "ASSERTION: thread pool wasn't shutdown: '!mPool'" (bug 804479)
 skip-if = debug
 [test_bookmark_tracker.js]
 requesttimeoutfactor = 4
 [test_bookmark_validator.js]
 [test_clients_engine.js]
--- a/tools/lint/eslint/modules.json
+++ b/tools/lint/eslint/modules.json
@@ -15,16 +15,17 @@
   "assertions.js": ["Assert", "Expect"],
   "async.js": ["Async"],
   "AsyncSpellCheckTestHelper.jsm": ["onSpellCheck"],
   "AutoMigrate.jsm": ["AutoMigrate"],
   "Battery.jsm": ["GetBattery", "Battery"],
   "blocklist-clients.js": ["AddonBlocklistClient", "GfxBlocklistClient", "OneCRLBlocklistClient", "PluginBlocklistClient", "FILENAME_ADDONS_JSON", "FILENAME_GFX_JSON", "FILENAME_PLUGINS_JSON"],
   "blocklist-updater.js": ["checkVersions", "addTestBlocklistClient"],
   "bogus_element_type.jsm": [],
+  "bookmark_repair.js": ["BookmarkRepairRequestor"],
   "bookmark_validator.js": ["BookmarkValidator", "BookmarkProblemData"],
   "bookmarks.js": ["BookmarksEngine", "PlacesItem", "Bookmark", "BookmarkFolder", "BookmarkQuery", "Livemark", "BookmarkSeparator"],
   "bookmarks.jsm": ["PlacesItem", "Bookmark", "Separator", "Livemark", "BookmarkFolder", "DumpBookmarks"],
   "BootstrapMonitor.jsm": ["monitor"],
   "browser-loader.js": ["BrowserLoader"],
   "browserid_identity.js": ["BrowserIDManager", "AuthenticationError"],
   "CertUtils.jsm": ["BadCertHandler", "checkCert", "readCertPrefs", "validateCert"],
   "clients.js": ["ClientEngine", "ClientsRec"],