Patchwork D6345: rust-discovery: optionally don't randomize at all, for tests

login
register
mail settings
Submitter phabricator
Date May 6, 2019, 3:31 p.m.
Message ID <differential-rev-PHID-DREV-rm4edf5i53nt4y2vg32b-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/39967/
State Superseded
Headers show

Comments

phabricator - May 6, 2019, 3:31 p.m.
gracinet created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  As seen from Python, this is a new `randomize` kwarg in init of the
  discovery object. It replaces random picking by some arbitrary yet
  deterministic strategy.
  
  This is the same as what test-setdiscovery.t does, with the added
  benefit to be usable both in Python and Rust implementations.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6345

AFFECTED FILES
  mercurial/setdiscovery.py
  rust/hg-core/src/discovery.rs
  rust/hg-cpython/src/discovery.rs
  tests/test-rust-discovery.py

CHANGE DETAILS




To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
phabricator - May 10, 2019, 1:46 p.m.
kevincox accepted this revision.
kevincox added a comment.


  The code and rust style looks fine. I'm not sure that taking the N smallest elements is the best approach for testing though. I can imagine it hiding some issues with larger values.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6345

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
phabricator - May 22, 2019, 4:36 p.m.
gracinet added a comment.


  @kevincox I don't think the potential masking in tests due to this strategy is a big problem. That test is pretty recent, compared to setdiscovery, at least, and doesn't really test that sampling is very relevant. We could do better though by introducing several non random selecting strategies if we need it in the future, but this was good enough for me for the time being.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6345

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
phabricator - May 22, 2019, 4:38 p.m.
gracinet abandoned this revision.
gracinet added a comment.


  I'm abandoning this revision because I have to resubmit the whole series after partial incorrect application last week and subsequent droping from the actual repo.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6345

To: gracinet, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel

Patch

diff --git a/tests/test-rust-discovery.py b/tests/test-rust-discovery.py
--- a/tests/test-rust-discovery.py
+++ b/tests/test-rust-discovery.py
@@ -106,6 +106,10 @@ 
         self.assertTrue(disco.iscomplete())
         self.assertEqual(disco.commonheads(), {1})
 
+    def testinitnorandom(self):
+        idx = self.parseindex()
+        PartialDiscovery(idx, [3], randomize=False)
+
 if __name__ == '__main__':
     import silenttestrunner
     silenttestrunner.main(__name__)
diff --git a/rust/hg-cpython/src/discovery.rs b/rust/hg-cpython/src/discovery.rs
--- a/rust/hg-cpython/src/discovery.rs
+++ b/rust/hg-cpython/src/discovery.rs
@@ -30,13 +30,15 @@ 
     def __new__(
         _cls,
         index: PyObject,
-        targetheads: PyObject
+        targetheads: PyObject,
+        randomize: bool = true
     ) -> PyResult<PartialDiscovery> {
         Self::create_instance(
             py,
             RefCell::new(Box::new(CorePartialDiscovery::new(
                 Index::new(py, index)?,
                 rev_pyiter_collect(py, &targetheads)?,
+                randomize,
             )))
         )
     }
diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs
--- a/rust/hg-core/src/discovery.rs
+++ b/rust/hg-core/src/discovery.rs
@@ -29,6 +29,7 @@ 
     children_cache: Option<HashMap<Revision, Vec<Revision>>>,
     missing: HashSet<Revision>,
     rng: Rng,
+    randomize: bool,
 }
 
 pub struct DiscoveryStats {
@@ -144,16 +145,32 @@ 
     /// If we want to make the signature more flexible,
     /// we'll have to make it a type argument of `PartialDiscovery` or a trait
     /// object since we'll keep it in the meanwhile
-    pub fn new(graph: G, target_heads: Vec<Revision>) -> Self {
+    ///
+    /// The `randomize` boolean affects sampling, and specifically how
+    /// limiting or last-minute expanding is been done:
+    ///
+    /// If `true`, both will perform random picking from `self.undecided`.
+    /// This is currently the best for actual discoveries.
+    ///
+    /// If `false`, a reproductible picking strategy is performed. This is
+    /// useful for integration tests.
+    pub fn new(
+        graph: G,
+        target_heads: Vec<Revision>,
+        randomize: bool,
+    ) -> Self {
         let mut seed: [u8; 16] = [0; 16];
-        thread_rng().fill_bytes(&mut seed);
-        Self::new_with_seed(graph, target_heads, seed)
+        if randomize {
+            thread_rng().fill_bytes(&mut seed);
+        }
+        Self::new_with_seed(graph, target_heads, seed, randomize)
     }
 
     pub fn new_with_seed(
         graph: G,
         target_heads: Vec<Revision>,
         seed: [u8; 16],
+        randomize: bool,
     ) -> Self {
         PartialDiscovery {
             undecided: None,
@@ -163,6 +180,7 @@ 
             common: MissingAncestors::new(graph, vec![]),
             missing: HashSet::new(),
             rng: Rng::from_seed(seed),
+            randomize: randomize,
         }
     }
 
@@ -173,6 +191,11 @@ 
         mut sample: Vec<Revision>,
         size: usize,
     ) -> Vec<Revision> {
+        if !self.randomize {
+            sample.sort();
+            sample.truncate(size);
+            return sample
+        }
         let sample_len = sample.len();
         if sample_len <= size {
             return sample;
@@ -413,20 +436,22 @@ 
 
     /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
     ///
-    /// To avoid actual randomness in tests, we give it a fixed random seed.
+    /// To avoid actual randomness in these tests, we give it a fixed
+    /// random seed, but by default we'll test the random version.
     fn full_disco() -> PartialDiscovery<SampleGraph> {
         PartialDiscovery::new_with_seed(
             SampleGraph,
             vec![10, 11, 12, 13],
             [0; 16],
+            true,
         )
     }
 
     /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
     ///
     /// To avoid actual randomness in tests, we give it a fixed random seed.
     fn disco12() -> PartialDiscovery<SampleGraph> {
-        PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16])
+        PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16], true)
     }
 
     fn sorted_undecided(
@@ -511,6 +536,14 @@ 
     }
 
     #[test]
+    fn test_limit_sample_no_random() {
+        let mut disco = full_disco();
+        disco.randomize = false;
+        assert_eq!(disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
+                   vec![1, 3, 5, 7]);
+    }
+
+    #[test]
     fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
         let mut disco = full_disco();
         disco.undecided = Some((1..=13).collect());
diff --git a/mercurial/setdiscovery.py b/mercurial/setdiscovery.py
--- a/mercurial/setdiscovery.py
+++ b/mercurial/setdiscovery.py
@@ -92,11 +92,19 @@ 
                 dist.setdefault(p, d + 1)
                 visit.append(p)
 
-def _limitsample(sample, desiredlen):
-    """return a random subset of sample of at most desiredlen item"""
-    if len(sample) > desiredlen:
-        sample = set(random.sample(sample, desiredlen))
-    return sample
+def _limitsample(sample, desiredlen, randomize=True):
+    """return a random subset of sample of at most desiredlen item.
+
+    If randomize is False, though, a deterministic subset is returned.
+    This is meant for integration tests.
+    """
+    if len(sample) <= desiredlen:
+        return sample
+    if randomize:
+        return set(random.sample(sample, desiredlen))
+    sample = list(sample)
+    sample.sort()
+    return set(sample[:desiredlen])
 
 class partialdiscovery(object):
     """an object representing ongoing discovery
@@ -110,13 +118,14 @@ 
     (all tracked revisions are known locally)
     """
 
-    def __init__(self, repo, targetheads):
+    def __init__(self, repo, targetheads, randomize=True):
         self._repo = repo
         self._targetheads = targetheads
         self._common = repo.changelog.incrementalmissingrevs()
         self._undecided = None
         self.missing = set()
         self._childrenmap = None
+        self.randomize = randomize
 
     def addcommons(self, commons):
         """register nodes known as common"""
@@ -221,7 +230,7 @@ 
         sample = set(self._repo.revs('heads(%ld)', revs))
 
         if len(sample) >= size:
-            return _limitsample(sample, size)
+            return _limitsample(sample, size, randomize=self.randomize)
 
         _updatesample(None, headrevs, sample, self._parentsgetter(),
                       quicksamplesize=size)
@@ -246,10 +255,15 @@ 
 
         _updatesample(revs, revsroots, sample, childrenrevs)
         assert sample
-        sample = _limitsample(sample, size)
+        sample = _limitsample(sample, size, randomize=self.randomize)
         if len(sample) < size:
             more = size - len(sample)
-            sample.update(random.sample(list(revs - sample), more))
+            takefrom = list(revs - sample)
+            if self.randomize:
+                sample.update(random.sample(takefrom, more))
+            else:
+                takefrom.sort()
+                sample.update(takefrom[:more])
         return sample
 
 def findcommonheads(ui, local, remote,