Patchwork [01,of,17,V4] dirstate: separate 'lookup' status field from others

login
register
mail settings
Submitter Martin von Zweigbergk
Date Oct. 12, 2014, 5:44 a.m.
Message ID <6b0dd84f33b7ce6a932e.1413092658@handduk2.mtv.corp.google.com>
Download mbox | patch
Permalink /patch/6219/
State Accepted
Headers show

Comments

Martin von Zweigbergk - Oct. 12, 2014, 5:44 a.m.
# HG changeset patch
# User Martin von Zweigbergk <martinvonz@gmail.com>
# Date 1412397850 25200
#      Fri Oct 03 21:44:10 2014 -0700
# Node ID 6b0dd84f33b7ce6a932e9a46f6751eeda381ecfc
# Parent  1533e642262de32c5a2445789710f49237019fd6
dirstate: separate 'lookup' status field from others

The status tuple returned from dirstate.status() has an additional
field compared to the other status tuples: lookup/unsure. This field
is just an optimization and not something most callers care about
(they want the resolved value of 'modified' or 'clean'). To prepare
for a single future status type, let's separate out the 'lookup' field
from the rest by having dirstate.status() return a pair: (lookup,
status).
Mads Kiilerich - Oct. 12, 2014, 11:54 a.m.
This series LGTM.

The biggest question remains whether we want the status class to be this 
kind of named tuple and whether the use of __slots__ is an unnecessary 
micro optimization. The implementation can however easily be changed 
later if there should be a need for that.

/Mads


On 10/12/2014 07:44 AM, Martin von Zweigbergk wrote:
> # HG changeset patch
> # User Martin von Zweigbergk <martinvonz@gmail.com>
> # Date 1412976756 25200
> #      Fri Oct 10 14:32:36 2014 -0700
> # Node ID cd8ad3e2855507f93bc5a2fb645806c22be7b946
> # Parent  7309c75e96a9a2f6d0e0f796a4d13e595e9ea958
> dirstate: create class for status lists
>
> Callers of various status() methods (on dirstate, context, repo) get a
> tuple of 7 elements, where each element is a list of files. This
> results in lots of uses of indexes where names would be much more
> readable. For example, "status.ignored" seems clearer than "status[4]"
> [1]. So, let's introduce a simple named tuple containing the 7 status
> fields: modified, added, removed, deleted, unknown, ignored, clean.
>
> This patch introduces the class and updates the status methods to
> return instances of it. Later patches will update the callers.
>
>   [1] Did you even notice that it should have been "status[5]"?
>
> diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
> --- a/hgext/largefiles/overrides.py
> +++ b/hgext/largefiles/overrides.py
> @@ -11,6 +11,7 @@
>   import os
>   import copy
>   
> +from mercurial import dirstate
>   from mercurial import hg, commands, util, cmdutil, scmutil, match as match_, \
>           archival, pathutil, revset
>   from mercurial.i18n import _
> @@ -1149,7 +1150,8 @@
>           modified, added, removed, deleted, unknown, ignored, clean = r
>           unknown = [f for f in unknown if lfdirstate[f] == '?']
>           ignored = [f for f in ignored if lfdirstate[f] == '?']
> -        return modified, added, removed, deleted, unknown, ignored, clean
> +        return dirstate.status(modified, added, removed, deleted, unknown,
> +                               ignored, clean)
>       repo.status = overridestatus
>       orig(ui, repo, *dirs, **opts)
>       repo.status = oldstatus
> diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
> --- a/hgext/largefiles/reposetup.py
> +++ b/hgext/largefiles/reposetup.py
> @@ -10,6 +10,7 @@
>   import copy
>   import os
>   
> +from mercurial import dirstate
>   from mercurial import error, manifest, match as match_, util
>   from mercurial.i18n import _
>   from mercurial import localrepo
> @@ -242,7 +243,7 @@
>                       wlock.release()
>   
>               self.lfstatus = True
> -            return result
> +            return dirstate.status(*result)
>   
>           # As part of committing, copy all of the largefiles into the
>           # cache.
> diff --git a/mercurial/context.py b/mercurial/context.py
> --- a/mercurial/context.py
> +++ b/mercurial/context.py
> @@ -7,6 +7,7 @@
>   
>   from node import nullid, nullrev, short, hex, bin
>   from i18n import _
> +import dirstate
>   import mdiff, error, util, scmutil, subrepo, patch, encoding, phases
>   import match as matchmod
>   import os, errno, stat
> @@ -340,8 +341,7 @@
>           for l in r:
>               l.sort()
>   
> -        # we return a tuple to signify that this list isn't changing
> -        return tuple(r)
> +        return dirstate.status(*r)
>   
>   
>   def makememctx(repo, parents, text, user, date, branch, files, store,
> @@ -1482,7 +1482,7 @@
>           # (s[1] is 'added' and s[2] is 'removed')
>           s = list(s)
>           s[1], s[2] = s[2], s[1]
> -        return tuple(s)
> +        return dirstate.status(*s)
>   
>   class committablefilectx(basefilectx):
>       """A committablefilectx provides common functionality for a file context
> diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
> --- a/mercurial/dirstate.py
> +++ b/mercurial/dirstate.py
> @@ -26,6 +26,50 @@
>       def join(self, obj, fname):
>           return obj._join(fname)
>   
> +class status(tuple):
> +    '''Named tuple with a list of files per status. The 'deleted', 'unknown'
> +       and 'ignored' properties are only relevant to the working copy.
> +    '''
> +
> +    __slots__ = ()
> +
> +    def __new__(cls, modified, added, removed, deleted, unknown, ignored,
> +                clean):
> +        return tuple.__new__(cls, (modified, added, removed, deleted, unknown,
> +                                   ignored, clean))
> +
> +    @property
> +    def modified(self):
> +        return self[0]
> +
> +    @property
> +    def added(self):
> +        return self[1]
> +
> +    @property
> +    def removed(self):
> +        return self[2]
> +
> +    @property
> +    def deleted(self):
> +        return self[3]
> +
> +    @property
> +    def unknown(self):
> +        return self[4]
> +
> +    @property
> +    def ignored(self):
> +        return self[5]
> +
> +    @property
> +    def clean(self):
> +        return self[6]
> +
> +    def __repr__(self, *args, **kwargs):
> +        return (('<status modified=%r, added=%r, removed=%r, deleted=%r, '
> +                 'unknown=%r, ignored=%r, clean=%r>') % self)
> +
>   class dirstate(object):
>   
>       def __init__(self, opener, ui, root, validate):
> @@ -901,8 +945,8 @@
>               elif state == 'r':
>                   radd(fn)
>   
> -        return (lookup, (modified, added, removed, deleted, unknown, ignored,
> -                         clean))
> +        return (lookup, status(modified, added, removed, deleted, unknown,
> +                               ignored, clean))
>   
>       def matches(self, match):
>           '''
> diff --git a/mercurial/subrepo.py b/mercurial/subrepo.py
> --- a/mercurial/subrepo.py
> +++ b/mercurial/subrepo.py
> @@ -5,6 +5,7 @@
>   # This software may be used and distributed according to the terms of the
>   # GNU General Public License version 2 or any later version.
>   
> +import dirstate
>   import errno, os, re, shutil, posixpath, sys
>   import xml.dom.minidom
>   import stat, subprocess, tarfile
> @@ -448,7 +449,7 @@
>           return 1
>   
>       def status(self, rev2, **opts):
> -        return [], [], [], [], [], [], []
> +        return dirstate.status([], [], [], [], [], [], [])
>   
>       def diff(self, ui, diffopts, node2, match, prefix, **opts):
>           pass
> @@ -650,7 +651,7 @@
>           except error.RepoLookupError, inst:
>               self._repo.ui.warn(_('warning: error "%s" in subrepository "%s"\n')
>                                  % (inst, subrelpath(self)))
> -            return [], [], [], [], [], [], []
> +            return dirstate.status([], [], [], [], [], [], [])
>   
>       @annotatesubrepoerror
>       def diff(self, ui, diffopts, node2, match, prefix, **opts):
> @@ -1562,7 +1563,7 @@
>           rev1 = self._state[1]
>           if self._gitmissing() or not rev1:
>               # if the repo is missing, return no results
> -            return [], [], [], [], [], [], []
> +            return dirstate.status([], [], [], [], [], [], [])
>           modified, added, removed = [], [], []
>           self._gitupdatestat()
>           if rev2:
> @@ -1583,7 +1584,8 @@
>                   removed.append(f)
>   
>           deleted = unknown = ignored = clean = []
> -        return modified, added, removed, deleted, unknown, ignored, clean
> +        return dirstate.status(modified, added, removed, deleted, unknown,
> +                               ignored, clean)
>   
>       def shortid(self, revid):
>           return revid[:7]
> diff --git a/tests/test-context.py.out b/tests/test-context.py.out
> --- a/tests/test-context.py.out
> +++ b/tests/test-context.py.out
> @@ -2,7 +2,7 @@
>   ASCII   : Gr?ezi!
>   Latin-1 : Grüezi!
>   UTF-8   : Grüezi!
> -(['foo'], [], [], [], [], [], [])
> +<status modified=['foo'], added=[], removed=[], deleted=[], unknown=[], ignored=[], clean=[]>
>   diff --git a/foo b/foo
>   
>   --- a/foo
Martin von Zweigbergk - Oct. 12, 2014, 9:08 p.m.
On Sun, Oct 12, 2014 at 4:54 AM, Mads Kiilerich <mads@kiilerich.com> wrote:
> This series LGTM.

Thanks for reviewing.

> The biggest question remains whether we want the status class to be this
> kind of named tuple and whether the use of __slots__ is an unnecessary micro
> optimization. The implementation can however easily be changed later if
> there should be a need for that.

Agreed, we can change it later if necessary.
Matt Mackall - Oct. 14, 2014, 5:51 a.m.
On Sat, 2014-10-11 at 22:44 -0700, Martin von Zweigbergk wrote:
> # HG changeset patch
> # User Martin von Zweigbergk <martinvonz@gmail.com>
> # Date 1412976756 25200
> #      Fri Oct 10 14:32:36 2014 -0700
> # Node ID cd8ad3e2855507f93bc5a2fb645806c22be7b946
> # Parent  7309c75e96a9a2f6d0e0f796a4d13e595e9ea958
> dirstate: create class for status lists
> 
> Callers of various status() methods (on dirstate, context, repo) get a
> tuple of 7 elements, where each element is a list of files. This
> results in lots of uses of indexes where names would be much more
> readable. For example, "status.ignored" seems clearer than "status[4]"
> [1]. So, let's introduce a simple named tuple containing the 7 status
> fields: modified, added, removed, deleted, unknown, ignored, clean.
> 
> This patch introduces the class and updates the status methods to
> return instances of it. Later patches will update the callers.
> 
>  [1] Did you even notice that it should have been "status[5]"?

To move things along, I've tweaked this a bit in flight:

- I've put the class definition in scmutil as status is not actually
specific to the working copy (and I don't want to have a bunch of new
things importing dirstate)

- I've put _only_ the single change to dirstate.status() in that
changeset, which:
  - gives a smaller patch with fewer distractions
  - proves API compatibility by introducing no test changes

- I've put all the other users in a separate patch
Martin von Zweigbergk - Oct. 14, 2014, 6:01 a.m.
On Mon, Oct 13, 2014 at 10:51 PM, Matt Mackall <mpm@selenic.com> wrote:
> On Sat, 2014-10-11 at 22:44 -0700, Martin von Zweigbergk wrote:
>> # HG changeset patch
>> # User Martin von Zweigbergk <martinvonz@gmail.com>
>> # Date 1412976756 25200
>> #      Fri Oct 10 14:32:36 2014 -0700
>> # Node ID cd8ad3e2855507f93bc5a2fb645806c22be7b946
>> # Parent  7309c75e96a9a2f6d0e0f796a4d13e595e9ea958
>> dirstate: create class for status lists
>>
>> Callers of various status() methods (on dirstate, context, repo) get a
>> tuple of 7 elements, where each element is a list of files. This
>> results in lots of uses of indexes where names would be much more
>> readable. For example, "status.ignored" seems clearer than "status[4]"
>> [1]. So, let's introduce a simple named tuple containing the 7 status
>> fields: modified, added, removed, deleted, unknown, ignored, clean.
>>
>> This patch introduces the class and updates the status methods to
>> return instances of it. Later patches will update the callers.
>>
>>  [1] Did you even notice that it should have been "status[5]"?
>
> To move things along, I've tweaked this a bit in flight:
>
> - I've put the class definition in scmutil as status is not actually
> specific to the working copy (and I don't want to have a bunch of new
> things importing dirstate)

Thanks. It didn't quite seem like the right place to me either, but I
didn't know where it belonged. Moving it out of dirstate also removes
the confusion with the method named dirstate.status().

> - I've put _only_ the single change to dirstate.status() in that
> changeset, which:
>   - gives a smaller patch with fewer distractions
>   - proves API compatibility by introducing no test changes
>
> - I've put all the other users in a separate patch

Fine with me. Thanks for picking up the patches.

Patch

diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py
--- a/hgext/largefiles/lfutil.py
+++ b/hgext/largefiles/lfutil.py
@@ -136,8 +136,8 @@ 
 
 def lfdirstatestatus(lfdirstate, repo, rev):
     match = match_.always(repo.root, repo.getcwd())
-    s = lfdirstate.status(match, [], False, False, False)
-    unsure, modified, added, removed, missing, unknown, ignored, clean = s
+    unsure, s = lfdirstate.status(match, [], False, False, False)
+    modified, added, removed, missing, unknown, ignored, clean = s
     for lfile in unsure:
         try:
             fctx = repo[rev][standin(lfile)]
diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
--- a/hgext/largefiles/overrides.py
+++ b/hgext/largefiles/overrides.py
@@ -351,9 +351,9 @@ 
     wlock = repo.wlock()
     try:
         lfdirstate = lfutil.openlfdirstate(ui, repo)
-        s = lfdirstate.status(match_.always(repo.root, repo.getcwd()),
-            [], False, False, False)
-        (unsure, modified, added, removed, missing, unknown, ignored, clean) = s
+        unsure, s = lfdirstate.status(match_.always(repo.root, repo.getcwd()),
+                                      [], False, False, False)
+        modified = s[0]
 
         if opts['check']:
             mod = len(modified) > 0
@@ -1110,9 +1110,9 @@ 
         return orig(repo, pats, opts, dry_run, similarity)
     # Get the list of missing largefiles so we can remove them
     lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
-    s = lfdirstate.status(match_.always(repo.root, repo.getcwd()), [], False,
-        False, False)
-    (unsure, modified, added, removed, missing, unknown, ignored, clean) = s
+    unsure, s = lfdirstate.status(match_.always(repo.root, repo.getcwd()), [],
+                                  False, False, False)
+    missing = s[3]
 
     # Call into the normal remove code, but the removing of the standin, we want
     # to have handled by original addremove.  Monkey patching here makes sure
@@ -1288,9 +1288,10 @@ 
             # update standins for linear-merge or force-branch-merge,
             # because largefiles in the working directory may be modified
             lfdirstate = lfutil.openlfdirstate(repo.ui, repo)
-            s = lfdirstate.status(match_.always(repo.root, repo.getcwd()),
-                                  [], False, False, False)
-            unsure, modified, added = s[:3]
+            unsure, s = lfdirstate.status(match_.always(repo.root,
+                                                        repo.getcwd()),
+                                          [], False, False, False)
+            modified, added = s[:2]
             for lfile in unsure + modified + added:
                 lfutil.updatestandin(repo, lfutil.standin(lfile))
 
diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
--- a/hgext/largefiles/reposetup.py
+++ b/hgext/largefiles/reposetup.py
@@ -159,10 +159,10 @@ 
                                     if sfindirstate(f)]
                     # Don't waste time getting the ignored and unknown
                     # files from lfdirstate
-                    s = lfdirstate.status(match, [], False,
-                            listclean, False)
-                    (unsure, modified, added, removed, missing, _unknown,
-                            _ignored, clean) = s
+                    unsure, s = lfdirstate.status(match, [], False, listclean,
+                                                  False)
+                    (modified, added, removed, missing, _unknown, _ignored,
+                     clean) = s
                     if parentworking:
                         for lfile in unsure:
                             standin = lfutil.standin(lfile)
@@ -296,9 +296,9 @@ 
                     # large.
                     lfdirstate = lfutil.openlfdirstate(ui, self)
                     dirtymatch = match_.always(self.root, self.getcwd())
-                    s = lfdirstate.status(dirtymatch, [], False, False, False)
-                    (unsure, modified, added, removed, _missing, _unknown,
-                            _ignored, _clean) = s
+                    unsure, s = lfdirstate.status(dirtymatch, [], False, False,
+                                                  False)
+                    modified, added, removed = s[:3]
                     modifiedfiles = unsure + modified + added + removed
                     lfiles = lfutil.listlfiles(self)
                     # this only loops through largefiles that exist (not
diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -1418,9 +1418,9 @@ 
         subrepos = []
         if '.hgsub' in self:
             subrepos = sorted(self.substate)
-        s = self._repo.dirstate.status(match, subrepos, listignored,
-                                       listclean, listunknown)
-        cmp, modified, added, removed, deleted, unknown, ignored, clean = s
+        cmp, s = self._repo.dirstate.status(match, subrepos, listignored,
+                                            listclean, listunknown)
+        modified, added, removed, deleted, unknown, ignored, clean = s
 
         # check for any possibly clean files
         if cmp:
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -802,8 +802,8 @@ 
 
     def status(self, match, subrepos, ignored, clean, unknown):
         '''Determine the status of the working copy relative to the
-        dirstate and return a tuple of lists (unsure, modified, added,
-        removed, deleted, unknown, ignored, clean), where:
+        dirstate and return a nested tuple of lists (unsure, (modified, added,
+        removed, deleted, unknown, ignored, clean)), where:
 
           unsure:
             files that might have been modified since the dirstate was
@@ -901,8 +901,8 @@ 
             elif state == 'r':
                 radd(fn)
 
-        return (lookup, modified, added, removed, deleted, unknown, ignored,
-                clean)
+        return (lookup, (modified, added, removed, deleted, unknown, ignored,
+                         clean))
 
     def matches(self, match):
         '''