contrib/perf-utils/search-discovery-case
changeset 46771 6b26e6432554
child 48875 6000f5b25c9b
equal deleted inserted replaced
46770:b6ac612445e0 46771:6b26e6432554
       
     1 #!/usr/bin/env python3
       
     2 # Search for interesting discovery instances
       
     3 #
       
     4 #  search-discovery-case REPO [REPO]…
       
     5 #
       
     6 # This uses a subsetmaker extension (next to this script) to generate a stream
       
     7 # of random discovery instances. When interesting cases are discovered,
       
     8 # information about them is printed on stdout.
       
     9 from __future__ import print_function
       
    10 
       
    11 import json
       
    12 import os
       
    13 import queue
       
    14 import random
       
    15 import signal
       
    16 import subprocess
       
    17 import sys
       
    18 import threading
       
    19 
       
    20 this_script = os.path.abspath(sys.argv[0])
       
    21 this_dir = os.path.dirname(this_script)
       
    22 hg_dir = os.path.join(this_dir, '..', '..')
       
    23 HG_REPO = os.path.normpath(hg_dir)
       
    24 HG_BIN = os.path.join(HG_REPO, 'hg')
       
    25 
       
    26 JOB = int(os.environ.get('NUMBER_OF_PROCESSORS', 8))
       
    27 
       
    28 
       
    29 SLICING = ('scratch', 'randomantichain', 'rev')
       
    30 
       
    31 
       
    32 def nb_revs(repo_path):
       
    33     cmd = [
       
    34         HG_BIN,
       
    35         '--repository',
       
    36         repo_path,
       
    37         'log',
       
    38         '--template',
       
    39         '{rev}',
       
    40         '--rev',
       
    41         'tip',
       
    42     ]
       
    43     s = subprocess.Popen(cmd, stdout=subprocess.PIPE)
       
    44     out, err = s.communicate()
       
    45     return int(out)
       
    46 
       
    47 
       
    48 repos = []
       
    49 for repo in sys.argv[1:]:
       
    50     size = nb_revs(repo)
       
    51     repos.append((repo, size))
       
    52 
       
    53 
       
    54 def pick_one(repo):
       
    55     pick = random.choice(SLICING)
       
    56     seed = random.randint(0, 100000)
       
    57     if pick == 'scratch':
       
    58         start = int(repo[1] * 0.3)
       
    59         end = int(repo[1] * 0.7)
       
    60         nb = random.randint(start, end)
       
    61         return ('scratch', nb, seed)
       
    62     elif pick == 'randomantichain':
       
    63         return ('randomantichain', seed)
       
    64     elif pick == 'rev':
       
    65         start = int(repo[1] * 0.3)
       
    66         end = int(repo[1])
       
    67         rev = random.randint(start, end)
       
    68         return ('rev', rev)
       
    69     else:
       
    70         assert False
       
    71 
       
    72 
       
    73 done = threading.Event()
       
    74 cases = queue.Queue(maxsize=10 * JOB)
       
    75 results = queue.Queue()
       
    76 
       
    77 
       
    78 def worker():
       
    79     while not done.is_set():
       
    80         c = cases.get()
       
    81         if c is None:
       
    82             return
       
    83         try:
       
    84             res = process(c)
       
    85             results.put((c, res))
       
    86         except Exception as exc:
       
    87             print('processing-failed: %s %s' % (c, exc), file=sys.stderr)
       
    88         c = (c[0], c[2], c[1])
       
    89         try:
       
    90             res = process(c)
       
    91             results.put((c, res))
       
    92         except Exception as exc:
       
    93             print('processing-failed: %s %s' % (c, exc), file=sys.stderr)
       
    94 
       
    95 
       
    96 SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')
       
    97 
       
    98 
       
    99 CMD_BASE = (
       
   100     HG_BIN,
       
   101     'debugdiscovery',
       
   102     '--template',
       
   103     'json',
       
   104     '--config',
       
   105     'extensions.subset=%s' % SUBSET_PATH,
       
   106 )
       
   107 #    '--local-as-revs "$left" --local-as-revs "$right"'
       
   108 #    > /data/discovery-references/results/disco-mozilla-unified-$1-$2.txt
       
   109 #        )
       
   110 
       
   111 
       
   112 def to_revsets(case):
       
   113     t = case[0]
       
   114     if t == 'scratch':
       
   115         return 'not scratch(all(), %d, "%d")' % (case[1], case[2])
       
   116     elif t == 'randomantichain':
       
   117         return '::randomantichain(all(), "%d")' % case[1]
       
   118     elif t == 'rev':
       
   119         return '::%d' % case[1]
       
   120     else:
       
   121         assert False
       
   122 
       
   123 
       
   124 def process(case):
       
   125     (repo, left, right) = case
       
   126     cmd = list(CMD_BASE)
       
   127     cmd.append('-R')
       
   128     cmd.append(repo[0])
       
   129     cmd.append('--local-as-revs')
       
   130     cmd.append(to_revsets(left))
       
   131     cmd.append('--remote-as-revs')
       
   132     cmd.append(to_revsets(right))
       
   133     s = subprocess.Popen(cmd, stdout=subprocess.PIPE)
       
   134     out, err = s.communicate()
       
   135     return json.loads(out)[0]
       
   136 
       
   137 
       
   138 def interesting_boundary(res):
       
   139     """check if a case is interesting or not
       
   140 
       
   141     For now we are mostly interrested in case were we do multiple roundstrip
       
   142     and where the boundary is somewhere in the middle of the undecided set.
       
   143 
       
   144     Ideally, we would make this configurable, but this is not a focus for now
       
   145 
       
   146     return None or (round-trip, undecided-common, undecided-missing)
       
   147     """
       
   148     roundtrips = res["total-roundtrips"]
       
   149     if roundtrips <= 1:
       
   150         return None
       
   151     undecided_common = res["nb-ini_und-common"]
       
   152     undecided_missing = res["nb-ini_und-missing"]
       
   153     if undecided_common == 0:
       
   154         return None
       
   155     if undecided_missing == 0:
       
   156         return None
       
   157     return (roundtrips, undecided_common, undecided_missing)
       
   158 
       
   159 
       
   160 def end(*args, **kwargs):
       
   161     done.set()
       
   162 
       
   163 
       
   164 def format_case(case):
       
   165     return '-'.join(str(s) for s in case)
       
   166 
       
   167 
       
   168 signal.signal(signal.SIGINT, end)
       
   169 
       
   170 for i in range(JOB):
       
   171     threading.Thread(target=worker).start()
       
   172 
       
   173 nb_cases = 0
       
   174 while not done.is_set():
       
   175     repo = random.choice(repos)
       
   176     left = pick_one(repo)
       
   177     right = pick_one(repo)
       
   178     cases.put((repo, left, right))
       
   179     while not results.empty():
       
   180         # results has a single reader so this is fine
       
   181         c, res = results.get_nowait()
       
   182         boundary = interesting_boundary(res)
       
   183         if boundary is not None:
       
   184             print(c[0][0], format_case(c[1]), format_case(c[2]), *boundary)
       
   185             sys.stdout.flush()
       
   186 
       
   187     nb_cases += 1
       
   188     if not nb_cases % 100:
       
   189         print('[%d cases generated]' % nb_cases, file=sys.stderr)
       
   190 
       
   191 for i in range(JOB):
       
   192     try:
       
   193         cases.put_nowait(None)
       
   194     except queue.Full:
       
   195         pass
       
   196 
       
   197 print('[%d cases generated]' % nb_cases, file=sys.stderr)
       
   198 print('[ouput generation is over]' % nb_cases, file=sys.stderr)