rust/hg-core/src/revlog/index.rs
changeset 51224 43241f31cf5b
parent 51223 42c8dbdb88ad
child 51225 89ce6a49bfeb
equal deleted inserted replaced
51223:42c8dbdb88ad 51224:43241f31cf5b
  1042 
  1042 
  1043     /// Given a disjoint set of revs, return all candidates for the
  1043     /// Given a disjoint set of revs, return all candidates for the
  1044     /// greatest common ancestor. In revset notation, this is the set
  1044     /// greatest common ancestor. In revset notation, this is the set
  1045     /// `heads(::a and ::b and ...)`
  1045     /// `heads(::a and ::b and ...)`
  1046     #[allow(dead_code)]
  1046     #[allow(dead_code)]
  1047     fn find_gca_candidates(
  1047     fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
  1048         &self,
  1048         &self,
  1049         revs: &[Revision],
  1049         revs: &[Revision],
  1050     ) -> Result<Vec<Revision>, GraphError> {
  1050     ) -> Result<Vec<Revision>, GraphError> {
  1051         if revs.is_empty() {
  1051         if revs.is_empty() {
  1052             return Ok(vec![]);
  1052             return Ok(vec![]);
  1053         }
  1053         }
  1054         let revcount = revs.len();
  1054         let revcount = revs.len();
  1055         let mut candidates = vec![];
  1055         let mut candidates = vec![];
  1056         let all_seen = 1u64 << (revcount - 1);
       
  1057         let poison = 1u64 << revs.len();
       
  1058         let max_rev = revs.iter().max().unwrap();
  1056         let max_rev = revs.iter().max().unwrap();
  1059         let mut seen = vec![0u64; (max_rev.0 + 1) as usize];
  1057 
       
  1058         let mut seen = BS::vec_of_empty(revs.len(), (max_rev.0 + 1) as usize);
  1060 
  1059 
  1061         for (idx, rev) in revs.iter().enumerate() {
  1060         for (idx, rev) in revs.iter().enumerate() {
  1062             seen[rev.0 as usize] = 1 << idx;
  1061             seen[rev.0 as usize].add(idx);
  1063         }
  1062         }
  1064         let mut current_rev = *max_rev;
  1063         let mut current_rev = *max_rev;
  1065         // Number of revisions whose inspection in the main loop
  1064         // Number of revisions whose inspection in the main loop
  1066         // will give a result or trigger inspection of other revisions
  1065         // will give a result or trigger inspection of other revisions
  1067         let mut interesting = revcount;
  1066         let mut interesting = revcount;
  1087         // - The `interesting` counter allows to break early
  1086         // - The `interesting` counter allows to break early
  1088         // - The loop starts from `max(revs)`
  1087         // - The loop starts from `max(revs)`
  1089         // - Early return in case it is detected that one of the incoming revs
  1088         // - Early return in case it is detected that one of the incoming revs
  1090         //   is a common ancestor of all of them.
  1089         //   is a common ancestor of all of them.
  1091         while current_rev.0 >= 0 && interesting > 0 {
  1090         while current_rev.0 >= 0 && interesting > 0 {
  1092             let mut current_seen = seen[current_rev.0 as usize];
  1091             let mut current_seen = seen[current_rev.0 as usize].clone();
  1093 
  1092 
  1094             if current_seen == 0 {
  1093             if current_seen.is_empty() {
  1095                 current_rev = Revision(current_rev.0 - 1);
  1094                 current_rev = Revision(current_rev.0 - 1);
  1096                 continue;
  1095                 continue;
  1097             }
  1096             }
  1098             if current_seen < poison {
  1097             if !current_seen.is_poisoned() {
  1099                 interesting -= 1;
  1098                 interesting -= 1;
  1100                 if current_seen == all_seen {
  1099                 if current_seen.is_full_range(revcount) {
  1101                     candidates.push(current_rev);
  1100                     candidates.push(current_rev);
  1102                     current_seen |= poison;
  1101                     seen[current_rev.0 as usize].poison();
       
  1102                     current_seen.poison(); // avoid recloning
  1103 
  1103 
  1104                     // Being a common ancestor, if `current_rev` is among
  1104                     // Being a common ancestor, if `current_rev` is among
  1105                     // the input revisions, it is *the* answer.
  1105                     // the input revisions, it is *the* answer.
  1106                     for rev in revs {
  1106                     for rev in revs {
  1107                         if *rev == current_rev {
  1107                         if *rev == current_rev {
  1112             }
  1112             }
  1113             for parent in self.parents(current_rev)? {
  1113             for parent in self.parents(current_rev)? {
  1114                 if parent == NULL_REVISION {
  1114                 if parent == NULL_REVISION {
  1115                     continue;
  1115                     continue;
  1116                 }
  1116                 }
  1117                 let parent_seen = seen[parent.0 as usize];
  1117                 let parent_seen = &seen[parent.0 as usize];
  1118                 if current_seen < poison {
  1118                 if !current_seen.is_poisoned() {
  1119                     // Without the `interesting` accounting, this block would
  1119                     // Without the `interesting` accounting, this block would
  1120                     // be logically equivalent to: parent_seen |= current_seen
  1120                     // be logically equivalent to: parent_seen |= current_seen
  1121                     // The parent counts as interesting if it was not already
  1121                     // The parent counts as interesting if it was not already
  1122                     // known to be an ancestor (would already have counted)
  1122                     // known to be an ancestor (would already have counted)
  1123                     if parent_seen == 0 {
  1123                     if parent_seen.is_empty() {
  1124                         seen[parent.0 as usize] = current_seen;
  1124                         seen[parent.0 as usize] = current_seen.clone();
  1125                         interesting += 1;
  1125                         interesting += 1;
  1126                     } else if parent_seen != current_seen {
  1126                     } else if *parent_seen != current_seen {
  1127                         seen[parent.0 as usize] |= current_seen;
  1127                         seen[parent.0 as usize].union(&current_seen);
  1128                     }
  1128                     }
  1129                 } else {
  1129                 } else {
  1130                     // this block is logically equivalent to poisoning parent
  1130                     // this block is logically equivalent to poisoning parent
  1131                     // and counting it as non interesting if it
  1131                     // and counting it as non interesting if it
  1132                     // has been seen before (hence counted then as interesting)
  1132                     // has been seen before (hence counted then as interesting)
  1133                     if parent_seen != 0 && parent_seen < poison {
  1133                     if !parent_seen.is_empty() && !parent_seen.is_poisoned() {
  1134                         interesting -= 1;
  1134                         interesting -= 1;
  1135                     }
  1135                     }
  1136                     seen[parent.0 as usize] = current_seen;
  1136                     // equivalent to poisoning parent, which we should do to
       
  1137                     // avoid the cloning.
       
  1138                     seen[parent.0 as usize] = current_seen.clone();
  1137                 }
  1139                 }
  1138             }
  1140             }
  1139 
  1141 
  1140             current_rev = Revision(current_rev.0 - 1);
  1142             current_rev = Revision(current_rev.0 - 1);
  1141         }
  1143         }
  1240             })
  1242             })
  1241             .collect())
  1243             .collect())
  1242     }
  1244     }
  1243 }
  1245 }
  1244 
  1246 
       
  1247 /// The kind of functionality needed by find_gca_candidates
       
  1248 ///
       
  1249 /// This is a bit mask which can be declared to be "poisoned", which callers
       
  1250 /// interpret to break out of some loops.
       
  1251 ///
       
  1252 /// The maximum capacity of the bit mask is up to the actual implementation
       
  1253 trait PoisonableBitSet: Sized + PartialEq {
       
  1254     /// Return a vector of exactly n elements, initialized to be empty.
       
  1255     ///
       
  1256     /// Optimization can vastly depend on implementation. Those being `Copy`
       
  1257     /// and having constant capacity typically can have a very simple
       
  1258     /// implementation.
       
  1259     fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
       
  1260 
       
  1261     /// The size of the bit mask in memory
       
  1262     fn size(&self) -> usize;
       
  1263 
       
  1264     /// The number of elements that can be represented in the set.
       
  1265     ///
       
  1266     /// Another way to put it is that it is the highest integer `C` such that
       
  1267     /// the set is guaranteed to always be a subset of the integer range
       
  1268     /// `[0, C)`
       
  1269     fn capacity(&self) -> usize;
       
  1270 
       
  1271     /// Declare `n` to belong to the set
       
  1272     fn add(&mut self, n: usize);
       
  1273 
       
  1274     /// Declare `n` not to belong to the set
       
  1275     fn discard(&mut self, n: usize);
       
  1276 
       
  1277     /// Replace this bit set by its union with other
       
  1278     fn union(&mut self, other: &Self);
       
  1279 
       
  1280     /// Poison the bit set
       
  1281     ///
       
  1282     /// Interpretation up to the caller
       
  1283     fn poison(&mut self);
       
  1284 
       
  1285     /// Is the bit set poisoned?
       
  1286     ///
       
  1287     /// Interpretation is up to the caller
       
  1288     fn is_poisoned(&self) -> bool;
       
  1289 
       
  1290     /// Is the bit set empty?
       
  1291     fn is_empty(&self) -> bool;
       
  1292 
       
  1293     /// return `true` if and only if the bit is the full range `[0, n)`
       
  1294     /// of integers
       
  1295     fn is_full_range(&self, n: usize) -> bool;
       
  1296 }
       
  1297 
       
  1298 const U64_POISON: u64 = 1 << 63;
       
  1299 
       
  1300 impl PoisonableBitSet for u64 {
       
  1301     fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
       
  1302         vec![0u64; vec_len]
       
  1303     }
       
  1304 
       
  1305     fn size(&self) -> usize {
       
  1306         8
       
  1307     }
       
  1308 
       
  1309     fn capacity(&self) -> usize {
       
  1310         63
       
  1311     }
       
  1312 
       
  1313     fn add(&mut self, n: usize) {
       
  1314         (*self) |= 1u64 << n;
       
  1315     }
       
  1316 
       
  1317     fn discard(&mut self, n: usize) {
       
  1318         (*self) &= u64::MAX - (1u64 << n);
       
  1319     }
       
  1320 
       
  1321     fn union(&mut self, other: &Self) {
       
  1322         (*self) |= *other;
       
  1323     }
       
  1324 
       
  1325     fn is_full_range(&self, n: usize) -> bool {
       
  1326         *self + 1 == (1u64 << n)
       
  1327     }
       
  1328 
       
  1329     fn is_empty(&self) -> bool {
       
  1330         *self == 0
       
  1331     }
       
  1332 
       
  1333     fn poison(&mut self) {
       
  1334         *self = U64_POISON;
       
  1335     }
       
  1336 
       
  1337     fn is_poisoned(&self) -> bool {
       
  1338         // equality comparison would be tempting but would not resist
       
  1339         // operations after poisoning (even if these should be bogus).
       
  1340         *self >= U64_POISON
       
  1341     }
       
  1342 }
       
  1343 
       
  1344 /// A poisonable bit set whose capacity is not known at compile time but
       
  1345 /// is constant after initial construction
       
  1346 ///
       
  1347 /// This can be way further optimized if performance assessments (speed
       
  1348 /// and/or RAM) require it.
       
  1349 /// As far as RAM is concerned, for large vectors of these, the main problem
       
  1350 /// would be the repetition of set_size in each item. We would need a trait
       
  1351 /// to abstract over the idea of a vector of such bit sets to do better.
       
  1352 #[derive(Clone, PartialEq)]
       
  1353 struct NonStaticPoisonableBitSet {
       
  1354     set_size: usize,
       
  1355     bit_set: Vec<u64>,
       
  1356 }
       
  1357 
       
  1358 /// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
       
  1359 fn non_static_poisonable_inner_len(set_size: usize) -> usize {
       
  1360     1 + (set_size + 1) / 64
       
  1361 }
       
  1362 
       
  1363 impl NonStaticPoisonableBitSet {
       
  1364     /// The index of the sub-bit set for the given n, and the index inside
       
  1365     /// the latter
       
  1366     fn index(&self, n: usize) -> (usize, usize) {
       
  1367         (n / 64, n % 64)
       
  1368     }
       
  1369 }
       
  1370 
       
  1371 /// Mock implementation to ensure that the trait makes sense
       
  1372 impl PoisonableBitSet for NonStaticPoisonableBitSet {
       
  1373     fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
       
  1374         let tmpl = Self {
       
  1375             set_size,
       
  1376             bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
       
  1377         };
       
  1378         vec![tmpl; vec_len]
       
  1379     }
       
  1380 
       
  1381     fn size(&self) -> usize {
       
  1382         8 + self.bit_set.len() * 8
       
  1383     }
       
  1384 
       
  1385     fn capacity(&self) -> usize {
       
  1386         self.set_size
       
  1387     }
       
  1388 
       
  1389     fn add(&mut self, n: usize) {
       
  1390         let (sub_bs, bit_pos) = self.index(n);
       
  1391         self.bit_set[sub_bs] |= 1 << bit_pos
       
  1392     }
       
  1393 
       
  1394     fn discard(&mut self, n: usize) {
       
  1395         let (sub_bs, bit_pos) = self.index(n);
       
  1396         self.bit_set[sub_bs] |= u64::MAX - (1 << bit_pos)
       
  1397     }
       
  1398 
       
  1399     fn union(&mut self, other: &Self) {
       
  1400         assert!(
       
  1401             self.set_size == other.set_size,
       
  1402             "Binary operations on bit sets can only be done on same size"
       
  1403         );
       
  1404         for i in 0..self.bit_set.len() - 1 {
       
  1405             self.bit_set[i] |= other.bit_set[i]
       
  1406         }
       
  1407     }
       
  1408 
       
  1409     fn is_full_range(&self, n: usize) -> bool {
       
  1410         let (sub_bs, bit_pos) = self.index(n);
       
  1411         self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
       
  1412             && self.bit_set[sub_bs] == (1 << (bit_pos + 1)) - 1
       
  1413     }
       
  1414 
       
  1415     fn is_empty(&self) -> bool {
       
  1416         self.bit_set.iter().all(|bs| *bs == 0u64)
       
  1417     }
       
  1418 
       
  1419     fn poison(&mut self) {
       
  1420         let (sub_bs, bit_pos) = self.index(self.set_size);
       
  1421         self.bit_set[sub_bs] = 1 << bit_pos;
       
  1422     }
       
  1423 
       
  1424     fn is_poisoned(&self) -> bool {
       
  1425         let (sub_bs, bit_pos) = self.index(self.set_size);
       
  1426         self.bit_set[sub_bs] >= 1 << bit_pos
       
  1427     }
       
  1428 }
       
  1429 
  1245 /// Set of roots of all non-public phases
  1430 /// Set of roots of all non-public phases
  1246 pub type RootsPerPhase = [HashSet<Revision>; Phase::non_public_phases().len()];
  1431 pub type RootsPerPhase = [HashSet<Revision>; Phase::non_public_phases().len()];
  1247 
  1432 
  1248 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
  1433 #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
  1249 pub enum Phase {
  1434 pub enum Phase {