Patchwork D7797: rust-nodemap: pure Rust example

login
register
mail settings
Submitter phabricator
Date Jan. 27, 2020, 3:50 p.m.
Message ID <6473dcf32da9e6ea697868648ba7210d@localhost.localdomain>
Download mbox | patch
Permalink /patch/44691/
State Not Applicable
Headers show

Comments

phabricator - Jan. 27, 2020, 3:50 p.m.
gracinet edited the summary of this revision.
gracinet updated this revision to Diff 19637.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7797?vs=19046&id=19637

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7797/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7797

AFFECTED FILES
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/examples/nodemap/index.rs
  rust/hg-core/examples/nodemap/main.rs

CHANGE DETAILS




To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel

Patch

diff --git a/rust/hg-core/examples/nodemap/main.rs b/rust/hg-core/examples/nodemap/main.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/examples/nodemap/main.rs
@@ -0,0 +1,150 @@ 
+// Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+extern crate clap;
+extern crate hg;
+extern crate memmap;
+
+use clap::*;
+use hg::revlog::node::*;
+use hg::revlog::nodemap::*;
+use hg::revlog::*;
+use memmap::MmapOptions;
+use rand::Rng;
+use std::fs::File;
+use std::io;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::str::FromStr;
+use std::time::Instant;
+
+mod index;
+use index::Index;
+
+fn mmap_index(repo_path: &Path) -> Index {
+    let mut path = PathBuf::from(repo_path);
+    path.extend([".hg", "store", "00changelog.i"].iter());
+    Index::load_mmap(path)
+}
+
+fn mmap_nodemap(path: &Path) -> NodeTree {
+    let file = File::open(path).unwrap();
+    let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
+    let len = mmap.len();
+    NodeTree::load_bytes(Box::new(mmap), len)
+}
+
+/// Scan the whole index and create the corresponding nodemap file at `path`
+fn create(index: &Index, path: &Path) -> io::Result<()> {
+    let mut file = File::create(path)?;
+    let start = Instant::now();
+    let mut nm = NodeTree::default();
+    for rev in 0..index.len() {
+        let rev = rev as Revision;
+        nm.insert(index, index.node(rev).unwrap(), rev).unwrap();
+    }
+    eprintln!("Nodemap constructed in RAM in {:?}", start.elapsed());
+    file.write(&nm.into_readonly_and_added_bytes().1)?;
+    eprintln!("Nodemap written to disk");
+    Ok(())
+}
+
+fn query(index: &Index, nm: &NodeTree, prefix: &str) {
+    let start = Instant::now();
+    let res = nm.find_hex(index, prefix);
+    println!("Result found in {:?}: {:?}", start.elapsed(), res);
+}
+
+fn bench(index: &Index, nm: &NodeTree, queries: usize) {
+    let len = index.len() as u32;
+    let mut rng = rand::thread_rng();
+    let nodes: Vec<Node> = (0..queries)
+        .map(|_| {
+            index
+                .node((rng.gen::<u32>() % len) as Revision)
+                .unwrap()
+                .clone()
+        })
+        .collect();
+    if queries < 10 {
+        let nodes_hex: Vec<String> =
+            nodes.iter().map(|n| n.encode_hex()).collect();
+        println!("Nodes: {:?}", nodes_hex);
+    }
+    let mut last: Option<Revision> = None;
+    let start = Instant::now();
+    for node in nodes.iter() {
+        last = nm.find_bin(index, node.into()).unwrap();
+    }
+    let elapsed = start.elapsed();
+    println!(
+        "Did {} queries in {:?} (mean {:?}), last was {:?} with result {:?}",
+        queries,
+        elapsed,
+        elapsed / (queries as u32),
+        nodes.last().unwrap().encode_hex(),
+        last
+    );
+}
+
+fn main() {
+    let matches = App::new("Nodemap pure Rust example")
+        .arg(
+            Arg::with_name("REPOSITORY")
+                .help("Path to the repository, always necessary for its index")
+                .required(true),
+        )
+        .arg(
+            Arg::with_name("NODEMAP_FILE")
+                .help("Path to the nodemap file, independent of REPOSITORY")
+                .required(true),
+        )
+        .subcommand(
+            SubCommand::with_name("create")
+                .about("Create NODEMAP_FILE by scanning repository index"),
+        )
+        .subcommand(
+            SubCommand::with_name("query")
+                .about("Query NODEMAP_FILE for PREFIX")
+                .arg(Arg::with_name("PREFIX").required(true)),
+        )
+        .subcommand(
+            SubCommand::with_name("bench")
+                .about(
+                    "Perform #QUERIES random successful queries on NODEMAP_FILE")
+                .arg(Arg::with_name("QUERIES").required(true)),
+        )
+        .get_matches();
+
+    let repo = matches.value_of("REPOSITORY").unwrap();
+    let nm_path = matches.value_of("NODEMAP_FILE").unwrap();
+
+    let index = mmap_index(&Path::new(repo));
+
+    if let Some(_) = matches.subcommand_matches("create") {
+        println!("Creating nodemap file {} for repository {}", nm_path, repo);
+        create(&index, &Path::new(nm_path)).unwrap();
+        return;
+    }
+
+    let nm = mmap_nodemap(&Path::new(nm_path));
+    if let Some(matches) = matches.subcommand_matches("query") {
+        let prefix = matches.value_of("PREFIX").unwrap();
+        println!(
+            "Querying {} in nodemap file {} of repository {}",
+            prefix, nm_path, repo
+        );
+        query(&index, &nm, prefix);
+    }
+    if let Some(matches) = matches.subcommand_matches("bench") {
+        let queries =
+            usize::from_str(matches.value_of("QUERIES").unwrap()).unwrap();
+        println!(
+            "Doing {} random queries in nodemap file {} of repository {}",
+            queries, nm_path, repo
+        );
+        bench(&index, &nm, queries);
+    }
+}
diff --git a/rust/hg-core/examples/nodemap/index.rs b/rust/hg-core/examples/nodemap/index.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/examples/nodemap/index.rs
@@ -0,0 +1,89 @@ 
+// Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+/// Minimal `RevlogIndex`, readable from standard Mercurial file format
+use hg::*;
+use memmap::*;
+use std::fs::File;
+use std::ops::Deref;
+use std::path::Path;
+use std::slice;
+
+pub struct Index {
+    data: Box<dyn Deref<Target = [IndexEntry]> + Send>,
+}
+
+/// A fixed sized index entry. All numbers are big endian
+#[repr(C)]
+pub struct IndexEntry {
+    not_used_yet: [u8; 24],
+    p1: Revision,
+    p2: Revision,
+    node: Node,
+    unused_node: [u8; 12],
+}
+
+pub const INDEX_ENTRY_SIZE: usize = 64;
+
+impl IndexEntry {
+    fn parents(&self) -> [Revision; 2] {
+        [Revision::from_be(self.p1), Revision::from_be(self.p1)]
+    }
+}
+
+impl RevlogIndex for Index {
+    fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    fn node<'a>(&'a self, rev: Revision) -> Option<&'a Node> {
+        if rev == NULL_REVISION {
+            return None;
+        }
+        let i = rev as usize;
+        if i >= self.len() {
+            None
+        } else {
+            Some(&self.data[i].node)
+        }
+    }
+}
+
+impl Graph for &Index {
+    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
+        let [p1, p2] = (*self).data[rev as usize].parents();
+        let len = (*self).len();
+        if p1 < NULL_REVISION
+            || p2 < NULL_REVISION
+            || p1 as usize >= len
+            || p2 as usize >= len
+        {
+            return Err(GraphError::ParentOutOfRange(rev));
+        }
+        Ok([p1, p2])
+    }
+}
+
+struct IndexMmap(Mmap);
+
+impl Deref for IndexMmap {
+    type Target = [IndexEntry];
+
+    fn deref(&self) -> &[IndexEntry] {
+        let ptr = self.0.as_ptr() as *const IndexEntry;
+        // Any misaligned data will be ignored.
+        unsafe { slice::from_raw_parts(ptr, self.0.len() / INDEX_ENTRY_SIZE) }
+    }
+}
+
+impl Index {
+    pub fn load_mmap(path: impl AsRef<Path>) -> Self {
+        let file = File::open(path).unwrap();
+        let mmap = unsafe { MmapOptions::new().map(&file).unwrap() };
+        Self {
+            data: Box::new(IndexMmap(mmap)),
+        }
+    }
+}
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -18,3 +18,7 @@ 
 rayon = "1.2.0"
 regex = "1.1.0"
 twox-hash = "1.5.0"
+
+[dev-dependencies]
+clap = "*"
+memmap = "0.7.0"
\ No newline at end of file
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -9,6 +9,14 @@ 
 ]
 
 [[package]]
+name = "ansi_term"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "arrayvec"
 version = "0.4.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -17,6 +25,15 @@ 
 ]
 
 [[package]]
+name = "atty"
+version = "0.2.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "autocfg"
 version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -46,6 +63,20 @@ 
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
+name = "clap"
+version = "2.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "cloudabi"
 version = "0.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -133,9 +164,11 @@ 
 version = "0.1.0"
 dependencies = [
  "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "hex 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -176,6 +209,15 @@ 
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
+name = "memmap"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "memoffset"
 version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -433,6 +475,19 @@ 
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
+name = "strsim"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "textwrap"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "thread_local"
 version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -449,6 +504,16 @@ 
 ]
 
 [[package]]
+name = "unicode-width"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "vec_map"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
 name = "wasi"
 version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -474,12 +539,15 @@ 
 
 [metadata]
 "checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"
+"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
 "checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
+"checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90"
 "checksum autocfg 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "b671c8fb71b457dd4ae18c4ba1e59aa81793daacc361d82fcd410cef0d491875"
 "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
 "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
 "checksum c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101"
 "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
+"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
 "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
 "checksum cpython 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "85532c648315aeb0829ad216a6a29aa3212cf9319bc7f6daf1404aa0bdd1485f"
 "checksum crossbeam-deque 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b18cd2e169ad86297e6bc0ad9aa679aee9daa4f19e8163860faf7c164e4f5a71"
@@ -493,6 +561,7 @@ 
 "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 "checksum libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)" = "74dfca3d9957906e8d1e6a0b641dc9a59848e793f1da2165889fd4f62d10d79c"
 "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
+"checksum memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
 "checksum memoffset 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ce6075db033bbbb7ee5a0bbd3a3186bbae616f57fb001c485c7ff77955f8177f"
 "checksum nodrop 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
 "checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32"
@@ -523,8 +592,12 @@ 
 "checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d"
 "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
 "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
+"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
+"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
 "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
 "checksum twox-hash 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3bfd5b7557925ce778ff9b9ef90e3ade34c524b5ff10e239c69a42d546d2af56"
+"checksum unicode-width 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
+"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
 "checksum wasi 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d"
 "checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
 "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"