Accelerated `get` that doesn't traverse sub-indices; roadmap update
This commit is contained in:
parent
f4403379bb
commit
0f605d1fe8
7 changed files with 202 additions and 91 deletions
|
@ -31,14 +31,16 @@
|
|||
-[x] `ls` to show Ghee's view of the world
|
||||
-[x] `init` to initialize a directory as a full-fledged Ghee table (with specified key names)
|
||||
-[x] Sensible default destination for `idx`
|
||||
-[ ] Make `get` stop listing nested indices
|
||||
-[ ] Make `get` accelerate traversal using the best available index (a la `del`)
|
||||
-[x] Make `get` stop listing nested indices
|
||||
-[x] Make `get` accelerate traversal using the best available index (a la `del`)
|
||||
-[ ] Make `get` return paths from original index rather than the one used to accelerate
|
||||
-[x] Fix output order, making commands reproducible
|
||||
|
||||
## 0.5
|
||||
-[ ] `create` to `init` and `ins` simultaneously
|
||||
-[ ] Integrate with Btrfs snapshots?
|
||||
-[ ] Integrate with Btrfs / Bcachefs / ZFS snapshots?
|
||||
-[ ] Unit test remaining commands
|
||||
-[ ] Existence predicates, e.g. `-w name` matches any record for which `name` is set
|
||||
|
||||
## Future
|
||||
|
||||
|
|
|
@ -22,9 +22,7 @@ Linked ./people/:state:id/NM/4 -> ./people/Lilly
|
|||
Initialized ./people/Darrel
|
||||
Linked ./people:id/5 -> ./people/Darrel
|
||||
Linked ./people/:state:id/MI/5 -> ./people/Darrel
|
||||
+ ghee get -w state=CA -f name ./people
|
||||
./people/Sandeep user.name Sandeep
|
||||
./people/Wulfrum user.name Wulfrum
|
||||
+ ghee get -a -w state=CA -f name ./people
|
||||
./people/:state:id/CA/0 user.name Wulfrum
|
||||
./people/:state:id/CA/2 user.name Sandeep
|
||||
+ ghee del -v ./people -w name=Sofia
|
||||
|
@ -37,6 +35,6 @@ Removed ./people/Janella
|
|||
+ ghee del -v ./people/:state:id CA 0
|
||||
Removed ./people/Wulfrum
|
||||
Removed ./people/:state:id/CA/0
|
||||
+ ghee get -w state=CA -f name ./people/:state:id
|
||||
+ ghee get -a -w state=CA -f name ./people/:state:id
|
||||
./people/:state:id/CA/2 user.name Sandeep
|
||||
+ cd ..
|
||||
|
|
|
@ -78,6 +78,8 @@ enum Commands {
|
|||
where_: Vec<Predicate>,
|
||||
#[arg(long, help = "Process paths nonrecursively; defaults to false")]
|
||||
flat: bool,
|
||||
#[arg(short, long, help = "Include user.ghee prefix in output")]
|
||||
all: bool,
|
||||
},
|
||||
|
||||
/// Set xattr values
|
||||
|
@ -244,7 +246,10 @@ fn run_command(cmd: &Commands) {
|
|||
json,
|
||||
where_,
|
||||
flat,
|
||||
} => get(paths, fields, *json, where_, !*flat),
|
||||
all,
|
||||
} => get(paths, fields, *json, where_, !*flat, *all).unwrap_or_else(|e| {
|
||||
panic!("Error getting record(s): {}", e);
|
||||
}),
|
||||
Commands::Set {
|
||||
paths,
|
||||
field_assignments,
|
||||
|
|
|
@ -81,7 +81,10 @@ pub fn del(
|
|||
debug_assert!(!where_.is_empty());
|
||||
|
||||
walk(
|
||||
PathOrIndices::Indices(&all_indices),
|
||||
PathOrIndices::PathAndIndices {
|
||||
path: table_path,
|
||||
indices: &all_indices,
|
||||
},
|
||||
where_,
|
||||
true,
|
||||
false,
|
||||
|
|
119
src/cmd/get.rs
119
src/cmd/get.rs
|
@ -1,9 +1,9 @@
|
|||
use anyhow::Result;
|
||||
use serde::Serialize;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::{
|
||||
parser::{predicate::Predicate, value::Value, xattr::Xattr},
|
||||
xattr_values,
|
||||
walk, PathOrIndices,
|
||||
};
|
||||
|
||||
use std::{collections::BTreeMap, io::Write, path::PathBuf};
|
||||
|
@ -20,69 +20,66 @@ pub fn get(
|
|||
json: bool,
|
||||
where_: &Vec<Predicate>,
|
||||
recursive: bool,
|
||||
) {
|
||||
let max_depth = if recursive { usize::MAX } else { 0 };
|
||||
|
||||
let paths = paths
|
||||
.iter()
|
||||
.flat_map(|p| WalkDir::new(p).max_depth(max_depth))
|
||||
.map(|e| e.unwrap().into_path());
|
||||
|
||||
'outer: for path in paths {
|
||||
let all_field_values = xattr_values(&path).unwrap();
|
||||
|
||||
for where_clause in where_ {
|
||||
if !where_clause.satisfied(&all_field_values) {
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
|
||||
// Fields that will be output
|
||||
let projected_fields: Vec<Xattr> = if fields.is_empty() {
|
||||
all_field_values.keys().cloned().collect()
|
||||
} else {
|
||||
fields.clone()
|
||||
};
|
||||
if json {
|
||||
let mut xattrs: BTreeMap<&Xattr, &Value> = BTreeMap::new();
|
||||
|
||||
for field in projected_fields.iter() {
|
||||
let value = &all_field_values[field];
|
||||
|
||||
xattrs.insert(field, value);
|
||||
}
|
||||
|
||||
if !xattrs.is_empty() {
|
||||
let file_xattrs = FileXattrs {
|
||||
path: path.to_string_lossy().to_string(),
|
||||
xattrs,
|
||||
all: bool,
|
||||
) -> Result<()> {
|
||||
for path in paths {
|
||||
walk(
|
||||
PathOrIndices::Path(path),
|
||||
where_,
|
||||
recursive,
|
||||
all,
|
||||
&|record| {
|
||||
// Fields that will be output
|
||||
let projected_fields: Vec<Xattr> = if fields.is_empty() {
|
||||
record.xattr_values.keys().cloned().collect()
|
||||
} else {
|
||||
fields.clone()
|
||||
};
|
||||
if json {
|
||||
let mut xattrs: BTreeMap<&Xattr, &Value> = BTreeMap::new();
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
serde_json::to_string(&file_xattrs)
|
||||
.unwrap_or_else(|e| panic!("Could not serialize as JSON: {}", e))
|
||||
);
|
||||
}
|
||||
} else {
|
||||
for field in projected_fields.iter() {
|
||||
if let Some(value) = all_field_values.get(field) {
|
||||
print!("{}\t{}\t", path.display(), field);
|
||||
for field in projected_fields.iter() {
|
||||
let value = &record.xattr_values[field];
|
||||
|
||||
{
|
||||
let mut stdout = std::io::stdout();
|
||||
stdout
|
||||
.write(value.as_bytes().as_slice())
|
||||
.unwrap_or_else(|e| {
|
||||
panic!(
|
||||
"Could not write xattr {} value {:?} to stdout: {}",
|
||||
field, value, e
|
||||
)
|
||||
});
|
||||
xattrs.insert(field, value);
|
||||
}
|
||||
|
||||
if !xattrs.is_empty() {
|
||||
let file_xattrs = FileXattrs {
|
||||
path: record.path.to_string_lossy().to_string(),
|
||||
xattrs,
|
||||
};
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
serde_json::to_string(&file_xattrs)
|
||||
.unwrap_or_else(|e| panic!("Could not serialize as JSON: {}", e))
|
||||
);
|
||||
}
|
||||
} else {
|
||||
for field in projected_fields.iter() {
|
||||
if let Some(value) = record.xattr_values.get(field) {
|
||||
print!("{}\t{}\t", record.path.display(), field);
|
||||
|
||||
{
|
||||
let mut stdout = std::io::stdout();
|
||||
stdout
|
||||
.write(value.as_bytes().as_slice())
|
||||
.unwrap_or_else(|e| {
|
||||
panic!(
|
||||
"Could not write xattr {} value {:?} to stdout: {}",
|
||||
field, value, e
|
||||
)
|
||||
});
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
},
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -116,7 +116,7 @@ mod test {
|
|||
|
||||
use crate::{
|
||||
cmd::init,
|
||||
get_index_info, get_key,
|
||||
get_index_info, get_key, indices,
|
||||
parser::{index::IndexInfo, key::Key},
|
||||
};
|
||||
|
||||
|
@ -128,10 +128,14 @@ mod test {
|
|||
|
||||
let dir2 = TempDir::new("ghee-test-idx:2").unwrap().into_path();
|
||||
|
||||
let dir3 = TempDir::new("ghee-test-idx:3").unwrap().into_path();
|
||||
|
||||
let key1 = Key::from(vec!["test1"]);
|
||||
|
||||
let key2 = Key::from(vec!["test2"]);
|
||||
|
||||
let key3 = Key::from_string("test3");
|
||||
|
||||
init(&dir1, &key1, false).unwrap();
|
||||
|
||||
idx(&dir1, Some(&dir2), &key2, false);
|
||||
|
@ -157,6 +161,17 @@ mod test {
|
|||
);
|
||||
|
||||
let idx_key = get_key(&dir2).unwrap();
|
||||
assert_eq!(idx_key, Some(key2));
|
||||
assert_eq!(idx_key.as_ref(), Some(&key2));
|
||||
|
||||
idx(&dir1, Some(&dir3), &key3, false);
|
||||
|
||||
// Make sure the indices are updated properly after a second index
|
||||
// (no overwriting of the previous)
|
||||
let indices = indices(&dir1).unwrap();
|
||||
|
||||
assert_eq!(indices.len(), 3);
|
||||
assert!(indices.contains_key(&key1));
|
||||
assert!(indices.contains_key(&key2));
|
||||
assert!(indices.contains_key(&key3));
|
||||
}
|
||||
}
|
||||
|
|
129
src/lib.rs
129
src/lib.rs
|
@ -195,31 +195,60 @@ pub fn indices(path: &PathBuf) -> Result<BTreeMap<Key, PathBuf>> {
|
|||
/// Get the index which places the predicate xattrs earliest in its primary key order
|
||||
/// The idea is that this will maximally speed up traversal of records, but this may
|
||||
/// depend on the cardinality / distribution of the subkey values
|
||||
pub fn best_index<'a>(
|
||||
pub fn best_index<'a, 'b>(
|
||||
indices: &'a BTreeMap<Key, PathBuf>,
|
||||
where_: &Vec<Predicate>,
|
||||
tie_breaker_key: &'a Key,
|
||||
) -> (&'a Key, &'a PathBuf) {
|
||||
let predicate_xattrs: Vec<Xattr> = where_.iter().map(|pred| pred.xattr.clone()).collect();
|
||||
|
||||
indices
|
||||
.iter()
|
||||
.min_by_key(|(key, _path)| {
|
||||
predicate_xattrs
|
||||
let earliest_subkey_indices: Vec<Option<usize>> = indices
|
||||
.keys()
|
||||
.map(|key| {
|
||||
let x: Option<usize> = predicate_xattrs
|
||||
.iter()
|
||||
.map(|xattr| {
|
||||
key.subkeys
|
||||
.iter()
|
||||
.position(|subkey| *subkey == *xattr)
|
||||
.unwrap_or(UNINDEXED_PREDICATE_PENALTY)
|
||||
.map(|xattr| key.subkeys.iter().position(|subkey| *subkey == *xattr))
|
||||
.reduce(|a, b| {
|
||||
if let Some(a) = a {
|
||||
if let Some(b) = b {
|
||||
Some(a + b)
|
||||
} else {
|
||||
Some(a)
|
||||
}
|
||||
} else {
|
||||
if let Some(b) = b {
|
||||
Some(b)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.reduce(|a, b| a + b)
|
||||
.unwrap_or(None);
|
||||
x
|
||||
})
|
||||
.unwrap()
|
||||
.collect();
|
||||
|
||||
if earliest_subkey_indices.iter().all(|idx| idx.is_none()) {
|
||||
let path = &indices[tie_breaker_key];
|
||||
(tie_breaker_key, path)
|
||||
} else {
|
||||
indices
|
||||
.iter()
|
||||
.enumerate()
|
||||
.min_by_key(|(idx, (_key, _path))| {
|
||||
earliest_subkey_indices[*idx].unwrap_or(UNINDEXED_PREDICATE_PENALTY)
|
||||
})
|
||||
.unwrap()
|
||||
.1
|
||||
}
|
||||
}
|
||||
|
||||
pub enum PathOrIndices<'a, 'b> {
|
||||
Path(&'a PathBuf),
|
||||
Indices(&'b BTreeMap<Key, PathBuf>),
|
||||
PathAndIndices {
|
||||
path: &'a PathBuf,
|
||||
indices: &'b BTreeMap<Key, PathBuf>,
|
||||
},
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -252,12 +281,14 @@ pub fn walk<F: Fn(PathVisit) -> Result<()>>(
|
|||
None
|
||||
};
|
||||
|
||||
let indices = match path_or_table_indices {
|
||||
PathOrIndices::Path(_path) => loaded_indices.as_ref().unwrap(),
|
||||
PathOrIndices::Indices(indices) => indices,
|
||||
let (path, indices) = match path_or_table_indices {
|
||||
PathOrIndices::Path(path) => (path, loaded_indices.as_ref().unwrap()),
|
||||
PathOrIndices::PathAndIndices { path, indices } => (path, indices),
|
||||
};
|
||||
|
||||
let (key, path) = best_index(indices, where_);
|
||||
let key = indices.iter().find(|(_key, p)| *p == path).unwrap().0;
|
||||
|
||||
let (key, path) = best_index(indices, where_, key);
|
||||
|
||||
let path_len = path.components().count();
|
||||
|
||||
|
@ -272,6 +303,11 @@ pub fn walk<F: Fn(PathVisit) -> Result<()>>(
|
|||
return true;
|
||||
}
|
||||
|
||||
// Always ignore nested indices
|
||||
if is_hidden(e) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let values = xattr_values_from_path(&key, &path, &path).unwrap();
|
||||
|
||||
// Of all xattr values set on this path, if a relevant predicate is contradicted, proceed no further
|
||||
|
@ -485,11 +521,13 @@ mod test {
|
|||
use tempdir::TempDir;
|
||||
|
||||
use crate::{
|
||||
cmd::{init, set},
|
||||
get_index_info, get_key, index_list_push,
|
||||
best_index,
|
||||
cmd::{idx, init, set},
|
||||
get_index_info, get_key, index_list_push, indices,
|
||||
parser::{
|
||||
assignment::parse_assignment,
|
||||
key::Key,
|
||||
predicate::parse_predicate,
|
||||
xattr::{parse_xattr, Namespace, Xattr},
|
||||
},
|
||||
set_key, walk, PathOrIndices,
|
||||
|
@ -623,4 +661,57 @@ mod test {
|
|||
assert!(visited_recursive.contains(&dir1));
|
||||
assert!(visited_recursive.contains(&dir2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_best_index() {
|
||||
let dir1 = TempDir::new("ghee-test-best-index-dir1")
|
||||
.unwrap()
|
||||
.into_path();
|
||||
|
||||
let key1 = Key::from_string("test");
|
||||
|
||||
init(&dir1, &key1, false).unwrap();
|
||||
|
||||
let dir2 = TempDir::new("ghee-test-best-index-dir2")
|
||||
.unwrap()
|
||||
.into_path();
|
||||
|
||||
let key2 = Key::from_string("blah,test");
|
||||
|
||||
idx(&dir1, Some(&dir2), &key2, false);
|
||||
|
||||
let indices = indices(&dir1).unwrap();
|
||||
|
||||
{
|
||||
// No predicate; should fall back to tie breaker
|
||||
let (best_key, best_path) = best_index(&indices, &vec![], &key1);
|
||||
|
||||
assert_eq!(best_key, &key1);
|
||||
assert_eq!(best_path, &dir1);
|
||||
}
|
||||
|
||||
{
|
||||
// Predicate "test" makes dir1 best
|
||||
let (best_key, best_path) = best_index(
|
||||
&indices,
|
||||
&vec![parse_predicate(b"test=5").unwrap().1],
|
||||
&key1,
|
||||
);
|
||||
|
||||
assert_eq!(best_key, &key1);
|
||||
assert_eq!(best_path, &dir1);
|
||||
}
|
||||
|
||||
{
|
||||
// Predicate "blah" makes dir2 best
|
||||
let (best_key, best_path) = best_index(
|
||||
&indices,
|
||||
&vec![parse_predicate(b"blah=6").unwrap().1],
|
||||
&key1,
|
||||
);
|
||||
|
||||
assert_eq!(best_key, &key2);
|
||||
assert_eq!(best_path, &dir2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue