handle missing parent names with varying case

Also convert to \x1f before sorting, so that numbers (with have a lower
ascii order than '::') don't mess up the sort.
This commit is contained in:
Damien Elmes 2021-01-16 19:28:06 +10:00
parent 6f7c68b661
commit a390a77815

View file

@ -12,11 +12,7 @@ use crate::{
use regex::{NoExpand, Regex, Replacer}; use regex::{NoExpand, Regex, Replacer};
use std::cmp::Ordering; use std::cmp::Ordering;
use std::{ use std::{borrow::Cow, collections::HashSet, iter::Peekable};
borrow::Cow,
collections::{HashMap, HashSet},
iter::Peekable,
};
use unicase::UniCase; use unicase::UniCase;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -120,30 +116,50 @@ fn normalize_tag_name(name: &str) -> String {
out.trim_end_matches("::").into() out.trim_end_matches("::").into()
} }
fn fill_missing_tags(tags: Vec<Tag>) -> Vec<Tag> { fn immediate_parent_name(tag_name: UniCase<&str>) -> Option<UniCase<&str>> {
let mut filled_tags: HashMap<String, Tag> = HashMap::with_capacity(tags.len()); tag_name.rsplitn(2, '\x1f').nth(1).map(UniCase::new)
for tag in tags.into_iter() {
let name = tag.name.to_owned();
let split: Vec<&str> = (&tag.name).split("::").collect();
for i in 0..split.len() - 1 {
let comp = split[0..i + 1].join("::");
let t = Tag::new(comp.to_owned(), Usn(-1));
if filled_tags.get(&comp).is_none() {
filled_tags.insert(comp, t);
}
}
if filled_tags.get(&name).is_none() {
filled_tags.insert(name, tag);
}
}
let mut tags: Vec<Tag> = filled_tags.values().map(|t| (*t).clone()).collect();
tags.sort_unstable();
tags
} }
fn tags_to_tree(tags: Vec<Tag>) -> TagTreeNode { /// For the given tag, check if immediate parent exists. If so, add
let tags = fill_missing_tags(tags); /// tag and return.
/// If the immediate parent is missing, check and add any missing parents.
/// This should ensure that if an immediate parent is found, all ancestors
/// are guaranteed to already exist.
fn add_self_and_missing_parents<'a, 'b>(
all: &'a mut HashSet<UniCase<&'b str>>,
missing: &'a mut Vec<UniCase<&'b str>>,
tag_name: UniCase<&'b str>,
) {
if let Some(parent) = immediate_parent_name(tag_name) {
if !all.contains(&parent) {
missing.push(parent.clone());
add_self_and_missing_parents(all, missing, parent);
}
}
// finally, add self
all.insert(tag_name);
}
/// Append any missing parents. Caller must sort afterwards.
fn add_missing_parents(tags: &mut Vec<Tag>) {
let mut all_names: HashSet<UniCase<&str>> = HashSet::new();
let mut missing = vec![];
for tag in &*tags {
add_self_and_missing_parents(&mut all_names, &mut missing, UniCase::new(&tag.name))
}
let mut missing: Vec<_> = missing
.into_iter()
.map(|n| Tag::new(n.to_string(), Usn(0)))
.collect();
tags.append(&mut missing);
}
fn tags_to_tree(mut tags: Vec<Tag>) -> TagTreeNode {
for tag in &mut tags {
tag.name = tag.name.replace("::", "\x1f");
}
add_missing_parents(&mut tags);
tags.sort_unstable_by(|a, b| UniCase::new(&a.name).cmp(&UniCase::new(&b.name)));
let mut top = TagTreeNode::default(); let mut top = TagTreeNode::default();
let mut it = tags.into_iter().peekable(); let mut it = tags.into_iter().peekable();
add_child_nodes(&mut it, &mut top); add_child_nodes(&mut it, &mut top);
@ -153,7 +169,7 @@ fn tags_to_tree(tags: Vec<Tag>) -> TagTreeNode {
fn add_child_nodes(tags: &mut Peekable<impl Iterator<Item = Tag>>, parent: &mut TagTreeNode) { fn add_child_nodes(tags: &mut Peekable<impl Iterator<Item = Tag>>, parent: &mut TagTreeNode) {
while let Some(tag) = tags.peek() { while let Some(tag) = tags.peek() {
let split_name: Vec<_> = tag.name.split("::").collect(); let split_name: Vec<_> = tag.name.split('\x1f').collect();
match split_name.len() as u32 { match split_name.len() as u32 {
l if l <= parent.level => { l if l <= parent.level => {
// next item is at a higher level // next item is at a higher level
@ -477,8 +493,8 @@ mod test {
let mut col = open_test_collection(); let mut col = open_test_collection();
let nt = col.get_notetype_by_name("Basic")?.unwrap(); let nt = col.get_notetype_by_name("Basic")?.unwrap();
let mut note = nt.new_note(); let mut note = nt.new_note();
note.tags.push("foo::a".into()); note.tags.push("foo::bar::a".into());
note.tags.push("foo::b".into()); note.tags.push("foo::bar::b".into());
col.add_note(&mut note, DeckID(1))?; col.add_note(&mut note, DeckID(1))?;
// missing parents are added // missing parents are added
@ -487,21 +503,64 @@ mod test {
node( node(
"", "",
0, 0,
vec![node("foo", 1, vec![leaf("a", 2), leaf("b", 2)])] vec![node(
"foo",
1,
vec![node("bar", 2, vec![leaf("a", 3), leaf("b", 3)])]
)]
) )
); );
// differing case should result in only one parent case being added - // differing case should result in only one parent case being added -
// the first one // the first one
*(&mut note.tags[1]) = "FOO::b".into();
col.storage.clear_tags()?; col.storage.clear_tags()?;
*(&mut note.tags[0]) = "foo::BAR::a".into();
*(&mut note.tags[1]) = "FOO::bar::b".into();
col.update_note(&mut note)?; col.update_note(&mut note)?;
assert_eq!( assert_eq!(
col.tag_tree()?, col.tag_tree()?,
node( node(
"", "",
0, 0,
vec![node("foo", 1, vec![leaf("a", 2), leaf("b", 2)])] vec![node(
"foo",
1,
vec![node("BAR", 2, vec![leaf("a", 3), leaf("b", 3)])]
)]
)
);
// things should work even if the immediate parent is not missing
col.storage.clear_tags()?;
*(&mut note.tags[0]) = "foo::bar::baz".into();
*(&mut note.tags[1]) = "foo::bar::baz::quux".into();
col.update_note(&mut note)?;
assert_eq!(
col.tag_tree()?,
node(
"",
0,
vec![node(
"foo",
1,
vec![node("bar", 2, vec![node("baz", 3, vec![leaf("quux", 4)])])]
)]
)
);
// numbers have a smaller ascii number than ':', so a naive sort on
// '::' would result in one::two being nested under one1.
col.storage.clear_tags()?;
*(&mut note.tags[0]) = "one".into();
*(&mut note.tags[1]) = "one1".into();
note.tags.push("one::two".into());
col.update_note(&mut note)?;
assert_eq!(
col.tag_tree()?,
node(
"",
0,
vec![node("one", 1, vec![leaf("two", 2)]), leaf("one1", 1)]
) )
); );