fix(Import): case-fold media filenames when checking uniqueness (#4435)

* add wrapper struct with case-folding get impl

* use wrapper struct

* restrict case-folding to windows

* Revert "restrict case-folding to windows"

This reverts commit aad01d904f.

* case-fold filenames for newly added media

* add test

* fix incorrect comment
This commit is contained in:
llama 2025-11-20 23:43:14 +08:00 committed by GitHub
parent dda192f24c
commit 5614d20bed
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 32 additions and 8 deletions

View file

@ -17,6 +17,7 @@ use crate::import_export::package::media::SafeMediaEntry;
use crate::import_export::ImportProgress; use crate::import_export::ImportProgress;
use crate::media::files::add_hash_suffix_to_file_stem; use crate::media::files::add_hash_suffix_to_file_stem;
use crate::media::files::sha1_of_reader; use crate::media::files::sha1_of_reader;
use crate::media::Checksums;
use crate::prelude::*; use crate::prelude::*;
use crate::progress::ThrottlingProgressHandler; use crate::progress::ThrottlingProgressHandler;
@ -75,7 +76,7 @@ impl Context<'_> {
fn prepare_media( fn prepare_media(
media_entries: Vec<SafeMediaEntry>, media_entries: Vec<SafeMediaEntry>,
archive: &mut ZipArchive<File>, archive: &mut ZipArchive<File>,
existing_sha1s: &HashMap<String, Sha1Hash>, existing_sha1s: &Checksums,
progress: &mut ThrottlingProgressHandler<ImportProgress>, progress: &mut ThrottlingProgressHandler<ImportProgress>,
) -> Result<MediaUseMap> { ) -> Result<MediaUseMap> {
let mut media_map = MediaUseMap::default(); let mut media_map = MediaUseMap::default();

View file

@ -173,7 +173,9 @@ pub fn add_data_to_folder_uniquely<'a, P>(
where where
P: AsRef<Path>, P: AsRef<Path>,
{ {
let normalized_name = normalize_filename(desired_name); // force lowercase to account for case-insensitive filesystems
// but not within normalize_filename, for existing media refs
let normalized_name: Cow<_> = normalize_filename(desired_name).to_lowercase().into();
let mut target_path = folder.as_ref().join(normalized_name.as_ref()); let mut target_path = folder.as_ref().join(normalized_name.as_ref());
@ -496,8 +498,14 @@ mod test {
"test.mp3" "test.mp3"
); );
// different contents // different contents, filenames differ only by case
let h2 = sha1_of_data(b"hello1"); let h2 = sha1_of_data(b"hello1");
assert_eq!(
add_data_to_folder_uniquely(dpath, "Test.mp3", b"hello1", h2).unwrap(),
"test-88fdd585121a4ccb3d1540527aee53a77c77abb8.mp3"
);
// same contents, filenames differ only by case
assert_eq!( assert_eq!(
add_data_to_folder_uniquely(dpath, "test.mp3", b"hello1", h2).unwrap(), add_data_to_folder_uniquely(dpath, "test.mp3", b"hello1", h2).unwrap(),
"test-88fdd585121a4ccb3d1540527aee53a77c77abb8.mp3" "test-88fdd585121a4ccb3d1540527aee53a77c77abb8.mp3"

View file

@ -6,7 +6,6 @@ pub mod files;
mod service; mod service;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap;
use std::path::Path; use std::path::Path;
use std::path::PathBuf; use std::path::PathBuf;
@ -22,6 +21,7 @@ use crate::progress::ThrottlingProgressHandler;
use crate::sync::http_client::HttpSyncClient; use crate::sync::http_client::HttpSyncClient;
use crate::sync::login::SyncAuth; use crate::sync::login::SyncAuth;
use crate::sync::media::database::client::changetracker::ChangeTracker; use crate::sync::media::database::client::changetracker::ChangeTracker;
pub use crate::sync::media::database::client::Checksums;
use crate::sync::media::database::client::MediaDatabase; use crate::sync::media::database::client::MediaDatabase;
use crate::sync::media::database::client::MediaEntry; use crate::sync::media::database::client::MediaEntry;
use crate::sync::media::progress::MediaSyncProgress; use crate::sync::media::progress::MediaSyncProgress;
@ -157,7 +157,7 @@ impl MediaManager {
pub fn all_checksums_after_checking( pub fn all_checksums_after_checking(
&self, &self,
progress: impl FnMut(usize) -> bool, progress: impl FnMut(usize) -> bool,
) -> Result<HashMap<String, Sha1Hash>> { ) -> Result<Checksums> {
ChangeTracker::new(&self.media_folder, progress).register_changes(&self.db)?; ChangeTracker::new(&self.media_folder, progress).register_changes(&self.db)?;
self.db.all_registered_checksums() self.db.all_registered_checksums()
} }
@ -176,7 +176,7 @@ impl MediaManager {
/// All checksums without registering changes first. /// All checksums without registering changes first.
#[cfg(test)] #[cfg(test)]
pub(crate) fn all_checksums_as_is(&self) -> HashMap<String, [u8; 20]> { pub(crate) fn all_checksums_as_is(&self) -> Checksums {
self.db.all_registered_checksums().unwrap() self.db.all_registered_checksums().unwrap()
} }
} }

View file

@ -18,6 +18,20 @@ use crate::prelude::*;
pub mod changetracker; pub mod changetracker;
pub struct Checksums(HashMap<String, Sha1Hash>);
impl Checksums {
// case-fold filenames when checking files to be imported
// to account for case-insensitive filesystems
pub fn get(&self, key: impl AsRef<str>) -> Option<&Sha1Hash> {
self.0.get(key.as_ref().to_lowercase().as_str())
}
pub fn contains_key(&self, key: impl AsRef<str>) -> bool {
self.get(key).is_some()
}
}
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub struct MediaEntry { pub struct MediaEntry {
pub fname: String, pub fname: String,
@ -175,11 +189,12 @@ delete from media where fname=?",
} }
/// Returns all filenames and checksums, where the checksum is not null. /// Returns all filenames and checksums, where the checksum is not null.
pub(crate) fn all_registered_checksums(&self) -> error::Result<HashMap<String, Sha1Hash>> { pub(crate) fn all_registered_checksums(&self) -> error::Result<Checksums> {
self.db self.db
.prepare("SELECT fname, csum FROM media WHERE csum IS NOT NULL")? .prepare("SELECT fname, csum FROM media WHERE csum IS NOT NULL")?
.query_and_then([], row_to_name_and_checksum)? .query_and_then([], row_to_name_and_checksum)?
.collect() .collect::<error::Result<_>>()
.map(Checksums)
} }
pub(crate) fn force_resync(&self) -> error::Result<()> { pub(crate) fn force_resync(&self) -> error::Result<()> {