fix(Import): case-fold media filenames when checking uniqueness (#4435)

* add wrapper struct with case-folding get impl

* use wrapper struct

* restrict case-folding to windows

* Revert "restrict case-folding to windows"

This reverts commit aad01d904f.

* case-fold filenames for newly added media

* add test

* fix incorrect comment
This commit is contained in:
llama 2025-11-20 23:43:14 +08:00 committed by GitHub
parent dda192f24c
commit 5614d20bed
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 32 additions and 8 deletions

View file

@ -17,6 +17,7 @@ use crate::import_export::package::media::SafeMediaEntry;
use crate::import_export::ImportProgress;
use crate::media::files::add_hash_suffix_to_file_stem;
use crate::media::files::sha1_of_reader;
use crate::media::Checksums;
use crate::prelude::*;
use crate::progress::ThrottlingProgressHandler;
@ -75,7 +76,7 @@ impl Context<'_> {
fn prepare_media(
media_entries: Vec<SafeMediaEntry>,
archive: &mut ZipArchive<File>,
existing_sha1s: &HashMap<String, Sha1Hash>,
existing_sha1s: &Checksums,
progress: &mut ThrottlingProgressHandler<ImportProgress>,
) -> Result<MediaUseMap> {
let mut media_map = MediaUseMap::default();

View file

@ -173,7 +173,9 @@ pub fn add_data_to_folder_uniquely<'a, P>(
where
P: AsRef<Path>,
{
let normalized_name = normalize_filename(desired_name);
// force lowercase to account for case-insensitive filesystems
// but not within normalize_filename, for existing media refs
let normalized_name: Cow<_> = normalize_filename(desired_name).to_lowercase().into();
let mut target_path = folder.as_ref().join(normalized_name.as_ref());
@ -496,8 +498,14 @@ mod test {
"test.mp3"
);
// different contents
// different contents, filenames differ only by case
let h2 = sha1_of_data(b"hello1");
assert_eq!(
add_data_to_folder_uniquely(dpath, "Test.mp3", b"hello1", h2).unwrap(),
"test-88fdd585121a4ccb3d1540527aee53a77c77abb8.mp3"
);
// same contents, filenames differ only by case
assert_eq!(
add_data_to_folder_uniquely(dpath, "test.mp3", b"hello1", h2).unwrap(),
"test-88fdd585121a4ccb3d1540527aee53a77c77abb8.mp3"

View file

@ -6,7 +6,6 @@ pub mod files;
mod service;
use std::borrow::Cow;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
@ -22,6 +21,7 @@ use crate::progress::ThrottlingProgressHandler;
use crate::sync::http_client::HttpSyncClient;
use crate::sync::login::SyncAuth;
use crate::sync::media::database::client::changetracker::ChangeTracker;
pub use crate::sync::media::database::client::Checksums;
use crate::sync::media::database::client::MediaDatabase;
use crate::sync::media::database::client::MediaEntry;
use crate::sync::media::progress::MediaSyncProgress;
@ -157,7 +157,7 @@ impl MediaManager {
pub fn all_checksums_after_checking(
&self,
progress: impl FnMut(usize) -> bool,
) -> Result<HashMap<String, Sha1Hash>> {
) -> Result<Checksums> {
ChangeTracker::new(&self.media_folder, progress).register_changes(&self.db)?;
self.db.all_registered_checksums()
}
@ -176,7 +176,7 @@ impl MediaManager {
/// All checksums without registering changes first.
#[cfg(test)]
pub(crate) fn all_checksums_as_is(&self) -> HashMap<String, [u8; 20]> {
pub(crate) fn all_checksums_as_is(&self) -> Checksums {
self.db.all_registered_checksums().unwrap()
}
}

View file

@ -18,6 +18,20 @@ use crate::prelude::*;
pub mod changetracker;
pub struct Checksums(HashMap<String, Sha1Hash>);
impl Checksums {
// case-fold filenames when checking files to be imported
// to account for case-insensitive filesystems
pub fn get(&self, key: impl AsRef<str>) -> Option<&Sha1Hash> {
self.0.get(key.as_ref().to_lowercase().as_str())
}
pub fn contains_key(&self, key: impl AsRef<str>) -> bool {
self.get(key).is_some()
}
}
#[derive(Debug, PartialEq, Eq)]
pub struct MediaEntry {
pub fname: String,
@ -175,11 +189,12 @@ delete from media where fname=?",
}
/// Returns all filenames and checksums, where the checksum is not null.
pub(crate) fn all_registered_checksums(&self) -> error::Result<HashMap<String, Sha1Hash>> {
pub(crate) fn all_registered_checksums(&self) -> error::Result<Checksums> {
self.db
.prepare("SELECT fname, csum FROM media WHERE csum IS NOT NULL")?
.query_and_then([], row_to_name_and_checksum)?
.collect()
.collect::<error::Result<_>>()
.map(Checksums)
}
pub(crate) fn force_resync(&self) -> error::Result<()> {