Anki/rslib/src/media/mod.rs
Damien Elmes 3707e54ffa Rework syncing code, and replace local sync server (#2329)
This PR replaces the existing Python-driven sync server with a new one in Rust.
The new server supports both collection and media syncing, and is compatible
with both the new protocol mentioned below and older clients. A setting has
been added to the preferences screen to point Anki to a local server, and a
similar setting is likely to come to AnkiMobile soon.

Documentation is available here: <https://docs.ankiweb.net/sync-server.html>

In addition to the new server and refactoring, this PR also makes changes to the
sync protocol. The existing sync protocol places payloads and metadata inside a
multipart POST body, which causes a few headaches:

- Legacy clients build the request in a non-deterministic order, meaning the
entire request needs to be scanned to extract the metadata.
- Reqwest's multipart API directly writes the multipart body, without exposing
the resulting stream to us, making it harder to track the progress of the
transfer. We've been relying on a patched version of reqwest for timeouts,
which is a pain to keep up to date.

To address these issues, the metadata is now sent in an HTTP header, with the
data payload sent directly in the body. Instead of the slower gzip, we now
use zstd. The old timeout handling code has been replaced with a new implementation
that wraps the request and response body streams to track progress, allowing us
to drop the git dependencies for reqwest, hyper-timeout and tokio-io-timeout.
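
As a rough illustration of the new request shape (the header name, metadata
fields and endpoint below are assumptions made for this sketch, not the exact
values the protocol uses):

// Hypothetical sketch: metadata travels in a header, and the body is the
// zstd-compressed payload, so no multipart scanning is required.
async fn send_request(
    client: &reqwest::Client,
    payload: &[u8],
) -> reqwest::Result<reqwest::Response> {
    // compress with zstd instead of gzip; &[u8] implements Read
    let body = zstd::encode_all(payload, 0).expect("zstd compression");
    client
        .post("http://localhost:8080/sync/endpoint") // illustrative URL
        .header("anki-sync", r#"{"version":1,"key":"..."}"#) // illustrative metadata
        .body(body)
        .send()
        .await
}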

The other main change to the protocol is that one-way syncs no longer need to
downgrade the collection to schema 11 prior to sending.
2023-01-18 12:43:46 +10:00

// Copyright: Ankitects Pty Ltd and contributors
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html

pub mod check;
pub mod files;

use std::{
    borrow::Cow,
    collections::HashMap,
    path::{Path, PathBuf},
};

use crate::{
    io::create_dir_all,
    media::files::{add_data_to_folder_uniquely, mtime_as_i64, remove_files, sha1_of_data},
    prelude::*,
    sync::{
        http_client::HttpSyncClient,
        login::SyncAuth,
        media::{
            database::client::{changetracker::ChangeTracker, MediaDatabase, MediaEntry},
            progress::MediaSyncProgress,
            syncer::MediaSyncer,
        },
    },
};
pub type Sha1Hash = [u8; 20];

impl Collection {
    pub fn media(&self) -> Result<MediaManager> {
        MediaManager::new(&self.media_folder, &self.media_db)
    }
}

pub struct MediaManager {
    pub(crate) db: MediaDatabase,
    pub(crate) media_folder: PathBuf,
}
impl MediaManager {
    pub fn new<P, P2>(media_folder: P, media_db: P2) -> Result<Self>
    where
        P: Into<PathBuf>,
        P2: AsRef<Path>,
    {
        let media_folder = media_folder.into();
        create_dir_all(&media_folder)?;
        Ok(MediaManager {
            db: MediaDatabase::new(media_db.as_ref())?,
            media_folder,
        })
    }
    /// Add a file to the media folder.
    ///
    /// If a file with differing contents already exists, a hash will be
    /// appended to the name.
    ///
    /// Also notes the file in the media database.
    pub fn add_file<'a>(&self, desired_name: &'a str, data: &[u8]) -> Result<Cow<'a, str>> {
        let data_hash = sha1_of_data(data);
        self.transact(|db| {
            let chosen_fname =
                add_data_to_folder_uniquely(&self.media_folder, desired_name, data, data_hash)?;
            let file_mtime = mtime_as_i64(self.media_folder.join(chosen_fname.as_ref()))?;
            let existing_entry = db.get_entry(&chosen_fname)?;
            let new_sha1 = Some(data_hash);
            let entry_update_required = existing_entry.map(|e| e.sha1 != new_sha1).unwrap_or(true);
            if entry_update_required {
                db.set_entry(&MediaEntry {
                    fname: chosen_fname.to_string(),
                    sha1: new_sha1,
                    mtime: file_mtime,
                    sync_required: true,
                })?;
            }
            Ok(chosen_fname)
        })
    }
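    /// Remove files from the media folder, and mark them as deleted in the
    /// media DB so the deletions are picked up by the next sync.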
    pub fn remove_files<S>(&self, filenames: &[S]) -> Result<()>
    where
        S: AsRef<str> + std::fmt::Debug,
    {
        self.transact(|db| {
            remove_files(&self.media_folder, filenames)?;
            for fname in filenames {
                if let Some(mut entry) = db.get_entry(fname.as_ref())? {
                    entry.sha1 = None;
                    entry.mtime = 0;
                    entry.sync_required = true;
                    db.set_entry(&entry)?;
                }
            }
            Ok(())
        })
    }
    /// Opens a transaction and manages the folder mtime, so the caller should
    /// perform not only DB operations, but also all file operations inside
    /// the closure.
    pub(crate) fn transact<T>(&self, func: impl FnOnce(&MediaDatabase) -> Result<T>) -> Result<T> {
        let start_folder_mtime = mtime_as_i64(&self.media_folder)?;
        self.db.transact(|db| {
            let out = func(db)?;
            let mut meta = db.get_meta()?;
            if meta.folder_mtime == start_folder_mtime {
                // if media db was in sync with folder prior to this add,
                // we can keep it in sync
                meta.folder_mtime = mtime_as_i64(&self.media_folder)?;
                db.set_meta(&meta)?;
            } else {
                // otherwise, leave it alone so that other pending changes
                // get picked up later
            }
            Ok(out)
        })
    }
    /// Set entry for a newly added file. Caller must ensure transaction.
    pub(crate) fn add_entry(&self, fname: impl Into<String>, sha1: [u8; 20]) -> Result<()> {
        let fname = fname.into();
        let mtime = mtime_as_i64(self.media_folder.join(&fname))?;
        self.db.set_entry(&MediaEntry {
            fname,
            mtime,
            sha1: Some(sha1),
            sync_required: true,
        })
    }
    /// Sync media.
    pub async fn sync_media<F>(self, progress: F, auth: SyncAuth) -> Result<()>
    where
        F: FnMut(MediaSyncProgress) -> bool,
    {
        let client = HttpSyncClient::new(auth);
        let mut syncer = MediaSyncer::new(self, progress, client)?;
        syncer.sync().await
    }
    pub fn all_checksums_after_checking(
        &self,
        progress: impl FnMut(usize) -> bool,
    ) -> Result<HashMap<String, Sha1Hash>> {
        ChangeTracker::new(&self.media_folder, progress).register_changes(&self.db)?;
        self.db.all_registered_checksums()
    }
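    /// Returns a closure that looks up a file's registered checksum in the
    /// media DB.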
    pub fn checksum_getter(&self) -> impl FnMut(&str) -> Result<Option<Sha1Hash>> + '_ {
        |fname: &str| {
            self.db
                .get_entry(fname)
                .map(|opt| opt.and_then(|entry| entry.sha1))
        }
    }
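    /// Scan the media folder for changes, and update the media DB to match.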
    pub fn register_changes(&self, progress: &mut impl FnMut(usize) -> bool) -> Result<()> {
        ChangeTracker::new(&self.media_folder, progress).register_changes(&self.db)
    }
    /// All checksums without registering changes first.
    #[cfg(test)]
    pub(crate) fn all_checksums_as_is(&self) -> HashMap<String, [u8; 20]> {
        self.db.all_registered_checksums().unwrap()
    }
}
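
For orientation, a minimal sketch of driving this API; the paths are
illustrative assumptions, and inside Anki the manager is normally obtained
via Collection::media() rather than constructed directly:

fn demo() -> Result<()> {
    // hypothetical paths; MediaManager::new creates the folder if needed
    let mgr = MediaManager::new("collection.media", "collection.media.db2")?;
    // add_file returns the filename actually used; if a file with different
    // contents already exists under this name, a hash suffix is appended
    let fname = mgr.add_file("hello.txt", b"hello")?;
    // removal clears the entry's checksum and flags it for the next sync
    mgr.remove_files(&[fname.to_string()])?;
    Ok(())
}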