From 643795e383908f608784163a0451c72ec2462222 Mon Sep 17 00:00:00 2001
From: RumovZ
Date: Mon, 7 Mar 2022 06:11:31 +0100
Subject: [PATCH] Backups (#1685)

* Add zstd dep
* Implement backend backup with zstd
* Implement backup thinning
* Write backup meta
* Use new file ending anki21b
* Asynchronously back up on collection close in Rust
* Revert "Add zstd dep"
  This reverts commit 3fcb2141d2be15f907269d13275c41971431385c.
* Add zstd again
* Take backup col path from col struct
* Fix formatting
* Implement backup restoring on backend
* Normalize restored media file names
* Refactor `extract_legacy_data()`
  A bit cumbersome due to borrowing rules.
* Refactor
* Make thinning calendar-based and gradual (see the second sketch below)
* Consider last kept backups of previous stages
* Import full apkgs and colpkgs with backend
* Expose new backup settings
* Test `BackupThinner` and make it deterministic
* Make backup_path optional when closing
* Delete leaky timer
* Add progress updates for restoring media
* Write restored collection to tempfile first
* Do collection compression in the background thread
  This has us currently storing an uncompressed and a compressed copy of
  the collection in memory (not ideal), but means the collection can be
  closed without waiting for compression to complete. On a large
  collection, this cuts a close and reopen from about 0.55s to about
  0.07s. The old backup code for comparison: about 0.35s with compression
  off, about 8.5s for zip compression.
* Use multithreading in zstd compression (see the first sketch below)
  On my system, this reduces the compression time of a large collection
  from about 0.55s to 0.08s.
* Stream compressed collection data into zip file
* Tweak backup explanation + Fix incorrect tab order for ignore accents option
* Decouple restoring backup and full import
  In the first case, no profile is opened unless the new collection loads
  successfully. In the second case, either the old collection is reloaded
  or the new one is loaded.
* Fix number gap in Progress message
* Don't revert backup when media fails, but report it
* Tweak error flow
* Remove native BackupLimits enum
* Fix type annotation
* Add thinning test for whole year
* Satisfy linter
* Await async backup to finish
* Move restart disclaimer out of backup tab
  Should be visible regardless of the current tab.
* Write restored collection in chunks
* Refactor
* Write media in chunks and refactor
* Log error if removing file fails
* join_backup_task -> await_backup_completion
* Refactor backup.rs
* Refactor backup meta and collection extraction
* Fix wrong error being returned
* Call sync_all() on new collection
* Add ImportError
* Store logger in Backend, instead of creating one on demand
  init_backend() accepts a Logger rather than a log file, to allow other
  callers to customize the logger if they wish. In the future we may want
  to explore using the tracing crate as an alternative; it's a bit more
  ergonomic, as a logger doesn't need to be passed around, and it plays
  more nicely with async code.
* Sync file contents prior to rename; sync folder after rename.
* Limit backup creation to once per 30 min
* Use zstd::stream::copy_decode
* Make importing abortable
* Don't revert if backup media is aborted
* Set throttle implicitly
* Change force flag to minimum_backup_interval
* Don't attempt to open folders on Windows
* Join last backup thread before starting new one
  Also refactor.
* Disable auto sync and backup when restoring again
* Force backup on full download
* Include the reason why a media file import failed, and the file path
  - Introduce a FileIoError that contains a string representation of the
    underlying I/O error, and an associated path. There are a few places
    in the code where we're currently manually including the filename in
    a custom error message, and this is a step towards a more consistent
    approach (but we may be better served with a more general approach in
    the future, similar to anyhow's .context()).
  - Move the error message into importing.ftl, as it's a bit neater when
    error messages live in the same file as the rest of the messages
    associated with some functionality.
* Fix importing of media files
* Minor wording tweaks
* Save an allocation
  I18n strings with replacements are already strings, so we can skip the
  extra allocation. Not that it matters here at all.
* Terminate import if file missing from archive
  If a third-party tool is creating invalid archives, the user should
  know about it. This should be rare, so I did not attempt to make it
  translatable.
* Skip multithreaded compression on small collections
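The multithreaded compression path described above can be sketched roughly as follows: a zstd encoder writing straight into a stored zip entry, with worker threads engaged only when the payload is large enough to benefit. This is an illustrative reconstruction using the crates the patch adds (`zstd` with the `zstdmt` feature, `zip`, `num_cpus`); the actual implementation is `write_backup()`/`zstd_copy()` in `rslib/src/collection/backup.rs` further down, and `compress_into_zip` is a hypothetical helper name:

```rust
use std::io::{self, Read, Seek, Write};

use zip::{result::ZipResult, write::FileOptions, CompressionMethod, ZipWriter};
use zstd::Encoder;

/// Below this size, spawning zstd worker threads costs more than it saves;
/// the patch settled on 10MB after testing on a many-core system.
const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024;

/// Compress `reader` into a single zip entry. The entry itself is Stored,
/// since zstd already provides the compression.
fn compress_into_zip<R: Read, W: Write + Seek>(
    mut reader: R,
    size: usize,
    zip: &mut ZipWriter<W>,
) -> ZipResult<()> {
    let options = FileOptions::default().compression_method(CompressionMethod::Stored);
    zip.start_file("collection.anki21b", options)?;
    // Level 0 selects zstd's default compression level.
    let mut encoder = Encoder::new(zip, 0)?;
    if size > MULTITHREAD_MIN_BYTES {
        encoder.multithread(num_cpus::get() as u32)?;
    }
    io::copy(&mut reader, &mut encoder)?;
    // finish() flushes the zstd frame; without it the entry is truncated.
    encoder.finish()?;
    Ok(())
}
```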
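The calendar-based thinning keys each backup to serial day, week, and month numbers. The week arithmetic relies on day 1 of the common era (0001-01-01) having been a Monday, so that weeks roll over on Monday rather than Sunday. A minimal, self-contained sketch of that arithmetic (chrono 0.4-era API, as used by the patch; the helpers mirror `Backup`'s methods in `rslib/src/collection/backup.rs` below, and the example dates are arbitrary):

```rust
use chrono::{Datelike, NaiveDate};

/// Serial day number: days counted from 0001-01-01 (a Monday).
fn day(date: NaiveDate) -> i32 {
    date.num_days_from_ce()
}

/// Serial week number; subtracting 1 shifts the rollover from Sunday to Monday.
fn week(date: NaiveDate) -> i32 {
    (day(date) - 1) / 7
}

/// Serial month number.
fn month(date: NaiveDate) -> u32 {
    date.year() as u32 * 12 + date.month()
}

fn main() {
    let sunday = NaiveDate::from_ymd(2022, 3, 6);
    let monday = NaiveDate::from_ymd(2022, 3, 7);
    assert_eq!(week(sunday) + 1, week(monday)); // the week advances on Monday
    assert_eq!(month(sunday), month(monday)); // both dates fall in 2022-03
}
```

Given these serial numbers, the filter walks backups newest-first: anything from today or yesterday is kept unconditionally; then the newest backup of each older day, week, and month is kept until the daily, weekly, and monthly limits are used up; everything older is marked obsolete and removed.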
Co-authored-by: Damien Elmes
---
 Cargo.lock                                    |  43 ++
 cargo/BUILD.bazel                             |  18 +
 cargo/crates.bzl                              |  40 ++
 cargo/licenses.json                           |  36 ++
 cargo/remote/BUILD.cc-1.0.73.bazel            |   6 +
 cargo/remote/BUILD.jobserver-0.1.24.bazel     |  79 +++
 .../remote/BUILD.zstd-0.10.0+zstd.1.5.2.bazel |  69 +++
 .../BUILD.zstd-safe-4.1.4+zstd.1.5.2.bazel    |  95 +++
 .../BUILD.zstd-sys-1.6.3+zstd.1.5.2.bazel     |  93 +++
 ftl/core/errors.ftl                           |   1 +
 ftl/core/importing.ftl                        |   8 +-
 ftl/core/preferences.ftl                      |   6 +
 proto/anki/backend.proto                      |   1 +
 proto/anki/collection.proto                   |  14 +
 proto/anki/config.proto                       |   6 +
 pylib/anki/_backend/__init__.py               |   3 +-
 pylib/anki/_backend/rsbridge.pyi              |   2 +-
 pylib/anki/collection.py                      |  15 +-
 pylib/rsbridge/cargo/BUILD.bazel              |  18 +
 pylib/rsbridge/lib.rs                         |   9 +-
 qt/aqt/forms/preferences.ui                   | 110 ++--
 qt/aqt/importing.py                           | 106 ++--
 qt/aqt/main.py                                | 125 ++--
 qt/aqt/mediacheck.py                          |   2 +-
 qt/aqt/preferences.py                         |  22 +-
 rslib/BUILD.bazel                             |   2 +
 rslib/Cargo.toml                              |   2 +
 rslib/build/protobuf.rs                       |   4 +
 rslib/cargo/BUILD.bazel                       |  18 +
 rslib/i18n/cargo/BUILD.bazel                  |  18 +
 rslib/i18n_helpers/cargo/BUILD.bazel          |  18 +
 rslib/linkchecker/cargo/BUILD.bazel           |  18 +
 rslib/src/backend/collection.rs               |  83 ++-
 rslib/src/backend/error.rs                    |   2 +
 rslib/src/backend/mod.rs                      |  14 +-
 rslib/src/backend/progress.rs                 |   9 +
 rslib/src/collection/backup.rs                | 542 ++++++++++++++++++
 rslib/src/collection/mod.rs                   |   1 +
 rslib/src/config/mod.rs                       |  18 +-
 rslib/src/error/mod.rs                        |  46 +-
 rslib/src/log.rs                              |   2 +-
 rslib/src/preferences.rs                      |   4 +
 rslib/src/sync/mod.rs                         |   5 +
 43 files changed, 1526 insertions(+), 207 deletions(-)
 create mode 100644 cargo/remote/BUILD.jobserver-0.1.24.bazel
 create mode 100644 cargo/remote/BUILD.zstd-0.10.0+zstd.1.5.2.bazel
 create mode 100644 cargo/remote/BUILD.zstd-safe-4.1.4+zstd.1.5.2.bazel
 create mode 100644 cargo/remote/BUILD.zstd-sys-1.6.3+zstd.1.5.2.bazel
 create mode 100644 rslib/src/collection/backup.rs

diff --git a/Cargo.lock b/Cargo.lock
index a01280e99..bc25ed87c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -68,6 +68,7 @@ dependencies = [
  "lazy_static",
  "nom",
  "num-integer",
+ "num_cpus",
  "num_enum",
  "once_cell",
  "pct-str",
@@ -102,6 +103,7 @@ dependencies = [
  "unicode-normalization",
  "utime",
  "zip",
+ "zstd",
 ]

 [[package]]
@@ -275,6 +277,9 @@ name = "cc"
 version = "1.0.73"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum =
"2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +dependencies = [ + "jobserver", +] [[package]] name = "cfg-if" @@ -1177,6 +1182,15 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" +[[package]] +name = "jobserver" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.56" @@ -3412,3 +3426,32 @@ dependencies = [ "thiserror", "time 0.1.44", ] + +[[package]] +name = "zstd" +version = "0.10.0+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "4.1.4+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "1.6.3+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8" +dependencies = [ + "cc", + "libc", +] diff --git a/cargo/BUILD.bazel b/cargo/BUILD.bazel index a416a4628..743347b83 100644 --- a/cargo/BUILD.bazel +++ b/cargo/BUILD.bazel @@ -210,6 +210,15 @@ alias( ], ) +alias( + name = "num_cpus", + actual = "@raze__num_cpus__1_13_1//:num_cpus", + tags = [ + "cargo-raze", + "manual", + ], +) + alias( name = "num_enum", actual = "@raze__num_enum__0_5_6//:num_enum", @@ -561,6 +570,15 @@ alias( ], ) +alias( + name = "zstd", + actual = "@raze__zstd__0_10_0_zstd_1_5_2//:zstd", + tags = [ + "cargo-raze", + "manual", + ], +) + # Export file for Stardoc support exports_files( [ diff --git a/cargo/crates.bzl b/cargo/crates.bzl index 5e73ab2b0..e0991254b 100644 --- a/cargo/crates.bzl +++ b/cargo/crates.bzl @@ -1041,6 +1041,16 @@ def raze_fetch_remote_crates(): build_file = Label("//cargo/remote:BUILD.itoa-1.0.1.bazel"), ) + maybe( + http_archive, + name = "raze__jobserver__0_1_24", + url = "https://crates.io/api/v1/crates/jobserver/0.1.24/download", + type = "tar.gz", + sha256 = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa", + strip_prefix = "jobserver-0.1.24", + build_file = Label("//cargo/remote:BUILD.jobserver-0.1.24.bazel"), + ) + maybe( http_archive, name = "raze__js_sys__0_3_56", @@ -3230,6 +3240,36 @@ def raze_fetch_remote_crates(): strip_prefix = "zip-0.5.13", build_file = Label("//cargo/remote:BUILD.zip-0.5.13.bazel"), ) + + maybe( + http_archive, + name = "raze__zstd__0_10_0_zstd_1_5_2", + url = "https://crates.io/api/v1/crates/zstd/0.10.0+zstd.1.5.2/download", + type = "tar.gz", + sha256 = "3b1365becbe415f3f0fcd024e2f7b45bacfb5bdd055f0dc113571394114e7bdd", + strip_prefix = "zstd-0.10.0+zstd.1.5.2", + build_file = Label("//cargo/remote:BUILD.zstd-0.10.0+zstd.1.5.2.bazel"), + ) + + maybe( + http_archive, + name = "raze__zstd_safe__4_1_4_zstd_1_5_2", + url = "https://crates.io/api/v1/crates/zstd-safe/4.1.4+zstd.1.5.2/download", + type = "tar.gz", + sha256 = "2f7cd17c9af1a4d6c24beb1cc54b17e2ef7b593dc92f19e9d9acad8b182bbaee", + strip_prefix = "zstd-safe-4.1.4+zstd.1.5.2", + build_file = Label("//cargo/remote:BUILD.zstd-safe-4.1.4+zstd.1.5.2.bazel"), + ) + + maybe( + http_archive, + name = 
"raze__zstd_sys__1_6_3_zstd_1_5_2", + url = "https://crates.io/api/v1/crates/zstd-sys/1.6.3+zstd.1.5.2/download", + type = "tar.gz", + sha256 = "fc49afa5c8d634e75761feda8c592051e7eeb4683ba827211eb0d731d3402ea8", + strip_prefix = "zstd-sys-1.6.3+zstd.1.5.2", + build_file = Label("//cargo/remote:BUILD.zstd-sys-1.6.3+zstd.1.5.2.bazel"), + ) maybe( new_git_repository, diff --git a/cargo/licenses.json b/cargo/licenses.json index b3f9d9bfd..cfc6c5c9b 100644 --- a/cargo/licenses.json +++ b/cargo/licenses.json @@ -836,6 +836,15 @@ "license_file": null, "description": "Fast integer primitive to string conversion" }, + { + "name": "jobserver", + "version": "0.1.24", + "authors": "Alex Crichton ", + "repository": "https://github.com/alexcrichton/jobserver-rs", + "license": "Apache-2.0 OR MIT", + "license_file": null, + "description": "An implementation of the GNU make jobserver for Rust" + }, { "name": "js-sys", "version": "0.3.56", @@ -2644,5 +2653,32 @@ "license": "MIT", "license_file": null, "description": "Library to support the reading and writing of zip files." + }, + { + "name": "zstd", + "version": "0.10.0+zstd.1.5.2", + "authors": "Alexandre Bury ", + "repository": "https://github.com/gyscos/zstd-rs", + "license": "MIT", + "license_file": null, + "description": "Binding for the zstd compression library." + }, + { + "name": "zstd-safe", + "version": "4.1.4+zstd.1.5.2", + "authors": "Alexandre Bury ", + "repository": "https://github.com/gyscos/zstd-rs", + "license": "Apache-2.0 OR MIT", + "license_file": null, + "description": "Safe low-level bindings for the zstd compression library." + }, + { + "name": "zstd-sys", + "version": "1.6.3+zstd.1.5.2", + "authors": "Alexandre Bury ", + "repository": "https://github.com/gyscos/zstd-rs", + "license": "Apache-2.0 OR MIT", + "license_file": null, + "description": "Low-level bindings for the zstd compression library." } ] diff --git a/cargo/remote/BUILD.cc-1.0.73.bazel b/cargo/remote/BUILD.cc-1.0.73.bazel index 13e80ec11..3c751b836 100644 --- a/cargo/remote/BUILD.cc-1.0.73.bazel +++ b/cargo/remote/BUILD.cc-1.0.73.bazel @@ -37,6 +37,8 @@ rust_binary( name = "cargo_bin_gcc_shim", srcs = glob(["**/*.rs"]), crate_features = [ + "jobserver", + "parallel", ], crate_root = "src/bin/gcc-shim.rs", data = [], @@ -53,6 +55,7 @@ rust_binary( # buildifier: leave-alone deps = [ ":cc", + "@raze__jobserver__0_1_24//:jobserver", ], ) @@ -60,6 +63,8 @@ rust_library( name = "cc", srcs = glob(["**/*.rs"]), crate_features = [ + "jobserver", + "parallel", ], crate_root = "src/lib.rs", data = [], @@ -75,6 +80,7 @@ rust_library( version = "1.0.73", # buildifier: leave-alone deps = [ + "@raze__jobserver__0_1_24//:jobserver", ], ) diff --git a/cargo/remote/BUILD.jobserver-0.1.24.bazel b/cargo/remote/BUILD.jobserver-0.1.24.bazel new file mode 100644 index 000000000..ca2edb08d --- /dev/null +++ b/cargo/remote/BUILD.jobserver-0.1.24.bazel @@ -0,0 +1,79 @@ +""" +@generated +cargo-raze crate build file. + +DO NOT EDIT! Replaced on runs of cargo-raze +""" + +# buildifier: disable=load +load("@bazel_skylib//lib:selects.bzl", "selects") + +# buildifier: disable=load +load( + "@rules_rust//rust:defs.bzl", + "rust_binary", + "rust_library", + "rust_proc_macro", + "rust_test", +) + +package(default_visibility = [ + # Public for visibility by "@raze__crate__version//" targets. + # + # Prefer access through "//cargo", which limits external + # visibility to explicit Cargo.toml dependencies. 
+ "//visibility:public", +]) + +licenses([ + "notice", # MIT from expression "MIT OR Apache-2.0" +]) + +# Generated Targets + +rust_library( + name = "jobserver", + srcs = glob(["**/*.rs"]), + aliases = { + }, + crate_features = [ + ], + crate_root = "src/lib.rs", + data = [], + edition = "2018", + rustc_flags = [ + "--cap-lints=allow", + ], + tags = [ + "cargo-raze", + "crate-name=jobserver", + "manual", + ], + version = "0.1.24", + # buildifier: leave-alone + deps = [ + ] + selects.with_or({ + # cfg(unix) + ( + "@rules_rust//rust/platform:x86_64-apple-darwin", + "@rules_rust//rust/platform:x86_64-unknown-linux-gnu", + "@rules_rust//rust/platform:aarch64-apple-darwin", + "@rules_rust//rust/platform:aarch64-apple-ios", + "@rules_rust//rust/platform:aarch64-unknown-linux-gnu", + "@rules_rust//rust/platform:x86_64-apple-ios", + ): [ + "@raze__libc__0_2_119//:libc", + ], + "//conditions:default": [], + }), +) + +# Unsupported target "client" with type "test" omitted + +# Unsupported target "client-of-myself" with type "test" omitted + +# Unsupported target "helper" with type "test" omitted + +# Unsupported target "make-as-a-client" with type "test" omitted + +# Unsupported target "server" with type "test" omitted diff --git a/cargo/remote/BUILD.zstd-0.10.0+zstd.1.5.2.bazel b/cargo/remote/BUILD.zstd-0.10.0+zstd.1.5.2.bazel new file mode 100644 index 000000000..71df2ae69 --- /dev/null +++ b/cargo/remote/BUILD.zstd-0.10.0+zstd.1.5.2.bazel @@ -0,0 +1,69 @@ +""" +@generated +cargo-raze crate build file. + +DO NOT EDIT! Replaced on runs of cargo-raze +""" + +# buildifier: disable=load +load("@bazel_skylib//lib:selects.bzl", "selects") + +# buildifier: disable=load +load( + "@rules_rust//rust:defs.bzl", + "rust_binary", + "rust_library", + "rust_proc_macro", + "rust_test", +) + +package(default_visibility = [ + # Public for visibility by "@raze__crate__version//" targets. + # + # Prefer access through "//cargo", which limits external + # visibility to explicit Cargo.toml dependencies. + "//visibility:public", +]) + +licenses([ + "notice", # MIT from expression "MIT" +]) + +# Generated Targets + +# Unsupported target "benchmark" with type "example" omitted + +# Unsupported target "stream" with type "example" omitted + +# Unsupported target "train" with type "example" omitted + +# Unsupported target "zstd" with type "example" omitted + +# Unsupported target "zstdcat" with type "example" omitted + +rust_library( + name = "zstd", + srcs = glob(["**/*.rs"]), + crate_features = [ + "arrays", + "default", + "legacy", + "zstdmt", + ], + crate_root = "src/lib.rs", + data = [], + edition = "2018", + rustc_flags = [ + "--cap-lints=allow", + ], + tags = [ + "cargo-raze", + "crate-name=zstd", + "manual", + ], + version = "0.10.0+zstd.1.5.2", + # buildifier: leave-alone + deps = [ + "@raze__zstd_safe__4_1_4_zstd_1_5_2//:zstd_safe", + ], +) diff --git a/cargo/remote/BUILD.zstd-safe-4.1.4+zstd.1.5.2.bazel b/cargo/remote/BUILD.zstd-safe-4.1.4+zstd.1.5.2.bazel new file mode 100644 index 000000000..881289841 --- /dev/null +++ b/cargo/remote/BUILD.zstd-safe-4.1.4+zstd.1.5.2.bazel @@ -0,0 +1,95 @@ +""" +@generated +cargo-raze crate build file. + +DO NOT EDIT! Replaced on runs of cargo-raze +""" + +# buildifier: disable=load +load("@bazel_skylib//lib:selects.bzl", "selects") + +# buildifier: disable=load +load( + "@rules_rust//rust:defs.bzl", + "rust_binary", + "rust_library", + "rust_proc_macro", + "rust_test", +) + +package(default_visibility = [ + # Public for visibility by "@raze__crate__version//" targets. 
+ # + # Prefer access through "//cargo", which limits external + # visibility to explicit Cargo.toml dependencies. + "//visibility:public", +]) + +licenses([ + "notice", # MIT from expression "MIT OR Apache-2.0" +]) + +# Generated Targets +# buildifier: disable=out-of-order-load +# buildifier: disable=load-on-top +load( + "@rules_rust//cargo:cargo_build_script.bzl", + "cargo_build_script", +) + +cargo_build_script( + name = "zstd_safe_build_script", + srcs = glob(["**/*.rs"]), + build_script_env = { + }, + crate_features = [ + "arrays", + "legacy", + "std", + "zstdmt", + ], + crate_root = "build.rs", + data = glob(["**"]), + edition = "2018", + rustc_flags = [ + "--cap-lints=allow", + ], + tags = [ + "cargo-raze", + "manual", + ], + version = "4.1.4+zstd.1.5.2", + visibility = ["//visibility:private"], + deps = [ + "@raze__zstd_sys__1_6_3_zstd_1_5_2//:zstd_sys", + ], +) + +rust_library( + name = "zstd_safe", + srcs = glob(["**/*.rs"]), + crate_features = [ + "arrays", + "legacy", + "std", + "zstdmt", + ], + crate_root = "src/lib.rs", + data = [], + edition = "2018", + rustc_flags = [ + "--cap-lints=allow", + ], + tags = [ + "cargo-raze", + "crate-name=zstd-safe", + "manual", + ], + version = "4.1.4+zstd.1.5.2", + # buildifier: leave-alone + deps = [ + ":zstd_safe_build_script", + "@raze__libc__0_2_119//:libc", + "@raze__zstd_sys__1_6_3_zstd_1_5_2//:zstd_sys", + ], +) diff --git a/cargo/remote/BUILD.zstd-sys-1.6.3+zstd.1.5.2.bazel b/cargo/remote/BUILD.zstd-sys-1.6.3+zstd.1.5.2.bazel new file mode 100644 index 000000000..b76a1326d --- /dev/null +++ b/cargo/remote/BUILD.zstd-sys-1.6.3+zstd.1.5.2.bazel @@ -0,0 +1,93 @@ +""" +@generated +cargo-raze crate build file. + +DO NOT EDIT! Replaced on runs of cargo-raze +""" + +# buildifier: disable=load +load("@bazel_skylib//lib:selects.bzl", "selects") + +# buildifier: disable=load +load( + "@rules_rust//rust:defs.bzl", + "rust_binary", + "rust_library", + "rust_proc_macro", + "rust_test", +) + +package(default_visibility = [ + # Public for visibility by "@raze__crate__version//" targets. + # + # Prefer access through "//cargo", which limits external + # visibility to explicit Cargo.toml dependencies. 
+ "//visibility:public", +]) + +licenses([ + "notice", # MIT from expression "MIT OR Apache-2.0" +]) + +# Generated Targets +# buildifier: disable=out-of-order-load +# buildifier: disable=load-on-top +load( + "@rules_rust//cargo:cargo_build_script.bzl", + "cargo_build_script", +) + +cargo_build_script( + name = "zstd_sys_build_script", + srcs = glob(["**/*.rs"]), + build_script_env = { + }, + crate_features = [ + "legacy", + "std", + "zstdmt", + ], + crate_root = "build.rs", + data = glob(["**"]), + edition = "2018", + links = "zstd", + rustc_flags = [ + "--cap-lints=allow", + ], + tags = [ + "cargo-raze", + "manual", + ], + version = "1.6.3+zstd.1.5.2", + visibility = ["//visibility:private"], + deps = [ + "@raze__cc__1_0_73//:cc", + ], +) + +rust_library( + name = "zstd_sys", + srcs = glob(["**/*.rs"]), + crate_features = [ + "legacy", + "std", + "zstdmt", + ], + crate_root = "src/lib.rs", + data = [], + edition = "2018", + rustc_flags = [ + "--cap-lints=allow", + ], + tags = [ + "cargo-raze", + "crate-name=zstd-sys", + "manual", + ], + version = "1.6.3+zstd.1.5.2", + # buildifier: leave-alone + deps = [ + ":zstd_sys_build_script", + "@raze__libc__0_2_119//:libc", + ], +) diff --git a/ftl/core/errors.ftl b/ftl/core/errors.ftl index fdab5a2de..f4afca53a 100644 --- a/ftl/core/errors.ftl +++ b/ftl/core/errors.ftl @@ -9,6 +9,7 @@ errors-100-tags-max = is no need to select child tags if you have selected a parent tag. errors-multiple-notetypes-selected = Please select notes from only one notetype. errors-please-check-database = Please use the Check Database action, then try again. +errors-collection-too-new = This collection requires a newer version of Anki to open. ## Card Rendering diff --git a/ftl/core/importing.ftl b/ftl/core/importing.ftl index 19d2da9ff..729c8b71e 100644 --- a/ftl/core/importing.ftl +++ b/ftl/core/importing.ftl @@ -72,8 +72,10 @@ importing-note-updated = [one] { $count } note updated *[other] { $count } notes updated } -importing-processed-media-file = +importing-imported-media-file = { $count -> - [one] Processed { $count } media file - *[other] Processed { $count } media files + [one] Imported { $count } media file + *[other] Imported { $count } media files } +importing-importing-collection = Importing collection... +importing-failed-to-import-media-file = Failed to import media file: { $debugInfo } diff --git a/ftl/core/preferences.ftl b/ftl/core/preferences.ftl index e51c5ac38..efaa63075 100644 --- a/ftl/core/preferences.ftl +++ b/ftl/core/preferences.ftl @@ -43,3 +43,9 @@ preferences-theme-light = Light preferences-theme-dark = Dark preferences-v3-scheduler = V3 scheduler preferences-ignore-accents-in-search = Ignore accents in search (slower) +preferences-backup-explanation = + Anki periodically backs up your collection when it is closed. After backups are more than 2 days old, + Anki will start removing some of them to free up disk space. 
+preferences-daily-backups = Daily backups to keep: +preferences-weekly-backups = Weekly backups to keep: +preferences-monthly-backups = Monthly backups to keep: diff --git a/proto/anki/backend.proto b/proto/anki/backend.proto index f9e9636ef..f2deb7d71 100644 --- a/proto/anki/backend.proto +++ b/proto/anki/backend.proto @@ -56,6 +56,7 @@ message BackendError { FILTERED_DECK_ERROR = 13; SEARCH_ERROR = 14; CUSTOM_STUDY_ERROR = 15; + IMPORT_ERROR = 16; } // localized error description suitable for displaying to the user diff --git a/proto/anki/collection.proto b/proto/anki/collection.proto index 21465371d..a76097dff 100644 --- a/proto/anki/collection.proto +++ b/proto/anki/collection.proto @@ -10,6 +10,7 @@ import "anki/generic.proto"; service CollectionService { rpc OpenCollection(OpenCollectionRequest) returns (generic.Empty); rpc CloseCollection(CloseCollectionRequest) returns (generic.Empty); + rpc RestoreBackup(RestoreBackupRequest) returns (generic.String); rpc CheckDatabase(generic.Empty) returns (CheckDatabaseResponse); rpc GetUndoStatus(generic.Empty) returns (UndoStatus); rpc Undo(generic.Empty) returns (OpChangesAfterUndo); @@ -18,6 +19,7 @@ service CollectionService { rpc MergeUndoEntries(generic.UInt32) returns (OpChanges); rpc LatestProgress(generic.Empty) returns (Progress); rpc SetWantsAbort(generic.Empty) returns (generic.Empty); + rpc AwaitBackupCompletion(generic.Empty) returns (generic.Empty); } message OpenCollectionRequest { @@ -29,6 +31,17 @@ message OpenCollectionRequest { message CloseCollectionRequest { bool downgrade_to_schema11 = 1; + // Skip backup if unset + optional string backup_folder = 2; + // Skip backup if one has been created in the last x seconds. + // If unset a default value is used. + optional uint64 minimum_backup_interval = 3; +} + +message RestoreBackupRequest { + string col_path = 1; + string backup_path = 2; + string media_folder = 3; } message CheckDatabaseResponse { @@ -107,5 +120,6 @@ message Progress { FullSync full_sync = 4; NormalSync normal_sync = 5; DatabaseCheck database_check = 6; + string importing = 7; } } diff --git a/proto/anki/config.proto b/proto/anki/config.proto index 75a7f98de..f1ad1119d 100644 --- a/proto/anki/config.proto +++ b/proto/anki/config.proto @@ -113,8 +113,14 @@ message Preferences { string default_search_text = 4; bool ignore_accents_in_search = 5; } + message Backups { + uint32 daily = 1; + uint32 weekly = 2; + uint32 monthly = 3; + } Scheduling scheduling = 1; Reviewing reviewing = 2; Editing editing = 3; + Backups backups = 4; } diff --git a/pylib/anki/_backend/__init__.py b/pylib/anki/_backend/__init__.py index 718fbbf48..e0084a326 100644 --- a/pylib/anki/_backend/__init__.py +++ b/pylib/anki/_backend/__init__.py @@ -63,6 +63,7 @@ class RustBackend(RustBackendGenerated): self, langs: list[str] | None = None, server: bool = False, + log_file: str | None = None, ) -> None: # pick up global defaults if not provided if langs is None: @@ -72,7 +73,7 @@ class RustBackend(RustBackendGenerated): preferred_langs=langs, server=server, ) - self._backend = rsbridge.open_backend(init_msg.SerializeToString()) + self._backend = rsbridge.open_backend(init_msg.SerializeToString(), log_file) def db_query( self, sql: str, args: Sequence[ValueForDB], first_row_only: bool diff --git a/pylib/anki/_backend/rsbridge.pyi b/pylib/anki/_backend/rsbridge.pyi index 9d0e23d6a..af205848e 100644 --- a/pylib/anki/_backend/rsbridge.pyi +++ b/pylib/anki/_backend/rsbridge.pyi @@ -1,5 +1,5 @@ def buildhash() -> str: ... 
-def open_backend(data: bytes) -> Backend: ... +def open_backend(data: bytes, log_file: str | None) -> Backend: ... class Backend: @classmethod diff --git a/pylib/anki/collection.py b/pylib/anki/collection.py index ba5125f33..84896cb5a 100644 --- a/pylib/anki/collection.py +++ b/pylib/anki/collection.py @@ -235,7 +235,13 @@ class Collection(DeprecatedNamesMixin): elif time.time() - self._last_checkpoint_at > 300: self.save() - def close(self, save: bool = True, downgrade: bool = False) -> None: + def close( + self, + save: bool = True, + downgrade: bool = False, + backup_folder: str | None = None, + minimum_backup_interval: int | None = None, + ) -> None: "Disconnect from DB." if self.db: if save: @@ -243,7 +249,12 @@ class Collection(DeprecatedNamesMixin): else: self.db.rollback() self._clear_caches() - self._backend.close_collection(downgrade_to_schema11=downgrade) + request = collection_pb2.CloseCollectionRequest( + downgrade_to_schema11=downgrade, + backup_folder=backup_folder, + minimum_backup_interval=minimum_backup_interval, + ) + self._backend.close_collection(request) self.db = None def close_for_full_sync(self) -> None: diff --git a/pylib/rsbridge/cargo/BUILD.bazel b/pylib/rsbridge/cargo/BUILD.bazel index 5c53f75b4..9ccdfc3cb 100644 --- a/pylib/rsbridge/cargo/BUILD.bazel +++ b/pylib/rsbridge/cargo/BUILD.bazel @@ -210,6 +210,15 @@ alias( ], ) +alias( + name = "num_cpus", + actual = "@raze__num_cpus__1_13_1//:num_cpus", + tags = [ + "cargo-raze", + "manual", + ], +) + alias( name = "num_enum", actual = "@raze__num_enum__0_5_6//:num_enum", @@ -560,3 +569,12 @@ alias( "manual", ], ) + +alias( + name = "zstd", + actual = "@raze__zstd__0_10_0_zstd_1_5_2//:zstd", + tags = [ + "cargo-raze", + "manual", + ], +) diff --git a/pylib/rsbridge/lib.rs b/pylib/rsbridge/lib.rs index b7ce73955..258b168c2 100644 --- a/pylib/rsbridge/lib.rs +++ b/pylib/rsbridge/lib.rs @@ -2,6 +2,7 @@ // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html use anki::backend::{init_backend, Backend as RustBackend}; +use anki::log::default_logger; use pyo3::exceptions::PyException; use pyo3::prelude::*; use pyo3::types::PyBytes; @@ -20,8 +21,12 @@ fn buildhash() -> &'static str { } #[pyfunction] -fn open_backend(init_msg: &PyBytes) -> PyResult { - match init_backend(init_msg.as_bytes()) { +fn open_backend(init_msg: &PyBytes, log_file: Option) -> PyResult { + let log = match default_logger(log_file.as_deref()) { + Ok(log) => Some(log), + Err(e) => return Err(PyException::new_err(e)), + }; + match init_backend(init_msg.as_bytes(), log) { Ok(backend) => Ok(Backend { backend }), Err(e) => Err(PyException::new_err(e)), } diff --git a/qt/aqt/forms/preferences.ui b/qt/aqt/forms/preferences.ui index 0fa21315d..bbad22d30 100644 --- a/qt/aqt/forms/preferences.ui +++ b/qt/aqt/forms/preferences.ui @@ -7,7 +7,7 @@ 0 0 640 - 480 + 518 @@ -491,9 +491,9 @@ 12 - + - preferences_backupsanki_will_create_a_backup_of + preferences_backup_explanation true @@ -501,39 +501,77 @@ - - - - - preferences_keep + + + + + 9999 - - - - 60 - 0 - + + + 9999 - - - 60 - 16777215 - + + + + + + preferences_weekly_backups + + + + + + + preferences_daily_backups + + + + + + + 9999 + + + + + + + preferences_monthly_backups - - - preferences_backups2 + + + Qt::Horizontal - + + + 40 + 20 + + + - - + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + Qt::Horizontal @@ -577,20 +615,17 @@ - - - - preferences_some_settings_will_take_effect_after - - - Qt::AlignCenter - - - + + + + preferences_some_settings_will_take_effect_after + + + @@ 
-611,6 +646,7 @@ interrupt_audio pastePNG paste_strips_formatting + ignore_accents_in_search useCurrent default_search_text uiScale @@ -630,7 +666,9 @@ syncDeauth media_log tabWidget - numBackups + weekly_backups + daily_backups + monthly_backups diff --git a/qt/aqt/importing.py b/qt/aqt/importing.py index f6402dac9..7ed676f6e 100644 --- a/qt/aqt/importing.py +++ b/qt/aqt/importing.py @@ -1,11 +1,8 @@ # Copyright: Ankitects Pty Ltd and contributors # License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html -import json import os import re -import shutil import traceback -import unicodedata import zipfile from concurrent.futures import Future from typing import Any, Optional @@ -14,6 +11,7 @@ import anki.importing as importing import aqt.deckchooser import aqt.forms import aqt.modelchooser +from anki.errors import Interrupted from anki.importing.anki2 import V2ImportIntoV1 from anki.importing.apkg import AnkiPackageImporter from aqt import AnkiQt, gui_hooks @@ -375,12 +373,6 @@ def importFile(mw: AnkiQt, file: str) -> None: else: # if it's an apkg/zip, first test it's a valid file if isinstance(importer, AnkiPackageImporter): - try: - z = zipfile.ZipFile(importer.file) - z.getinfo("collection.anki2") - except: - showWarning(invalidZipMsg()) - return # we need to ask whether to import/replace; if it's # a colpkg file then the rest of the import process # will happen in setupApkgImport() @@ -441,74 +433,74 @@ def setupApkgImport(mw: AnkiQt, importer: AnkiPackageImporter) -> bool: if not full: # adding return True - if not mw.restoringBackup and not askUser( + if not askUser( tr.importing_this_will_delete_your_existing_collection(), msgfunc=QMessageBox.warning, defaultno=True, ): return False - replaceWithApkg(mw, importer.file, mw.restoringBackup) + full_apkg_import(mw, importer.file) return False -def replaceWithApkg(mw: aqt.AnkiQt, file: str, backup: bool) -> None: - mw.unloadCollection(lambda: _replaceWithApkg(mw, file, backup)) +def full_apkg_import(mw: aqt.AnkiQt, file: str) -> None: + def on_done(success: bool) -> None: + mw.loadCollection() + if success: + tooltip(tr.importing_importing_complete()) + + mw.unloadCollection(lambda: replace_with_apkg(mw, file, on_done)) -def _replaceWithApkg(mw: aqt.AnkiQt, filename: str, backup: bool) -> None: - mw.progress.start(immediate=True) +def replace_with_apkg( + mw: aqt.AnkiQt, filename: str, callback: Callable[[bool], None] +) -> None: + """Tries to replace the provided collection with the provided backup, + then calls the callback. True if success. + """ + dialog = mw.progress.start(immediate=True) + timer = QTimer() + timer.setSingleShot(False) + timer.setInterval(100) - def do_import() -> None: - z = zipfile.ZipFile(filename) + def on_progress() -> None: + progress = mw.backend.latest_progress() + if not progress.HasField("importing"): + return + label = progress.importing - # v2 scheduler? 
- colname = "collection.anki21" try: - z.getinfo(colname) - except KeyError: - colname = "collection.anki2" + if dialog.wantCancel: + mw.backend.set_wants_abort() + except AttributeError: + # dialog may not be active + pass - with z.open(colname) as source, open(mw.pm.collectionPath(), "wb") as target: - # ignore appears related to https://github.com/python/typeshed/issues/4349 - # see if can turn off once issue fix is merged in - shutil.copyfileobj(source, target) + mw.taskman.run_on_main(lambda: mw.progress.update(label=label)) - d = os.path.join(mw.pm.profileFolder(), "collection.media") - for n, (cStr, file) in enumerate( - json.loads(z.read("media").decode("utf8")).items() - ): - mw.taskman.run_on_main( - lambda n=n: mw.progress.update( # type: ignore - tr.importing_processed_media_file(count=n) - ) - ) - size = z.getinfo(cStr).file_size - dest = os.path.join(d, unicodedata.normalize("NFC", file)) - # if we have a matching file size - if os.path.exists(dest) and size == os.stat(dest).st_size: - continue - data = z.read(cStr) - with open(dest, "wb") as file: - file.write(data) - - z.close() + def do_import() -> str: + col_path = mw.pm.collectionPath() + media_folder = os.path.join(mw.pm.profileFolder(), "collection.media") + return mw.backend.restore_backup( + col_path=col_path, backup_path=filename, media_folder=media_folder + ) def on_done(future: Future) -> None: mw.progress.finish() + timer.deleteLater() try: - future.result() - except Exception as e: - print(e) - showWarning(tr.importing_the_provided_file_is_not_a()) - return - - if not mw.loadCollection(): - return - if backup: - mw.col.mod_schema(check=False) - - tooltip(tr.importing_importing_complete()) + soft_error = future.result() + except Exception as error: + if not isinstance(error, Interrupted): + showWarning(str(error)) + callback(False) + else: + if soft_error: + showWarning(soft_error) + callback(True) + qconnect(timer.timeout, on_progress) + timer.start() mw.taskman.run_in_background(do_import, on_done) diff --git a/qt/aqt/main.py b/qt/aqt/main.py index 619506fa6..fcddb0ec9 100644 --- a/qt/aqt/main.py +++ b/qt/aqt/main.py @@ -7,12 +7,9 @@ import gc import os import re import signal -import time import weakref -import zipfile from argparse import Namespace from concurrent.futures import Future -from threading import Thread from typing import Any, Literal, Sequence, TextIO, TypeVar, cast import anki @@ -28,7 +25,7 @@ import aqt.sound import aqt.stats import aqt.toolbar import aqt.webview -from anki import hooks +from anki import collection_pb2, hooks from anki._backend import RustBackend as _RustBackend from anki.collection import Collection, Config, OpChanges, UndoStatus from anki.decks import DeckDict, DeckId @@ -267,7 +264,7 @@ class AnkiQt(QMainWindow): self.pm.save() self.pendingImport: str | None = None - self.restoringBackup = False + self.restoring_backup = False # profile not provided on command line? 
if not self.pm.name: # if there's a single profile, load it automatically @@ -328,11 +325,16 @@ class AnkiQt(QMainWindow): self.pm.load(name) return - def onOpenProfile(self) -> None: + def onOpenProfile(self, *, callback: Callable[[], None] | None = None) -> None: + def on_done() -> None: + self.profileDiag.closeWithoutQuitting() + if callback: + callback() + self.profileDiag.hide() # code flow is confusing here - if load fails, profile dialog # will be shown again - self.loadProfile(self.profileDiag.closeWithoutQuitting) + self.loadProfile(on_done) def profileNameOk(self, name: str) -> bool: return not checkInvalidFilename(name) and name != "addons21" @@ -398,19 +400,15 @@ class AnkiQt(QMainWindow): ) def _openBackup(self, path: str) -> None: - try: - # move the existing collection to the trash, as it may not open - self.pm.trashCollection() - except: - showWarning(tr.qt_misc_unable_to_move_existing_file_to()) - return + def on_done(success: bool) -> None: + if success: + self.onOpenProfile(callback=lambda: self.col.mod_schema(check=False)) - self.pendingImport = path - self.restoringBackup = True + import aqt.importing + self.restoring_backup = True showInfo(tr.qt_misc_automatic_syncing_and_backups_have_been()) - - self.onOpenProfile() + aqt.importing.replace_with_apkg(self, path, on_done) def _on_downgrade(self) -> None: self.progress.start() @@ -483,7 +481,7 @@ class AnkiQt(QMainWindow): self.pm.save() self.hide() - self.restoringBackup = False + self.restoring_backup = False # at this point there should be no windows left self._checkForUnclosedWidgets() @@ -506,6 +504,8 @@ class AnkiQt(QMainWindow): def cleanupAndExit(self) -> None: self.errorHandler.unload() self.mediaServer.shutdown() + # Rust background jobs are not awaited implicitly + self.backend.await_backup_completion() self.app.exit(0) # Sound/video @@ -546,7 +546,10 @@ class AnkiQt(QMainWindow): ) # clean up open collection if possible try: - self.backend.close_collection(False) + request = collection_pb2.CloseCollectionRequest( + downgrade_to_schema11=False, backup_folder=None + ) + self.backend.close_collection(request) except Exception as e: print("unable to close collection:", e) self.col = None @@ -593,35 +596,43 @@ class AnkiQt(QMainWindow): def _unloadCollection(self) -> None: if not self.col: return - if self.restoringBackup: - label = tr.qt_misc_closing() - else: - label = tr.qt_misc_backing_up() + + label = ( + tr.qt_misc_closing() if self.restoring_backup else tr.qt_misc_backing_up() + ) self.progress.start(label=label) + corrupt = False + try: self.maybeOptimize() if not dev_mode: corrupt = self.col.db.scalar("pragma quick_check") != "ok" except: corrupt = True + + if corrupt or dev_mode or self.restoring_backup: + backup_folder = None + else: + backup_folder = self.pm.backupFolder() try: - self.col.close(downgrade=False) + self.col.close(downgrade=False, backup_folder=backup_folder) except Exception as e: print(e) corrupt = True finally: self.col = None self.progress.finish() + if corrupt: showWarning(tr.qt_misc_your_collection_file_appears_to_be()) - if not corrupt and not self.restoringBackup: - self.backup() def _close_for_full_download(self) -> None: "Backup and prepare collection to be overwritten." 
- self.col.close(downgrade=False) - self.backup() + backup_folder = None if dev_mode else self.pm.backupFolder() + self.col.close( + downgrade=False, backup_folder=backup_folder, minimum_backup_interval=0 + ) self.col.reopen(after_full_sync=False) self.col.close_for_full_sync() @@ -631,63 +642,9 @@ class AnkiQt(QMainWindow): Config.Bool.INTERRUPT_AUDIO_WHEN_ANSWERING ) - # Backup and auto-optimize + # Auto-optimize ########################################################################## - class BackupThread(Thread): - def __init__(self, path: str, data: bytes) -> None: - Thread.__init__(self) - self.path = path - self.data = data - # create the file in calling thread to ensure the same - # file is not created twice - with open(self.path, "wb") as file: - pass - - def run(self) -> None: - z = zipfile.ZipFile(self.path, "w", zipfile.ZIP_STORED) - z.writestr("collection.anki2", self.data) - z.writestr("media", "{}") - z.close() - - def backup(self) -> None: - "Read data into memory, and complete backup on a background thread." - if self.col and self.col.db: - raise Exception("collection must be closed") - - nbacks = self.pm.profile["numBackups"] - if not nbacks or dev_mode: - return - dir = self.pm.backupFolder() - path = self.pm.collectionPath() - - # do backup - fname = time.strftime( - "backup-%Y-%m-%d-%H.%M.%S.colpkg", time.localtime(time.time()) - ) - newpath = os.path.join(dir, fname) - with open(path, "rb") as f: - data = f.read() - self.BackupThread(newpath, data).start() - - # find existing backups - backups = [] - for file in os.listdir(dir): - # only look for new-style format - m = re.match(r"backup-\d{4}-\d{2}-.+.colpkg", file) - if not m: - continue - backups.append(file) - backups.sort() - - # remove old ones - while len(backups) > nbacks: - fname = backups.pop(0) - path = os.path.join(dir, fname) - os.unlink(path) - - self.taskman.run_on_main(gui_hooks.backup_did_complete) - def maybeOptimize(self) -> None: # have two weeks passed? 
if (int_time() - self.pm.profile["lastOptimize"]) < 86400 * 14: @@ -1030,7 +987,7 @@ title="{}" {}>{}""".format( self.pm.auto_syncing_enabled() and bool(self.pm.sync_auth()) and not self.safeMode - and not self.restoringBackup + and not self.restoring_backup ) # legacy diff --git a/qt/aqt/mediacheck.py b/qt/aqt/mediacheck.py index db88028bf..c95f2a953 100644 --- a/qt/aqt/mediacheck.py +++ b/qt/aqt/mediacheck.py @@ -56,7 +56,7 @@ class MediaChecker: def _set_progress_enabled(self, enabled: bool) -> None: if self._progress_timer: - self._progress_timer.stop() + self._progress_timer.deleteLater() self._progress_timer = None if enabled: self._progress_timer = timer = QTimer() diff --git a/qt/aqt/preferences.py b/qt/aqt/preferences.py index 952d026d8..a921b8d62 100644 --- a/qt/aqt/preferences.py +++ b/qt/aqt/preferences.py @@ -97,6 +97,13 @@ class Preferences(QDialog): form.pastePNG.setChecked(editing.paste_images_as_png) form.default_search_text.setText(editing.default_search_text) + form.backup_explanation.setText( + anki.lang.with_collapsed_whitespace(tr.preferences_backup_explanation()) + ) + form.daily_backups.setValue(self.prefs.backups.daily) + form.weekly_backups.setValue(self.prefs.backups.weekly) + form.monthly_backups.setValue(self.prefs.backups.monthly) + def update_collection(self, on_done: Callable[[], None]) -> None: form = self.form @@ -123,6 +130,10 @@ class Preferences(QDialog): self.form.ignore_accents_in_search.isChecked() ) + self.prefs.backups.daily = form.daily_backups.value() + self.prefs.backups.weekly = form.weekly_backups.value() + self.prefs.backups.monthly = form.monthly_backups.value() + def after_prefs_update(changes: OpChanges) -> None: self.mw.apply_collection_options() if scheduling.scheduler_version > 1: @@ -142,11 +153,9 @@ class Preferences(QDialog): def setup_profile(self) -> None: "Setup options stored in the user profile." 
self.setup_network() - self.setup_backup() def update_profile(self) -> None: self.update_network() - self.update_backup() # Profile: network ###################################################################### @@ -191,15 +200,6 @@ class Preferences(QDialog): if self.form.fullSync.isChecked(): self.mw.col.mod_schema(check=False) - # Profile: backup - ###################################################################### - - def setup_backup(self) -> None: - self.form.numBackups.setValue(self.prof["numBackups"]) - - def update_backup(self) -> None: - self.prof["numBackups"] = self.form.numBackups.value() - # Global preferences ###################################################################### diff --git a/rslib/BUILD.bazel b/rslib/BUILD.bazel index b5f3b8eda..98c57a69c 100644 --- a/rslib/BUILD.bazel +++ b/rslib/BUILD.bazel @@ -87,6 +87,7 @@ rust_library( "//rslib/cargo:itertools", "//rslib/cargo:lazy_static", "//rslib/cargo:nom", + "//rslib/cargo:num_cpus", "//rslib/cargo:num_enum", "//rslib/cargo:num_integer", "//rslib/cargo:once_cell", @@ -116,6 +117,7 @@ rust_library( "//rslib/cargo:unic_ucd_category", "//rslib/cargo:utime", "//rslib/cargo:zip", + "//rslib/cargo:zstd", "//rslib/cargo:pct_str", "//rslib/i18n:anki_i18n", ] + select({ diff --git a/rslib/Cargo.toml b/rslib/Cargo.toml index ea044f5a4..2dfd7fe5f 100644 --- a/rslib/Cargo.toml +++ b/rslib/Cargo.toml @@ -98,3 +98,5 @@ tokio-util = { version = "0.6.8", features = ["io"] } pct-str = { git="https://github.com/timothee-haudebourg/pct-str.git", rev="4adccd8d4a222ab2672350a102f06ae832a0572d" } unic-ucd-category = "0.9.0" id_tree = "1.8.0" +zstd = { version="0.10.0+zstd.1.5.2", features=["zstdmt"] } +num_cpus = "1.13.1" diff --git a/rslib/build/protobuf.rs b/rslib/build/protobuf.rs index 7b16a06af..246d003f9 100644 --- a/rslib/build/protobuf.rs +++ b/rslib/build/protobuf.rs @@ -106,6 +106,10 @@ pub fn write_backend_proto_rs() { "#[derive(strum::EnumIter)]", ) .type_attribute("HelpPageLinkRequest.HelpPage", "#[derive(strum::EnumIter)]") + .type_attribute( + "Preferences.Backups", + "#[derive(Copy, serde_derive::Deserialize, serde_derive::Serialize)]", + ) .compile_protos(paths.as_slice(), &[proto_dir]) .unwrap(); } diff --git a/rslib/cargo/BUILD.bazel b/rslib/cargo/BUILD.bazel index 5c53f75b4..9ccdfc3cb 100644 --- a/rslib/cargo/BUILD.bazel +++ b/rslib/cargo/BUILD.bazel @@ -210,6 +210,15 @@ alias( ], ) +alias( + name = "num_cpus", + actual = "@raze__num_cpus__1_13_1//:num_cpus", + tags = [ + "cargo-raze", + "manual", + ], +) + alias( name = "num_enum", actual = "@raze__num_enum__0_5_6//:num_enum", @@ -560,3 +569,12 @@ alias( "manual", ], ) + +alias( + name = "zstd", + actual = "@raze__zstd__0_10_0_zstd_1_5_2//:zstd", + tags = [ + "cargo-raze", + "manual", + ], +) diff --git a/rslib/i18n/cargo/BUILD.bazel b/rslib/i18n/cargo/BUILD.bazel index 5c53f75b4..9ccdfc3cb 100644 --- a/rslib/i18n/cargo/BUILD.bazel +++ b/rslib/i18n/cargo/BUILD.bazel @@ -210,6 +210,15 @@ alias( ], ) +alias( + name = "num_cpus", + actual = "@raze__num_cpus__1_13_1//:num_cpus", + tags = [ + "cargo-raze", + "manual", + ], +) + alias( name = "num_enum", actual = "@raze__num_enum__0_5_6//:num_enum", @@ -560,3 +569,12 @@ alias( "manual", ], ) + +alias( + name = "zstd", + actual = "@raze__zstd__0_10_0_zstd_1_5_2//:zstd", + tags = [ + "cargo-raze", + "manual", + ], +) diff --git a/rslib/i18n_helpers/cargo/BUILD.bazel b/rslib/i18n_helpers/cargo/BUILD.bazel index 5c53f75b4..9ccdfc3cb 100644 --- a/rslib/i18n_helpers/cargo/BUILD.bazel +++ 
b/rslib/i18n_helpers/cargo/BUILD.bazel @@ -210,6 +210,15 @@ alias( ], ) +alias( + name = "num_cpus", + actual = "@raze__num_cpus__1_13_1//:num_cpus", + tags = [ + "cargo-raze", + "manual", + ], +) + alias( name = "num_enum", actual = "@raze__num_enum__0_5_6//:num_enum", @@ -560,3 +569,12 @@ alias( "manual", ], ) + +alias( + name = "zstd", + actual = "@raze__zstd__0_10_0_zstd_1_5_2//:zstd", + tags = [ + "cargo-raze", + "manual", + ], +) diff --git a/rslib/linkchecker/cargo/BUILD.bazel b/rslib/linkchecker/cargo/BUILD.bazel index 5c53f75b4..9ccdfc3cb 100644 --- a/rslib/linkchecker/cargo/BUILD.bazel +++ b/rslib/linkchecker/cargo/BUILD.bazel @@ -210,6 +210,15 @@ alias( ], ) +alias( + name = "num_cpus", + actual = "@raze__num_cpus__1_13_1//:num_cpus", + tags = [ + "cargo-raze", + "manual", + ], +) + alias( name = "num_enum", actual = "@raze__num_enum__0_5_6//:num_enum", @@ -560,3 +569,12 @@ alias( "manual", ], ) + +alias( + name = "zstd", + actual = "@raze__zstd__0_10_0_zstd_1_5_2//:zstd", + tags = [ + "cargo-raze", + "manual", + ], +) diff --git a/rslib/src/backend/collection.rs b/rslib/src/backend/collection.rs index a92ffa11b..9e9672067 100644 --- a/rslib/src/backend/collection.rs +++ b/rslib/src/backend/collection.rs @@ -1,14 +1,19 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html +use std::path::Path; + use slog::error; use super::{progress::Progress, Backend}; pub(super) use crate::backend_proto::collection_service::Service as CollectionService; use crate::{ backend::progress::progress_to_proto, - backend_proto as pb, - collection::CollectionBuilder, + backend_proto::{self as pb, preferences::Backups}, + collection::{ + backup::{self, ImportProgress}, + CollectionBuilder, + }, log::{self}, prelude::*, }; @@ -37,6 +42,8 @@ impl CollectionService for Backend { .set_tr(self.tr.clone()); if !input.log_path.is_empty() { builder.set_log_file(&input.log_path)?; + } else { + builder.set_logger(self.log.clone()); } *col = Some(builder.build()?); @@ -52,7 +59,10 @@ impl CollectionService for Backend { return Err(AnkiError::CollectionNotOpen); } - let col_inner = col.take().unwrap(); + let mut col_inner = col.take().unwrap(); + let limits = col_inner.get_backups(); + let col_path = std::mem::take(&mut col_inner.col_path); + if input.downgrade_to_schema11 { let log = log::terminal(); if let Err(e) = col_inner.close(input.downgrade_to_schema11) { @@ -60,9 +70,44 @@ impl CollectionService for Backend { } } + if let Some(backup_folder) = input.backup_folder { + self.start_backup( + col_path, + backup_folder, + limits, + input.minimum_backup_interval, + )?; + } + Ok(().into()) } + fn restore_backup(&self, input: pb::RestoreBackupRequest) -> Result { + let col = self.col.lock().unwrap(); + if col.is_some() { + Err(AnkiError::CollectionAlreadyOpen) + } else { + let mut handler = self.new_progress_handler(); + let progress_fn = move |progress| { + let throttle = matches!(progress, ImportProgress::Media(_)); + if handler.update(Progress::Import(progress), throttle) { + Ok(()) + } else { + Err(AnkiError::Interrupted) + } + }; + + backup::restore_backup( + progress_fn, + &input.col_path, + &input.backup_path, + &input.media_folder, + &self.tr, + ) + .map(Into::into) + } + } + fn check_database(&self, _input: pb::Empty) -> Result { let mut handler = self.new_progress_handler(); let progress_fn = move |progress, throttle| { @@ -97,4 +142,36 @@ impl CollectionService for Backend { self.with_col(|col| 
col.merge_undoable_ops(starting_from)) .map(Into::into) } + + fn await_backup_completion(&self, _input: pb::Empty) -> Result { + self.await_backup_completion(); + Ok(().into()) + } +} + +impl Backend { + fn await_backup_completion(&self) { + if let Some(task) = self.backup_task.lock().unwrap().take() { + task.join().unwrap(); + } + } + + fn start_backup( + &self, + col_path: impl AsRef, + backup_folder: impl AsRef + Send + 'static, + limits: Backups, + minimum_backup_interval: Option, + ) -> Result<()> { + self.await_backup_completion(); + *self.backup_task.lock().unwrap() = backup::backup( + col_path, + backup_folder, + limits, + minimum_backup_interval, + self.log.clone(), + )?; + + Ok(()) + } } diff --git a/rslib/src/backend/error.rs b/rslib/src/backend/error.rs index 382762b79..116878c2f 100644 --- a/rslib/src/backend/error.rs +++ b/rslib/src/backend/error.rs @@ -34,6 +34,8 @@ impl AnkiError { AnkiError::MultipleNotetypesSelected => Kind::InvalidInput, AnkiError::DatabaseCheckRequired => Kind::InvalidInput, AnkiError::CustomStudyError(_) => Kind::CustomStudyError, + AnkiError::ImportError(_) => Kind::ImportError, + AnkiError::FileIoError(_) => Kind::IoError, }; pb::BackendError { diff --git a/rslib/src/backend/mod.rs b/rslib/src/backend/mod.rs index ad1557e8b..8cfa8dc69 100644 --- a/rslib/src/backend/mod.rs +++ b/rslib/src/backend/mod.rs @@ -30,11 +30,13 @@ mod tags; use std::{ result, sync::{Arc, Mutex}, + thread::JoinHandle, }; use once_cell::sync::OnceCell; use progress::AbortHandleSlot; use prost::Message; +use slog::Logger; use tokio::runtime::{self, Runtime}; use self::{ @@ -62,6 +64,7 @@ use crate::{ collection::Collection, error::{AnkiError, Result}, i18n::I18n, + log, }; pub struct Backend { @@ -71,7 +74,9 @@ pub struct Backend { sync_abort: AbortHandleSlot, progress_state: Arc>, runtime: OnceCell, + log: Logger, state: Arc>, + backup_task: Arc>>>, } #[derive(Default)] @@ -79,19 +84,20 @@ struct BackendState { sync: SyncState, } -pub fn init_backend(init_msg: &[u8]) -> std::result::Result { +pub fn init_backend(init_msg: &[u8], log: Option) -> std::result::Result { let input: pb::BackendInit = match pb::BackendInit::decode(init_msg) { Ok(req) => req, Err(_) => return Err("couldn't decode init request".into()), }; let tr = I18n::new(&input.preferred_langs); + let log = log.unwrap_or_else(log::terminal); - Ok(Backend::new(tr, input.server)) + Ok(Backend::new(tr, input.server, log)) } impl Backend { - pub fn new(tr: I18n, server: bool) -> Backend { + pub fn new(tr: I18n, server: bool, log: Logger) -> Backend { Backend { col: Arc::new(Mutex::new(None)), tr, @@ -102,7 +108,9 @@ impl Backend { last_progress: None, })), runtime: OnceCell::new(), + log, state: Arc::new(Mutex::new(BackendState::default())), + backup_task: Arc::new(Mutex::new(None)), } } diff --git a/rslib/src/backend/progress.rs b/rslib/src/backend/progress.rs index e4e334559..271662270 100644 --- a/rslib/src/backend/progress.rs +++ b/rslib/src/backend/progress.rs @@ -8,6 +8,7 @@ use futures::future::AbortHandle; use super::Backend; use crate::{ backend_proto as pb, + collection::backup::ImportProgress, dbcheck::DatabaseCheckProgress, i18n::I18n, media::sync::MediaSyncProgress, @@ -50,6 +51,7 @@ pub(super) enum Progress { FullSync(FullSyncProgress), NormalSync(NormalSyncProgress), DatabaseCheck(DatabaseCheckProgress), + Import(ImportProgress), } pub(super) fn progress_to_proto(progress: Option, tr: &I18n) -> pb::Progress { @@ -103,6 +105,13 @@ pub(super) fn progress_to_proto(progress: Option, tr: &I18n) -> pb::Pr 
stage_current, }) } + Progress::Import(progress) => pb::progress::Value::Importing( + match progress { + ImportProgress::Collection => tr.importing_importing_collection(), + ImportProgress::Media(n) => tr.importing_imported_media_file(n), + } + .into(), + ), } } else { pb::progress::Value::None(pb::Empty {}) diff --git a/rslib/src/collection/backup.rs b/rslib/src/collection/backup.rs new file mode 100644 index 000000000..b1db8a3fe --- /dev/null +++ b/rslib/src/collection/backup.rs @@ -0,0 +1,542 @@ +// Copyright: Ankitects Pty Ltd and contributors +// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html + +use std::{ + collections::HashMap, + ffi::OsStr, + fs::{self, read_dir, remove_file, DirEntry, File}, + io::{self, Read, Write}, + path::{Path, PathBuf}, + thread::{self, JoinHandle}, + time::SystemTime, +}; + +use chrono::prelude::*; +use itertools::Itertools; +use log::error; +use serde_derive::{Deserialize, Serialize}; +use tempfile::NamedTempFile; +use zip::{write::FileOptions, CompressionMethod, ZipArchive, ZipWriter}; +use zstd::{self, stream::copy_decode, Encoder}; + +use crate::{ + backend_proto::preferences::Backups, collection::CollectionBuilder, error::ImportError, log, + prelude::*, text::normalize_to_nfc, +}; + +/// Bump if making changes that break restoring on older releases. +const BACKUP_VERSION: u8 = 3; +const BACKUP_FORMAT_STRING: &str = "backup-%Y-%m-%d-%H.%M.%S.colpkg"; +/// Default seconds after a backup, in which further backups will be skipped. +const MINIMUM_BACKUP_INTERVAL: u64 = 5 * 60; +/// Enable multithreaded compression if over this size. For smaller files, +/// multithreading makes things slower, and in initial tests, the crossover +/// point was somewhere between 1MB and 10MB on a many-core system. +const MULTITHREAD_MIN_BYTES: usize = 10 * 1024 * 1024; + +#[derive(Debug, Default, Serialize, Deserialize)] +#[serde(default)] +struct Meta { + #[serde(rename = "ver")] + version: u8, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ImportProgress { + Collection, + Media(usize), +} + +pub fn backup( + col_path: impl AsRef, + backup_folder: impl AsRef + Send + 'static, + limits: Backups, + minimum_backup_interval: Option, + log: Logger, +) -> Result>> { + let recent_secs = minimum_backup_interval.unwrap_or(MINIMUM_BACKUP_INTERVAL); + if recent_secs > 0 && has_recent_backup(backup_folder.as_ref(), recent_secs)? { + Ok(None) + } else { + let col_data = std::fs::read(col_path)?; + Ok(Some(thread::spawn(move || { + backup_inner(&col_data, &backup_folder, limits, log) + }))) + } +} + +fn has_recent_backup(backup_folder: &Path, recent_secs: u64) -> Result { + let now = SystemTime::now(); + Ok(read_dir(backup_folder)? 
+ .filter_map(|res| res.ok()) + .filter_map(|entry| entry.metadata().ok()) + .filter_map(|meta| meta.created().ok()) + .filter_map(|time| now.duration_since(time).ok()) + .any(|duration| duration.as_secs() < recent_secs)) +} + +pub fn restore_backup( + mut progress_fn: impl FnMut(ImportProgress) -> Result<()>, + col_path: &str, + backup_path: &str, + media_folder: &str, + tr: &I18n, +) -> Result { + progress_fn(ImportProgress::Collection)?; + let col_path = PathBuf::from(col_path); + let col_dir = col_path + .parent() + .ok_or_else(|| AnkiError::invalid_input("bad collection path"))?; + let mut tempfile = NamedTempFile::new_in(col_dir)?; + + let backup_file = File::open(backup_path)?; + let mut archive = ZipArchive::new(backup_file)?; + let meta = Meta::from_archive(&mut archive)?; + + copy_collection(&mut archive, &mut tempfile, meta)?; + progress_fn(ImportProgress::Collection)?; + check_collection(tempfile.path())?; + progress_fn(ImportProgress::Collection)?; + + let mut result = String::new(); + if let Err(e) = restore_media(progress_fn, &mut archive, media_folder) { + result = tr + .importing_failed_to_import_media_file(e.localized_description(tr)) + .into_owned() + }; + + tempfile.as_file().sync_all()?; + tempfile.persist(&col_path).map_err(|err| err.error)?; + if !cfg!(windows) { + File::open(col_dir)?.sync_all()?; + } + + Ok(result) +} + +fn backup_inner>(col_data: &[u8], backup_folder: P, limits: Backups, log: Logger) { + if let Err(error) = write_backup(col_data, backup_folder.as_ref()) { + error!(log, "failed to backup collection: {error:?}"); + } + if let Err(error) = thin_backups(backup_folder, limits, &log) { + error!(log, "failed to thin backups: {error:?}"); + } +} + +fn write_backup>(mut col_data: &[u8], backup_folder: S) -> Result<()> { + let out_file = File::create(out_path(backup_folder))?; + let mut zip = ZipWriter::new(out_file); + let options = FileOptions::default().compression_method(CompressionMethod::Stored); + let meta = serde_json::to_string(&Meta { + version: BACKUP_VERSION, + }) + .unwrap(); + + zip.start_file("meta", options)?; + zip.write_all(meta.as_bytes())?; + zip.start_file("collection.anki21b", options)?; + let col_data_len = col_data.len(); + zstd_copy(&mut col_data, &mut zip, col_data_len)?; + zip.start_file("media", options)?; + zip.write_all(b"{}")?; + zip.finish()?; + + Ok(()) +} + +/// Copy contents of reader into writer, compressing as we copy. 
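+/// Multithreading is only engaged for inputs above MULTITHREAD_MIN_BYTES;
+/// for smaller collections, spawning workers costs more than it saves.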
+
+/// Copy contents of reader into writer, compressing as we copy.
+fn zstd_copy<R: Read, W: Write>(reader: &mut R, writer: &mut W, size: usize) -> Result<()> {
+    let mut encoder = Encoder::new(writer, 0)?;
+    if size > MULTITHREAD_MIN_BYTES {
+        encoder.multithread(num_cpus::get() as u32)?;
+    }
+    io::copy(reader, &mut encoder)?;
+    encoder.finish()?;
+    Ok(())
+}
+
+fn thin_backups<P: AsRef<Path>>(backup_folder: P, limits: Backups, log: &Logger) -> Result<()> {
+    let backups =
+        read_dir(backup_folder)?.filter_map(|entry| entry.ok().and_then(Backup::from_entry));
+    let obsolete_backups = BackupFilter::new(Local::today(), limits).obsolete_backups(backups);
+    for backup in obsolete_backups {
+        if let Err(error) = remove_file(&backup.path) {
+            error!(log, "failed to remove {:?}: {error:?}", &backup.path);
+        };
+    }
+
+    Ok(())
+}
+
+fn out_path<S: AsRef<OsStr>>(backup_folder: S) -> PathBuf {
+    Path::new(&backup_folder).join(&format!("{}", Local::now().format(BACKUP_FORMAT_STRING)))
+}
+
+fn datetime_from_file_name(file_name: &str) -> Option<DateTime<Local>> {
+    NaiveDateTime::parse_from_str(file_name, BACKUP_FORMAT_STRING)
+        .ok()
+        .and_then(|datetime| Local.from_local_datetime(&datetime).latest())
+}
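Because `out_path()` and `datetime_from_file_name()` share `BACKUP_FORMAT_STRING`, any name the writer produces parses back on the reading side, and files that do not match the pattern are simply ignored by the thinner. A standalone round-trip sketch using the same chrono calls:

    use chrono::{Local, NaiveDateTime, TimeZone};

    fn main() {
        let fmt = "backup-%Y-%m-%d-%H.%M.%S.colpkg";
        // Writing side: format a timestamp into a backup file name.
        let name = Local.ymd(2022, 3, 7).and_hms(6, 11, 31).format(fmt).to_string();
        assert_eq!(name, "backup-2022-03-07-06.11.31.colpkg");
        // Reading side: parse the name back into a timestamp.
        let parsed = NaiveDateTime::parse_from_str(&name, fmt).unwrap();
        assert_eq!(parsed.to_string(), "2022-03-07 06:11:31");
        // Foreign files in the backup folder fail to parse and are skipped.
        assert!(NaiveDateTime::parse_from_str("foo.colpkg", fmt).is_err());
    }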
+
+#[derive(Debug, PartialEq, Clone)]
+struct Backup {
+    path: PathBuf,
+    datetime: DateTime<Local>,
+}
+
+impl Backup {
+    /// Serial day number
+    fn day(&self) -> i32 {
+        self.datetime.num_days_from_ce()
+    }
+
+    /// Serial week number, starting on Monday
+    fn week(&self) -> i32 {
+        // Day 1 (01/01/01) was a Monday, meaning week rolled over on Sunday
+        // (when day % 7 == 0). We subtract 1 to shift the rollover to Monday.
+        (self.day() - 1) / 7
+    }
+
+    /// Serial month number
+    fn month(&self) -> u32 {
+        self.datetime.year() as u32 * 12 + self.datetime.month()
+    }
+}
+
+impl Backup {
+    fn from_entry(entry: DirEntry) -> Option<Self> {
+        entry
+            .file_name()
+            .to_str()
+            .and_then(datetime_from_file_name)
+            .map(|datetime| Self {
+                path: entry.path(),
+                datetime,
+            })
+    }
+}
+
+#[derive(Debug)]
+struct BackupFilter {
+    yesterday: i32,
+    last_kept_day: i32,
+    last_kept_week: i32,
+    last_kept_month: u32,
+    limits: Backups,
+    obsolete: Vec<Backup>,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq)]
+enum BackupStage {
+    Daily,
+    Weekly,
+    Monthly,
+}
+
+impl BackupFilter {
+    fn new(today: Date<Local>, limits: Backups) -> Self {
+        Self {
+            yesterday: today.num_days_from_ce() - 1,
+            last_kept_day: i32::MAX,
+            last_kept_week: i32::MAX,
+            last_kept_month: u32::MAX,
+            limits,
+            obsolete: Vec::new(),
+        }
+    }
+
+    fn obsolete_backups(mut self, backups: impl Iterator<Item = Backup>) -> Vec<Backup> {
+        use BackupStage::*;
+
+        for backup in backups
+            .sorted_unstable_by_key(|b| b.datetime.timestamp())
+            .rev()
+        {
+            if self.is_recent(&backup) {
+                self.mark_fresh(None, backup);
+            } else if self.remaining(Daily) {
+                self.mark_fresh_or_obsolete(Daily, backup);
+            } else if self.remaining(Weekly) {
+                self.mark_fresh_or_obsolete(Weekly, backup);
+            } else if self.remaining(Monthly) {
+                self.mark_fresh_or_obsolete(Monthly, backup);
+            } else {
+                self.mark_obsolete(backup);
+            }
+        }
+
+        self.obsolete
+    }
+
+    fn is_recent(&self, backup: &Backup) -> bool {
+        backup.day() >= self.yesterday
+    }
+
+    fn remaining(&self, stage: BackupStage) -> bool {
+        match stage {
+            BackupStage::Daily => self.limits.daily > 0,
+            BackupStage::Weekly => self.limits.weekly > 0,
+            BackupStage::Monthly => self.limits.monthly > 0,
+        }
+    }
+
+    fn mark_fresh_or_obsolete(&mut self, stage: BackupStage, backup: Backup) {
+        let keep = match stage {
+            BackupStage::Daily => backup.day() < self.last_kept_day,
+            BackupStage::Weekly => backup.week() < self.last_kept_week,
+            BackupStage::Monthly => backup.month() < self.last_kept_month,
+        };
+        if keep {
+            self.mark_fresh(Some(stage), backup);
+        } else {
+            self.mark_obsolete(backup);
+        }
+    }
+
+    /// Adjusts the remaining limit for the stage the backup was kept in, and
+    /// updates the last-kept day/week/month markers.
+    fn mark_fresh(&mut self, stage: Option<BackupStage>, backup: Backup) {
+        self.last_kept_day = backup.day();
+        self.last_kept_week = backup.week();
+        self.last_kept_month = backup.month();
+        match stage {
+            None => (),
+            Some(BackupStage::Daily) => self.limits.daily -= 1,
+            Some(BackupStage::Weekly) => self.limits.weekly -= 1,
+            Some(BackupStage::Monthly) => self.limits.monthly -= 1,
+        }
+    }
+
+    fn mark_obsolete(&mut self, backup: Backup) {
+        self.obsolete.push(backup);
+    }
+}
+
+impl Meta {
+    /// Extracts metadata from an archive and checks if its version is supported.
+    fn from_archive(archive: &mut ZipArchive<File>) -> Result<Self> {
+        let mut meta: Self = archive
+            .by_name("meta")
+            .ok()
+            .and_then(|file| serde_json::from_reader(file).ok())
+            .unwrap_or_default();
+        if meta.version > BACKUP_VERSION {
+            return Err(AnkiError::ImportError(ImportError::TooNew));
+        } else if meta.version == 0 {
+            meta.version = if archive.by_name("collection.anki21").is_ok() {
+                2
+            } else {
+                1
+            };
+        }
+
+        Ok(meta)
+    }
+
+    fn collection_name(&self) -> &'static str {
+        match self.version {
+            1 => "collection.anki2",
+            2 => "collection.anki21",
+            _ => "collection.anki21b",
+        }
+    }
+}
+
+fn check_collection(col_path: &Path) -> Result<()> {
+    CollectionBuilder::new(col_path)
+        .build()
+        .ok()
+        .and_then(|col| {
+            col.storage
+                .db
+                .pragma_query_value(None, "integrity_check", |row| row.get::<_, String>(0))
+                .ok()
+        })
+        .and_then(|s| (s == "ok").then(|| ()))
+        .ok_or(AnkiError::ImportError(ImportError::Corrupt))
+}
+
+fn restore_media(
+    mut progress_fn: impl FnMut(ImportProgress) -> Result<()>,
+    archive: &mut ZipArchive<File>,
+    media_folder: &str,
+) -> Result<()> {
+    let media_file_names = extract_media_file_names(archive).ok_or(AnkiError::NotFound)?;
+    let mut count = 0;
+
+    for (archive_file_name, file_name) in media_file_names {
+        count += 1;
+        if count % 10 == 0 {
+            progress_fn(ImportProgress::Media(count))?;
+        }
+
+        if let Ok(mut zip_file) = archive.by_name(&archive_file_name) {
+            let file_path = Path::new(&media_folder).join(normalize_to_nfc(&file_name).as_ref());
+            let files_are_equal = fs::metadata(&file_path)
+                .map(|metadata| metadata.len() == zip_file.size())
+                .unwrap_or_default();
+            if !files_are_equal {
+                let mut file = match File::create(&file_path) {
+                    Ok(file) => file,
+                    Err(err) => return Err(AnkiError::file_io_error(err, &file_path)),
+                };
+                if let Err(err) = io::copy(&mut zip_file, &mut file) {
+                    return Err(AnkiError::file_io_error(err, &file_path));
+                }
+            }
+        } else {
+            return Err(AnkiError::invalid_input(&format!(
+                "{archive_file_name} missing from archive"
+            )));
+        }
+    }
+    Ok(())
+}
+
+fn extract_media_file_names(archive: &mut ZipArchive<File>) -> Option<HashMap<String, String>> {
+    archive
+        .by_name("media")
+        .ok()
+        .and_then(|mut file| {
+            let mut buf = Vec::new();
+            file.read_to_end(&mut buf).ok().map(|_| buf)
+        })
+        .and_then(|bytes| serde_json::from_slice(&bytes).ok())
+}
+
+fn copy_collection(
+    archive: &mut ZipArchive<File>,
+    writer: &mut impl Write,
+    meta: Meta,
+) -> Result<()> {
+    let mut file = archive
+        .by_name(meta.collection_name())
+        .map_err(|_| AnkiError::ImportError(ImportError::Corrupt))?;
+    if meta.version < 3 {
+        io::copy(&mut file, writer)?;
+    } else {
+        copy_decode(file, writer)?;
+    }
+
+    Ok(())
+}
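Backup versions 1 and 2 store the SQLite file uncompressed (as `collection.anki2` or `collection.anki21`), while version 3 stores it zstd-compressed as `collection.anki21b`, hence the branch between `io::copy` and `copy_decode` above. For reference, a standalone sketch of decompressing such a file by hand with the same `zstd::stream::copy_decode` helper (the file names here are assumptions for the example):

    use std::fs::File;
    use zstd::stream::copy_decode;

    fn main() -> std::io::Result<()> {
        // Turn a version-3 collection entry back into a plain SQLite file.
        let input = File::open("collection.anki21b")?;
        let mut output = File::create("collection.anki21")?;
        copy_decode(input, &mut output)?;
        Ok(())
    }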
backup { + ($num_days_from_ce:expr) => { + Backup { + datetime: Local + .from_local_datetime( + &NaiveDate::from_num_days_from_ce($num_days_from_ce).and_hms(0, 0, 0), + ) + .latest() + .unwrap(), + path: PathBuf::new(), + } + }; + ($year:expr, $month:expr, $day:expr) => { + Backup { + datetime: Local.ymd($year, $month, $day).and_hms(0, 0, 0), + path: PathBuf::new(), + } + }; + ($year:expr, $month:expr, $day:expr, $hour:expr, $min:expr, $sec:expr) => { + Backup { + datetime: Local.ymd($year, $month, $day).and_hms($hour, $min, $sec), + path: PathBuf::new(), + } + }; + } + + #[test] + fn thinning_manual() { + let today = Local.ymd(2022, 2, 22); + let limits = Backups { + daily: 3, + weekly: 2, + monthly: 1, + }; + + // true => should be removed + let backups = [ + // grace period + (backup!(2022, 2, 22), false), + (backup!(2022, 2, 22), false), + (backup!(2022, 2, 21), false), + // daily + (backup!(2022, 2, 20, 6, 0, 0), true), + (backup!(2022, 2, 20, 18, 0, 0), false), + (backup!(2022, 2, 10), false), + (backup!(2022, 2, 9), false), + // weekly + (backup!(2022, 2, 7), true), // Monday, week already backed up + (backup!(2022, 2, 6, 1, 0, 0), true), + (backup!(2022, 2, 6, 2, 0, 0), false), + (backup!(2022, 1, 6), false), + // monthly + (backup!(2022, 1, 5), true), + (backup!(2021, 12, 24), false), + (backup!(2021, 12, 1), true), + (backup!(2021, 11, 1), true), + ]; + + let expected: Vec<_> = backups + .iter() + .filter_map(|b| b.1.then(|| b.0.clone())) + .collect(); + let obsolete_backups = + BackupFilter::new(today, limits).obsolete_backups(backups.into_iter().map(|b| b.0)); + + assert_eq!(obsolete_backups, expected); + } + + #[test] + fn thinning_generic() { + let today = Local.ymd(2022, 1, 1); + let today_ce_days = today.num_days_from_ce(); + let limits = Backups { + // config defaults + daily: 12, + weekly: 10, + monthly: 9, + }; + let backups: Vec<_> = (1..366).map(|i| backup!(today_ce_days - i)).collect(); + let mut expected = Vec::new(); + + // one day grace period, then daily backups + let mut backup_iter = backups.iter().skip(1 + limits.daily as usize); + + // weekly backups from the last day of the week (Sunday) + for _ in 0..limits.weekly { + for backup in backup_iter.by_ref() { + if backup.datetime.weekday() == Weekday::Sun { + break; + } else { + expected.push(backup.clone()) + } + } + } + + // monthly backups from the last day of the month + for _ in 0..limits.monthly { + for backup in backup_iter.by_ref() { + if backup.datetime.date().month() != backup.datetime.date().succ().month() { + break; + } else { + expected.push(backup.clone()) + } + } + } + + // limits reached; collect rest + backup_iter + .cloned() + .for_each(|backup| expected.push(backup)); + + let obsolete_backups = + BackupFilter::new(today, limits).obsolete_backups(backups.into_iter()); + assert_eq!(obsolete_backups, expected); + } +} diff --git a/rslib/src/collection/mod.rs b/rslib/src/collection/mod.rs index e991cea87..b5d8fa4f2 100644 --- a/rslib/src/collection/mod.rs +++ b/rslib/src/collection/mod.rs @@ -1,6 +1,7 @@ // Copyright: Ankitects Pty Ltd and contributors // License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html +pub mod backup; pub(crate) mod timestamps; mod transact; pub(crate) mod undo; diff --git a/rslib/src/config/mod.rs b/rslib/src/config/mod.rs index 72490e1ac..955d65832 100644 --- a/rslib/src/config/mod.rs +++ b/rslib/src/config/mod.rs @@ -14,7 +14,7 @@ use slog::warn; use strum::IntoStaticStr; pub use self::{bool::BoolKey, notetype::get_aux_notetype_config_key, 
diff --git a/rslib/src/config/mod.rs b/rslib/src/config/mod.rs
index 72490e1ac..955d65832 100644
--- a/rslib/src/config/mod.rs
+++ b/rslib/src/config/mod.rs
@@ -14,7 +14,7 @@ use slog::warn;
 use strum::IntoStaticStr;
 
 pub use self::{bool::BoolKey, notetype::get_aux_notetype_config_key, string::StringKey};
-use crate::prelude::*;
+use crate::{backend_proto::preferences::Backups, prelude::*};
 
 /// Only used when updating/undoing.
 #[derive(Debug)]
@@ -43,6 +43,7 @@ pub(crate) enum ConfigKey {
     FirstDayOfWeek,
     LocalOffset,
     Rollover,
+    Backups,
 
     #[strum(to_string = "timeLim")]
     AnswerTimeLimitSecs,
@@ -262,6 +263,21 @@ impl Collection {
         self.set_config(ConfigKey::LastUnburiedDay, &day)
             .map(|_| ())
     }
+
+    pub(crate) fn get_backups(&self) -> Backups {
+        self.get_config_optional(ConfigKey::Backups).unwrap_or(
+            // 2d + 12d + 10w + 9m ≈ 1y
+            Backups {
+                daily: 12,
+                weekly: 10,
+                monthly: 9,
+            },
+        )
+    }
+
+    pub(crate) fn set_backups(&mut self, limits: Backups) -> Result<()> {
+        self.set_config(ConfigKey::Backups, &limits).map(|_| ())
+    }
 }
 
 // 2021 scheduler moves this into deck config
diff --git a/rslib/src/error/mod.rs b/rslib/src/error/mod.rs
index 0c67f36c7..ae20ad9f8 100644
--- a/rslib/src/error/mod.rs
+++ b/rslib/src/error/mod.rs
@@ -6,7 +6,7 @@ mod filtered;
 mod network;
 mod search;
 
-use std::{fmt::Display, io};
+use std::{fmt::Display, io, path::Path};
 
 pub use db::{DbError, DbErrorKind};
 pub use filtered::{CustomStudyError, FilteredDeckError};
@@ -24,6 +24,7 @@ pub enum AnkiError {
     TemplateError(String),
     TemplateSaveError(TemplateSaveError),
     IoError(String),
+    FileIoError(FileIoError),
     DbError(DbError),
     NetworkError(NetworkError),
     SyncError(SyncError),
@@ -42,6 +43,7 @@ pub enum AnkiError {
     MultipleNotetypesSelected,
     DatabaseCheckRequired,
     CustomStudyError(CustomStudyError),
+    ImportError(ImportError),
 }
 
 impl Display for AnkiError {
@@ -96,6 +98,7 @@ impl AnkiError {
             AnkiError::MultipleNotetypesSelected => tr.errors_multiple_notetypes_selected().into(),
             AnkiError::DatabaseCheckRequired => tr.errors_please_check_database().into(),
             AnkiError::CustomStudyError(err) => err.localized_description(tr),
+            AnkiError::ImportError(err) => err.localized_description(tr),
             AnkiError::IoError(_)
             | AnkiError::JsonError(_)
             | AnkiError::ProtoError(_)
@@ -105,6 +108,9 @@ impl AnkiError {
             | AnkiError::NotFound
             | AnkiError::Existing
             | AnkiError::UndoEmpty => format!("{:?}", self),
+            AnkiError::FileIoError(err) => {
+                format!("{}: {}", err.path, err.error)
+            }
         }
     }
 }
@@ -176,3 +182,41 @@ pub enum TemplateSaveErrorDetails {
     MissingCloze,
     ExtraneousCloze,
 }
+
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub enum ImportError {
+    Corrupt,
+    TooNew,
+}
+
+impl ImportError {
+    fn localized_description(self, tr: &I18n) -> String {
+        match self {
+            Self::Corrupt => tr.importing_the_provided_file_is_not_a(),
+            Self::TooNew => tr.errors_collection_too_new(),
+        }
+        .into()
+    }
+}
+
+#[derive(Debug, PartialEq, Clone)]
+pub struct FileIoError {
+    pub path: String,
+    pub error: String,
+}
+
+impl AnkiError {
+    pub(crate) fn file_io_error<P: AsRef<Path>>(err: std::io::Error, path: P) -> Self {
+        AnkiError::FileIoError(FileIoError::new(err, path.as_ref()))
+    }
+}
+
+impl FileIoError {
+    pub fn new(err: std::io::Error, path: &Path) -> FileIoError {
+        FileIoError {
+            path: path.to_string_lossy().to_string(),
+            error: err.to_string(),
+        }
+    }
+}
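A minimal call-site sketch for the new variant (the function below is hypothetical; `AnkiError::file_io_error` and the crate's `Result` alias are from the hunk above). The point is that the failing path travels inside the error instead of being formatted into ad-hoc message strings:

    use std::{fs::File, path::Path};

    // Hypothetical helper: attach the offending path to any I/O failure.
    fn open_for_import(path: &Path) -> Result<File> {
        File::open(path).map_err(|err| AnkiError::file_io_error(err, path))
    }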
diff --git a/rslib/src/log.rs b/rslib/src/log.rs
index 2c0cddd7b..b9ad49e65 100644
--- a/rslib/src/log.rs
+++ b/rslib/src/log.rs
@@ -81,7 +81,7 @@ fn maybe_rotate_log(path: &str) -> io::Result<()> {
 }
 
 /// Get a logger, logging to a file if a path was provided, otherwise terminal.
-pub(crate) fn default_logger(path: Option<&str>) -> io::Result<Logger> {
+pub fn default_logger(path: Option<&str>) -> io::Result<Logger> {
     Ok(match path {
         Some(path) => file(path)?,
         None => terminal(),
diff --git a/rslib/src/preferences.rs b/rslib/src/preferences.rs
index 6ac5dd5b4..be6bf75cf 100644
--- a/rslib/src/preferences.rs
+++ b/rslib/src/preferences.rs
@@ -19,6 +19,7 @@ impl Collection {
             scheduling: Some(self.get_scheduling_preferences()?),
             reviewing: Some(self.get_reviewing_preferences()?),
             editing: Some(self.get_editing_preferences()?),
+            backups: Some(self.get_backups()),
         })
     }
 
@@ -38,6 +39,9 @@ impl Collection {
         if let Some(editing) = prefs.editing {
             self.set_editing_preferences(editing)?;
         }
+        if let Some(backups) = prefs.backups {
+            self.set_backups(backups)?;
+        }
 
         Ok(())
     }
diff --git a/rslib/src/sync/mod.rs b/rslib/src/sync/mod.rs
index 499ff3712..3d0a5054d 100644
--- a/rslib/src/sync/mod.rs
+++ b/rslib/src/sync/mod.rs
@@ -680,9 +680,14 @@ impl Collection {
         db.execute_batch("update col set ls=mod")?;
         drop(db);
         // overwrite existing collection atomically
+        out_file.as_file().sync_all()?;
         out_file
             .persist(&col_path)
             .map_err(|e| AnkiError::IoError(format!("download save failed: {}", e)))?;
+        if !cfg!(windows) {
+            std::fs::File::open(col_folder)?.sync_all()?;
+        }
+
         Ok(())
     }
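This last hunk applies the same crash-safe replacement sequence used in `restore_backup()` above: sync the temp file's contents, rename it over the target (atomic when both live on the same filesystem, which `NamedTempFile::new_in` ensures), then sync the parent directory so the rename itself survives a crash. The directory sync is skipped on Windows, where directories cannot be opened as files. A generic standalone sketch of the pattern, assuming only the `tempfile` crate:

    use std::{fs::File, io::{self, Write}, path::Path};
    use tempfile::NamedTempFile;

    fn replace_file_atomically(target: &Path, data: &[u8]) -> io::Result<()> {
        let dir = target.parent().expect("target must have a parent directory");
        let mut temp = NamedTempFile::new_in(dir)?; // same filesystem as target
        temp.write_all(data)?;
        temp.as_file().sync_all()?; // flush contents before the rename
        temp.persist(target)?; // atomic rename over the target
        if !cfg!(windows) {
            File::open(dir)?.sync_all()?; // make the rename itself durable
        }
        Ok(())
    }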