mirror of
https://github.com/ankitects/anki.git
synced 2025-09-18 14:02:21 -04:00
gather field references in Rust; media check now mostly complete
This commit is contained in:
parent
aa832e9117
commit
fabfcb0338
9 changed files with 437 additions and 55 deletions
|
@ -29,6 +29,7 @@ log = "0.4.8"
|
|||
serde_tuple = "0.4.0"
|
||||
coarsetime = "0.1.12"
|
||||
utime = "0.2.1"
|
||||
serde-aux = "0.6.1"
|
||||
|
||||
[target.'cfg(target_vendor="apple")'.dependencies]
|
||||
rusqlite = { version = "0.21.0", features = ["trace"] }
|
||||
|
|
|
@ -17,3 +17,5 @@ pub mod sched;
|
|||
pub mod template;
|
||||
pub mod template_filters;
|
||||
pub mod text;
|
||||
pub mod time;
|
||||
pub mod types;
|
||||
|
|
|
@ -2,29 +2,37 @@
|
|||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use crate::err::{AnkiError, Result};
|
||||
use crate::media::col::{
|
||||
for_every_note, get_note_types, mark_collection_modified, open_or_create_collection_db,
|
||||
set_note, Note,
|
||||
};
|
||||
use crate::media::database::MediaDatabaseContext;
|
||||
use crate::media::files::{
|
||||
data_for_file, filename_if_normalized, remove_files, trash_folder, MEDIA_SYNC_FILESIZE_LIMIT,
|
||||
};
|
||||
use crate::media::MediaManager;
|
||||
use crate::text::{normalize_to_nfc, MediaRef};
|
||||
use crate::{media::MediaManager, text::extract_media_refs};
|
||||
use coarsetime::Instant;
|
||||
use log::debug;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::path::Path;
|
||||
use std::{borrow::Cow, fs, time};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct MediaCheckOutput {
|
||||
files: Vec<String>,
|
||||
renamed: Vec<RenamedFile>,
|
||||
unused: Vec<String>,
|
||||
missing: Vec<String>,
|
||||
renamed: HashMap<String, String>,
|
||||
dirs: Vec<String>,
|
||||
oversize: Vec<String>,
|
||||
}
|
||||
|
||||
/// A file that was renamed due to invalid chars or non-NFC encoding.
|
||||
/// On Apple computers, files in NFD format are not renamed.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct RenamedFile {
|
||||
current_fname: String,
|
||||
original_fname: String,
|
||||
#[derive(Debug, PartialEq, Default)]
|
||||
struct MediaFolderCheck {
|
||||
files: Vec<String>,
|
||||
renamed: HashMap<String, String>,
|
||||
dirs: Vec<String>,
|
||||
oversize: Vec<String>,
|
||||
}
|
||||
|
||||
pub struct MediaChecker<'a, P>
|
||||
|
@ -32,6 +40,7 @@ where
|
|||
P: FnMut(usize) -> bool,
|
||||
{
|
||||
mgr: &'a MediaManager,
|
||||
col_path: &'a Path,
|
||||
progress_cb: P,
|
||||
checked: usize,
|
||||
progress_updated: Instant,
|
||||
|
@ -41,9 +50,14 @@ impl<P> MediaChecker<'_, P>
|
|||
where
|
||||
P: FnMut(usize) -> bool,
|
||||
{
|
||||
pub fn new(mgr: &MediaManager, progress_cb: P) -> MediaChecker<'_, P> {
|
||||
pub fn new<'a>(
|
||||
mgr: &'a MediaManager,
|
||||
col_path: &'a Path,
|
||||
progress_cb: P,
|
||||
) -> MediaChecker<'a, P> {
|
||||
MediaChecker {
|
||||
mgr,
|
||||
col_path,
|
||||
progress_cb,
|
||||
checked: 0,
|
||||
progress_updated: Instant::now(),
|
||||
|
@ -53,12 +67,28 @@ where
|
|||
pub fn check(&mut self) -> Result<MediaCheckOutput> {
|
||||
self.expire_old_trash()?;
|
||||
|
||||
// loop through on-disk files
|
||||
let mut dirs = vec![];
|
||||
let mut oversize = vec![];
|
||||
let mut all_files = vec![];
|
||||
let mut renamed_files = vec![];
|
||||
let mut ctx = self.mgr.dbctx();
|
||||
|
||||
let folder_check = self.check_media_folder(&mut ctx)?;
|
||||
let referenced_files = self.check_media_references(&folder_check.renamed)?;
|
||||
let (unused, missing) = find_unused_and_missing(folder_check.files, referenced_files);
|
||||
|
||||
Ok(MediaCheckOutput {
|
||||
unused,
|
||||
missing,
|
||||
renamed: folder_check.renamed,
|
||||
dirs: folder_check.dirs,
|
||||
oversize: folder_check.oversize,
|
||||
})
|
||||
}
|
||||
|
||||
/// Check all the files in the media folder.
|
||||
///
|
||||
/// - Renames files with invalid names
|
||||
/// - Notes folders/oversized files
|
||||
/// - Gathers a list of all files
|
||||
fn check_media_folder(&mut self, ctx: &mut MediaDatabaseContext) -> Result<MediaFolderCheck> {
|
||||
let mut out = MediaFolderCheck::default();
|
||||
for dentry in self.mgr.media_folder.read_dir()? {
|
||||
let dentry = dentry?;
|
||||
|
||||
|
@ -76,14 +106,14 @@ where
|
|||
|
||||
// skip folders
|
||||
if dentry.file_type()?.is_dir() {
|
||||
dirs.push(disk_fname.to_string());
|
||||
out.dirs.push(disk_fname.to_string());
|
||||
continue;
|
||||
}
|
||||
|
||||
// ignore large files and zero byte files
|
||||
let metadata = dentry.metadata()?;
|
||||
if metadata.len() > MEDIA_SYNC_FILESIZE_LIMIT as u64 {
|
||||
oversize.push(disk_fname.to_string());
|
||||
out.oversize.push(disk_fname.to_string());
|
||||
continue;
|
||||
}
|
||||
if metadata.len() == 0 {
|
||||
|
@ -91,23 +121,21 @@ where
|
|||
}
|
||||
|
||||
// rename if required
|
||||
let (norm_name, renamed) = self.normalize_and_maybe_rename(&mut ctx, &disk_fname)?;
|
||||
let (norm_name, renamed) = self.normalize_and_maybe_rename(ctx, &disk_fname)?;
|
||||
if renamed {
|
||||
renamed_files.push(RenamedFile {
|
||||
current_fname: norm_name.to_string(),
|
||||
original_fname: disk_fname.to_string(),
|
||||
})
|
||||
let orig_as_nfc = normalize_to_nfc(&disk_fname);
|
||||
// if the only difference is the unicode normalization,
|
||||
// we don't mark the file as a renamed file
|
||||
if orig_as_nfc.as_ref() != norm_name.as_ref() {
|
||||
out.renamed
|
||||
.insert(orig_as_nfc.to_string(), norm_name.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
all_files.push(norm_name.into_owned());
|
||||
out.files.push(norm_name.into_owned());
|
||||
}
|
||||
|
||||
Ok(MediaCheckOutput {
|
||||
files: all_files,
|
||||
renamed: renamed_files,
|
||||
dirs,
|
||||
oversize,
|
||||
})
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// Returns (normalized_form, needs_rename)
|
||||
|
@ -182,30 +210,149 @@ where
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find all media references in notes, fixing as necessary.
|
||||
fn check_media_references(
|
||||
&mut self,
|
||||
renamed: &HashMap<String, String>,
|
||||
) -> Result<HashSet<String>> {
|
||||
let mut db = open_or_create_collection_db(self.col_path)?;
|
||||
let trx = db.transaction()?;
|
||||
|
||||
let mut referenced_files = HashSet::new();
|
||||
let note_types = get_note_types(&trx)?;
|
||||
let mut collection_modified = false;
|
||||
|
||||
for_every_note(&trx, |note| {
|
||||
self.checked += 1;
|
||||
if self.checked % 10 == 0 {
|
||||
self.maybe_fire_progress_cb()?;
|
||||
}
|
||||
if fix_and_extract_media_refs(note, &mut referenced_files, renamed)? {
|
||||
// note was modified, needs saving
|
||||
set_note(
|
||||
&trx,
|
||||
note,
|
||||
note_types
|
||||
.get(¬e.mid)
|
||||
.ok_or_else(|| AnkiError::DBError {
|
||||
info: "missing note type".to_string(),
|
||||
})?,
|
||||
)?;
|
||||
collection_modified = true;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
if collection_modified {
|
||||
mark_collection_modified(&trx)?;
|
||||
trx.commit()?;
|
||||
}
|
||||
|
||||
Ok(referenced_files)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if note was modified.
|
||||
fn fix_and_extract_media_refs(
|
||||
note: &mut Note,
|
||||
seen_files: &mut HashSet<String>,
|
||||
renamed: &HashMap<String, String>,
|
||||
) -> Result<bool> {
|
||||
let mut updated = false;
|
||||
|
||||
for idx in 0..note.fields().len() {
|
||||
let field = normalize_and_maybe_rename_files(¬e.fields()[idx], renamed, seen_files);
|
||||
if let Cow::Owned(field) = field {
|
||||
// field was modified, need to save
|
||||
note.set_field(idx, field)?;
|
||||
updated = true;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(updated)
|
||||
}
|
||||
|
||||
/// Convert any filenames that are not in NFC form into NFC,
|
||||
/// and update any files that were renamed on disk.
|
||||
fn normalize_and_maybe_rename_files<'a>(
|
||||
field: &'a str,
|
||||
renamed: &HashMap<String, String>,
|
||||
seen_files: &mut HashSet<String>,
|
||||
) -> Cow<'a, str> {
|
||||
let refs = extract_media_refs(field);
|
||||
let mut field: Cow<str> = field.into();
|
||||
|
||||
for media_ref in refs {
|
||||
// normalize fname into NFC
|
||||
let mut fname = normalize_to_nfc(media_ref.fname);
|
||||
// and look it up to see if it's been renamed
|
||||
if let Some(new_name) = renamed.get(fname.as_ref()) {
|
||||
fname = new_name.to_owned().into();
|
||||
}
|
||||
// if it was not in NFC or was renamed, update the field
|
||||
if let Cow::Owned(ref new_name) = fname {
|
||||
field = rename_media_ref_in_field(field.as_ref(), &media_ref, new_name).into();
|
||||
}
|
||||
// and mark this filename as having been referenced
|
||||
seen_files.insert(fname.into_owned());
|
||||
}
|
||||
|
||||
field
|
||||
}
|
||||
|
||||
fn rename_media_ref_in_field(field: &str, media_ref: &MediaRef, new_name: &str) -> String {
|
||||
let updated_tag = media_ref.full_ref.replace(media_ref.fname, new_name);
|
||||
field.replace(media_ref.full_ref, &updated_tag)
|
||||
}
|
||||
|
||||
/// Compare the files found on disk with the files referenced by notes.
///
/// Returns (unused, missing): `unused` holds the entries of `files` that no
/// note references, in their original order; `missing` holds the referenced
/// names that did not appear in `files`, in no particular order.
fn find_unused_and_missing(
    files: Vec<String>,
    mut references: HashSet<String>,
) -> (Vec<String>, Vec<String>) {
    // `remove` reports whether the name was referenced and strikes it off in
    // the same step, so whatever is left afterwards was never found on disk
    let unused: Vec<String> = files
        .into_iter()
        .filter(|fname| !references.remove(fname))
        .collect();
    let missing: Vec<String> = references.into_iter().collect();

    (unused, missing)
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::err::Result;
|
||||
use crate::media::check::{MediaCheckOutput, MediaChecker, RenamedFile};
|
||||
use crate::media::check::{MediaCheckOutput, MediaChecker};
|
||||
use crate::media::MediaManager;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
|
||||
fn common_setup() -> Result<(TempDir, MediaManager)> {
|
||||
fn common_setup() -> Result<(TempDir, MediaManager, PathBuf)> {
|
||||
let dir = tempdir()?;
|
||||
let media_dir = dir.path().join("media");
|
||||
fs::create_dir(&media_dir)?;
|
||||
let media_db = dir.path().join("media.db");
|
||||
let col_path = dir.path().join("col.anki2");
|
||||
fs::write(
|
||||
&col_path,
|
||||
&include_bytes!("../../tests/support/mediacheck.anki2")[..],
|
||||
)?;
|
||||
|
||||
let mgr = MediaManager::new(&media_dir, media_db)?;
|
||||
|
||||
Ok((dir, mgr))
|
||||
Ok((dir, mgr, col_path))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_media_check() -> Result<()> {
|
||||
let (_dir, mgr) = common_setup()?;
|
||||
let (_dir, mgr, col_path) = common_setup()?;
|
||||
|
||||
// add some test files
|
||||
fs::write(&mgr.media_folder.join("zerobytes"), "")?;
|
||||
|
@ -214,18 +361,17 @@ mod test {
|
|||
fs::write(&mgr.media_folder.join("foo[.jpg"), "foo")?;
|
||||
|
||||
let progress = |_n| true;
|
||||
let mut checker = MediaChecker::new(&mgr, progress);
|
||||
let mut output = checker.check()?;
|
||||
output.files.sort();
|
||||
let mut checker = MediaChecker::new(&mgr, &col_path, progress);
|
||||
let output = checker.check()?;
|
||||
|
||||
assert_eq!(
|
||||
output,
|
||||
MediaCheckOutput {
|
||||
files: vec!["foo.jpg".to_string(), "normal.jpg".to_string()],
|
||||
renamed: vec![RenamedFile {
|
||||
current_fname: "foo.jpg".to_string(),
|
||||
original_fname: "foo[.jpg".to_string()
|
||||
}],
|
||||
unused: vec![],
|
||||
missing: vec!["ぱぱ.jpg".into()],
|
||||
renamed: vec![("foo[.jpg".into(), "foo.jpg".into())]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
dirs: vec!["folder".to_string()],
|
||||
oversize: vec![]
|
||||
}
|
||||
|
@ -239,13 +385,14 @@ mod test {
|
|||
|
||||
#[test]
|
||||
fn test_unicode_normalization() -> Result<()> {
|
||||
let (_dir, mgr) = common_setup()?;
|
||||
let (_dir, mgr, col_path) = common_setup()?;
|
||||
|
||||
fs::write(&mgr.media_folder.join("ぱぱ.jpg"), "nfd encoding")?;
|
||||
|
||||
let progress = |_n| true;
|
||||
let mut checker = MediaChecker::new(&mgr, progress);
|
||||
let output = checker.check()?;
|
||||
let mut checker = MediaChecker::new(&mgr, &col_path, progress);
|
||||
let mut output = checker.check()?;
|
||||
output.missing.sort();
|
||||
|
||||
if cfg!(target_vendor = "apple") {
|
||||
// on a Mac, the file should not have been renamed, but the returned name
|
||||
|
@ -253,8 +400,9 @@ mod test {
|
|||
assert_eq!(
|
||||
output,
|
||||
MediaCheckOutput {
|
||||
files: vec!["ぱぱ.jpg".to_string()],
|
||||
renamed: vec![],
|
||||
unused: vec![],
|
||||
missing: vec!["foo[.jpg".into(), "normal.jpg".into()],
|
||||
renamed: Default::default(),
|
||||
dirs: vec![],
|
||||
oversize: vec![]
|
||||
}
|
||||
|
@ -265,11 +413,11 @@ mod test {
|
|||
assert_eq!(
|
||||
output,
|
||||
MediaCheckOutput {
|
||||
files: vec!["ぱぱ.jpg".to_string()],
|
||||
renamed: vec![RenamedFile {
|
||||
current_fname: "ぱぱ.jpg".to_string(),
|
||||
original_fname: "ぱぱ.jpg".to_string()
|
||||
}],
|
||||
unused: vec![],
|
||||
missing: vec!["foo[.jpg".into(), "normal.jpg".into()],
|
||||
renamed: vec![("ぱぱ.jpg".into(), "ぱぱ.jpg".into())]
|
||||
.into_iter()
|
||||
.collect(),
|
||||
dirs: vec![],
|
||||
oversize: vec![]
|
||||
}
|
||||
|
|
153
rslib/src/media/col.rs
Normal file
153
rslib/src/media/col.rs
Normal file
|
@ -0,0 +1,153 @@
|
|||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
//! Basic note reading/updating functionality for the media DB check.
|
||||
use crate::err::{AnkiError, Result};
|
||||
use crate::text::strip_html_preserving_image_filenames;
|
||||
use crate::time::i64_unix_timestamp;
|
||||
use crate::types::{ObjID, Timestamp, Usn};
|
||||
use rusqlite::{params, Connection, Row, NO_PARAMS};
|
||||
use serde_aux::field_attributes::deserialize_number_from_string;
|
||||
use serde_derive::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::path::Path;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) struct Note {
|
||||
pub id: ObjID,
|
||||
pub mid: ObjID,
|
||||
pub mtime_secs: Timestamp,
|
||||
pub usn: Usn,
|
||||
fields: Vec<String>,
|
||||
}
|
||||
|
||||
impl Note {
|
||||
pub fn fields(&self) -> &Vec<String> {
|
||||
&self.fields
|
||||
}
|
||||
|
||||
pub fn set_field(&mut self, idx: usize, text: impl Into<String>) -> Result<()> {
|
||||
if idx >= self.fields.len() {
|
||||
return Err(AnkiError::invalid_input(
|
||||
"field idx out of range".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
self.fields[idx] = text.into();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn field_checksum(text: &str) -> u32 {
|
||||
let digest = sha1::Sha1::from(text).digest().bytes();
|
||||
u32::from_be_bytes(digest[..4].try_into().unwrap())
|
||||
}
|
||||
|
||||
pub(super) fn open_or_create_collection_db(path: &Path) -> Result<Connection> {
|
||||
let db = Connection::open(path)?;
|
||||
|
||||
db.pragma_update(None, "locking_mode", &"exclusive")?;
|
||||
db.pragma_update(None, "page_size", &4096)?;
|
||||
db.pragma_update(None, "cache_size", &(-40 * 1024))?;
|
||||
db.pragma_update(None, "legacy_file_format", &false)?;
|
||||
db.pragma_update(None, "journal", &"wal")?;
|
||||
db.set_prepared_statement_cache_capacity(5);
|
||||
|
||||
Ok(db)
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub(super) struct NoteType {
|
||||
#[serde(deserialize_with = "deserialize_number_from_string")]
|
||||
id: ObjID,
|
||||
#[serde(rename = "sortf")]
|
||||
sort_field_idx: u16,
|
||||
}
|
||||
|
||||
pub(super) fn get_note_types(db: &Connection) -> Result<HashMap<ObjID, NoteType>> {
|
||||
let mut stmt = db.prepare("select models from col")?;
|
||||
let note_types = stmt
|
||||
.query_and_then(NO_PARAMS, |row| -> Result<HashMap<ObjID, NoteType>> {
|
||||
let v: HashMap<ObjID, NoteType> = serde_json::from_str(row.get_raw(0).as_str()?)?;
|
||||
Ok(v)
|
||||
})?
|
||||
.next()
|
||||
.ok_or_else(|| AnkiError::DBError {
|
||||
info: "col table empty".to_string(),
|
||||
})??;
|
||||
Ok(note_types)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn get_note(db: &Connection, nid: ObjID) -> Result<Option<Note>> {
|
||||
let mut stmt = db.prepare_cached("select id, mid, mod, usn, flds from notes where id=?")?;
|
||||
let note = stmt.query_and_then(params![nid], row_to_note)?.next();
|
||||
|
||||
note.transpose()
|
||||
}
|
||||
|
||||
pub(super) fn for_every_note<F: FnMut(&mut Note) -> Result<()>>(
|
||||
db: &Connection,
|
||||
mut func: F,
|
||||
) -> Result<()> {
|
||||
let mut stmt = db.prepare("select id, mid, mod, usn, flds from notes")?;
|
||||
for result in stmt.query_and_then(NO_PARAMS, |row| {
|
||||
let mut note = row_to_note(row)?;
|
||||
func(&mut note)
|
||||
})? {
|
||||
result?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn row_to_note(row: &Row) -> Result<Note> {
|
||||
Ok(Note {
|
||||
id: row.get(0)?,
|
||||
mid: row.get(1)?,
|
||||
mtime_secs: row.get(2)?,
|
||||
usn: row.get(3)?,
|
||||
fields: row
|
||||
.get_raw(4)
|
||||
.as_str()?
|
||||
.split('\x1f')
|
||||
.map(|s| s.to_string())
|
||||
.collect(),
|
||||
})
|
||||
}
|
||||
|
||||
pub(super) fn set_note(db: &Connection, note: &mut Note, note_type: &NoteType) -> Result<()> {
|
||||
note.mtime_secs = i64_unix_timestamp();
|
||||
// hard-coded for now
|
||||
note.usn = -1;
|
||||
let csum = field_checksum(¬e.fields()[0]);
|
||||
let sort_field = strip_html_preserving_image_filenames(
|
||||
note.fields()
|
||||
.get(note_type.sort_field_idx as usize)
|
||||
.ok_or_else(|| AnkiError::DBError {
|
||||
info: "sort field out of range".to_string(),
|
||||
})?,
|
||||
);
|
||||
|
||||
let mut stmt =
|
||||
db.prepare_cached("update notes set mod=?,usn=?,flds=?,sfld=?,csum=? where id=?")?;
|
||||
stmt.execute(params![
|
||||
note.mtime_secs,
|
||||
note.usn,
|
||||
note.fields().join("\x1f"),
|
||||
sort_field,
|
||||
csum,
|
||||
note.id,
|
||||
])?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(super) fn mark_collection_modified(db: &Connection) -> Result<()> {
|
||||
db.execute(
|
||||
"update col set usn=-1, mod=?",
|
||||
params![i64_unix_timestamp()],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
|
@ -11,6 +11,7 @@ use std::path::{Path, PathBuf};
|
|||
|
||||
pub mod changetracker;
|
||||
pub mod check;
|
||||
pub mod col;
|
||||
pub mod database;
|
||||
pub mod files;
|
||||
pub mod sync;
|
||||
|
|
|
@ -31,8 +31,33 @@ lazy_static! {
|
|||
.unwrap();
|
||||
|
||||
static ref IMG_TAG: Regex = Regex::new(
|
||||
// group 1 is filename
|
||||
r#"(?i)<img[^>]+src=["']?([^"'>]+)["']?[^>]*>"#
|
||||
r#"(?xsi)
|
||||
# the start of the image tag
|
||||
<img[^>]+src=
|
||||
(?:
|
||||
# 1: double-quoted filename
|
||||
"
|
||||
([^"]+?)
|
||||
"
|
||||
[^>]*>
|
||||
|
|
||||
# 2: single-quoted filename
|
||||
'
|
||||
([^']+?)
|
||||
'
|
||||
[^>]*>
|
||||
|
|
||||
# 3: unquoted filename
|
||||
([^ >]+?)
|
||||
(?:
|
||||
# then either a space and the rest
|
||||
\x20[^>]*>
|
||||
|
|
||||
# or the tag immediately ends
|
||||
>
|
||||
)
|
||||
)
|
||||
"#
|
||||
).unwrap();
|
||||
|
||||
// videos are also in sound tags
|
||||
|
@ -106,6 +131,39 @@ pub fn extract_av_tags<'a>(text: &'a str, question_side: bool) -> (Cow<'a, str>,
|
|||
(replaced_text, tags)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct MediaRef<'a> {
|
||||
pub full_ref: &'a str,
|
||||
pub fname: &'a str,
|
||||
}
|
||||
|
||||
pub(crate) fn extract_media_refs(text: &str) -> Vec<MediaRef> {
|
||||
let mut out = vec![];
|
||||
|
||||
for caps in IMG_TAG.captures_iter(text) {
|
||||
out.push(MediaRef {
|
||||
full_ref: caps.get(0).unwrap().as_str(),
|
||||
fname: caps
|
||||
.get(1)
|
||||
.or_else(|| caps.get(2))
|
||||
.or_else(|| caps.get(3))
|
||||
.unwrap()
|
||||
.as_str(),
|
||||
});
|
||||
}
|
||||
|
||||
for caps in AV_TAGS.captures_iter(text) {
|
||||
if let Some(m) = caps.get(1) {
|
||||
out.push(MediaRef {
|
||||
full_ref: caps.get(0).unwrap().as_str(),
|
||||
fname: m.as_str(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
|
||||
let mut other_args = vec![];
|
||||
let mut split_args = args.split_ascii_whitespace();
|
||||
|
@ -141,7 +199,7 @@ fn tts_tag_from_string<'a>(field_text: &'a str, args: &'a str) -> AVTag {
|
|||
}
|
||||
|
||||
pub fn strip_html_preserving_image_filenames(html: &str) -> Cow<str> {
|
||||
let without_fnames = IMG_TAG.replace_all(html, r" $1 ");
|
||||
let without_fnames = IMG_TAG.replace_all(html, r" ${1}${2}${3} ");
|
||||
let without_html = HTML.replace_all(&without_fnames, "");
|
||||
// no changes?
|
||||
if let Cow::Borrowed(b) = without_html {
|
||||
|
@ -157,7 +215,6 @@ pub(crate) fn contains_latex(text: &str) -> bool {
|
|||
LATEX.is_match(text)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn normalize_to_nfc(s: &str) -> Cow<str> {
|
||||
if !is_nfc(s) {
|
||||
s.chars().nfc().collect::<String>().into()
|
||||
|
|
11
rslib/src/time.rs
Normal file
11
rslib/src/time.rs
Normal file
|
@ -0,0 +1,11 @@
|
|||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
use std::time;
|
||||
|
||||
/// Whole seconds elapsed since the Unix epoch, as a signed integer.
///
/// Panics if the system clock reports a time before the epoch.
pub(crate) fn i64_unix_timestamp() -> i64 {
    let now = time::SystemTime::now();
    let since_epoch = now.duration_since(time::UNIX_EPOCH).unwrap();
    since_epoch.as_secs() as i64
}
|
9
rslib/src/types.rs
Normal file
9
rslib/src/types.rs
Normal file
|
@ -0,0 +1,9 @@
|
|||
// Copyright: Ankitects Pty Ltd and contributors
|
||||
// License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
// while Anki tends to only use positive numbers, sqlite only supports
|
||||
// signed integers, so these numbers are signed as well.
|
||||
|
||||
pub type ObjID = i64;
|
||||
pub type Usn = i32;
|
||||
pub type Timestamp = i64;
|
BIN
rslib/tests/support/mediacheck.anki2
Normal file
BIN
rslib/tests/support/mediacheck.anki2
Normal file
Binary file not shown.
Loading…
Reference in a new issue