mirror of
https://github.com/ankitects/anki.git
synced 2025-09-24 08:46:37 -04:00
move from Python's URI escaping to IRI escaping in Rust
Should make non-Latin text readable in the HTML editor, without the breakages reverted in the previous change.
This commit is contained in:
parent
e97c381a6f
commit
bf507cca98
14 changed files with 252 additions and 27 deletions
15
Cargo.lock
generated
15
Cargo.lock
generated
|
@ -76,6 +76,7 @@ dependencies = [
|
|||
"num-integer",
|
||||
"num_enum",
|
||||
"once_cell",
|
||||
"pct-str",
|
||||
"pin-project",
|
||||
"proc-macro-nested",
|
||||
"prost",
|
||||
|
@ -1466,6 +1467,14 @@ dependencies = [
|
|||
"proc-macro-hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pct-str"
|
||||
version = "1.1.0"
|
||||
source = "git+https://github.com/timothee-haudebourg/pct-str.git?rev=4adccd8d4a222ab2672350a102f06ae832a0572d#4adccd8d4a222ab2672350a102f06ae832a0572d"
|
||||
dependencies = [
|
||||
"utf8-decode",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "2.1.0"
|
||||
|
@ -2719,6 +2728,12 @@ version = "0.7.6"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||
|
||||
[[package]]
|
||||
name = "utf8-decode"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca61eb27fa339aa08826a29f03e87b99b4d8f0fc2255306fd266bb1b6a9de498"
|
||||
|
||||
[[package]]
|
||||
name = "utime"
|
||||
version = "0.3.1"
|
||||
|
|
|
@ -1471,6 +1471,16 @@ def raze_fetch_remote_crates():
|
|||
build_file = Label("//cargo/remote:BUILD.paste-impl-0.1.18.bazel"),
|
||||
)
|
||||
|
||||
maybe(
|
||||
new_git_repository,
|
||||
name = "raze__pct_str__1_1_0",
|
||||
remote = "https://github.com/timothee-haudebourg/pct-str.git",
|
||||
shallow_since = "1605376517 +0100",
|
||||
commit = "4adccd8d4a222ab2672350a102f06ae832a0572d",
|
||||
build_file = Label("//cargo/remote:BUILD.pct-str-1.1.0.bazel"),
|
||||
init_submodules = True,
|
||||
)
|
||||
|
||||
maybe(
|
||||
http_archive,
|
||||
name = "raze__percent_encoding__2_1_0",
|
||||
|
@ -2741,6 +2751,16 @@ def raze_fetch_remote_crates():
|
|||
build_file = Label("//cargo/remote:BUILD.utf-8-0.7.6.bazel"),
|
||||
)
|
||||
|
||||
maybe(
|
||||
http_archive,
|
||||
name = "raze__utf8_decode__1_0_1",
|
||||
url = "https://crates.io/api/v1/crates/utf8-decode/1.0.1/download",
|
||||
type = "tar.gz",
|
||||
sha256 = "ca61eb27fa339aa08826a29f03e87b99b4d8f0fc2255306fd266bb1b6a9de498",
|
||||
strip_prefix = "utf8-decode-1.0.1",
|
||||
build_file = Label("//cargo/remote:BUILD.utf8-decode-1.0.1.bazel"),
|
||||
)
|
||||
|
||||
maybe(
|
||||
http_archive,
|
||||
name = "raze__utime__0_3_1",
|
||||
|
|
|
@ -1340,6 +1340,15 @@
|
|||
"license_file": null,
|
||||
"description": "Implementation detail of the `paste` crate"
|
||||
},
|
||||
{
|
||||
"name": "pct-str",
|
||||
"version": "1.1.0",
|
||||
"authors": "Timothée Haudebourg <author@haudebourg.net>",
|
||||
"repository": "https://github.com/timothee-haudebourg/pct-str",
|
||||
"license": "Apache-2.0 OR MIT",
|
||||
"license_file": null,
|
||||
"description": "Percent-encoded strings for URL, URI, IRI, etc."
|
||||
},
|
||||
{
|
||||
"name": "percent-encoding",
|
||||
"version": "2.1.0",
|
||||
|
@ -2492,6 +2501,15 @@
|
|||
"license_file": null,
|
||||
"description": "Incremental, zero-copy UTF-8 decoding with error handling"
|
||||
},
|
||||
{
|
||||
"name": "utf8-decode",
|
||||
"version": "1.0.1",
|
||||
"authors": "Timothée Haudebourg <author@haudebourg.net>",
|
||||
"repository": "https://github.com/timothee-haudebourg/utf8-decode",
|
||||
"license": "Apache-2.0 OR MIT",
|
||||
"license_file": null,
|
||||
"description": "UTF-8 incremental decoding iterators."
|
||||
},
|
||||
{
|
||||
"name": "utime",
|
||||
"version": "0.3.1",
|
||||
|
|
60
cargo/remote/BUILD.pct-str-1.1.0.bazel
vendored
Normal file
60
cargo/remote/BUILD.pct-str-1.1.0.bazel
vendored
Normal file
|
@ -0,0 +1,60 @@
|
|||
"""
|
||||
@generated
|
||||
cargo-raze crate build file.
|
||||
|
||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||
"""
|
||||
|
||||
# buildifier: disable=load
|
||||
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||
|
||||
# buildifier: disable=load
|
||||
load(
|
||||
"@rules_rust//rust:rust.bzl",
|
||||
"rust_binary",
|
||||
"rust_library",
|
||||
"rust_test",
|
||||
)
|
||||
|
||||
package(default_visibility = [
|
||||
# Public for visibility by "@raze__crate__version//" targets.
|
||||
#
|
||||
# Prefer access through "//cargo", which limits external
|
||||
# visibility to explicit Cargo.toml dependencies.
|
||||
"//visibility:public",
|
||||
])
|
||||
|
||||
licenses([
|
||||
"notice", # MIT from expression "MIT OR Apache-2.0"
|
||||
])
|
||||
|
||||
# Generated Targets
|
||||
|
||||
# Unsupported target "encode" with type "example" omitted
|
||||
|
||||
# Unsupported target "str" with type "example" omitted
|
||||
|
||||
# Unsupported target "string" with type "example" omitted
|
||||
|
||||
rust_library(
|
||||
name = "pct_str",
|
||||
srcs = glob(["**/*.rs"]),
|
||||
crate_features = [
|
||||
],
|
||||
crate_root = "src/lib.rs",
|
||||
crate_type = "lib",
|
||||
data = [],
|
||||
edition = "2018",
|
||||
rustc_flags = [
|
||||
"--cap-lints=allow",
|
||||
],
|
||||
tags = [
|
||||
"cargo-raze",
|
||||
"manual",
|
||||
],
|
||||
version = "1.1.0",
|
||||
# buildifier: leave-alone
|
||||
deps = [
|
||||
"@raze__utf8_decode__1_0_1//:utf8_decode",
|
||||
],
|
||||
)
|
57
cargo/remote/BUILD.utf8-decode-1.0.1.bazel
vendored
Normal file
57
cargo/remote/BUILD.utf8-decode-1.0.1.bazel
vendored
Normal file
|
@ -0,0 +1,57 @@
|
|||
"""
|
||||
@generated
|
||||
cargo-raze crate build file.
|
||||
|
||||
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||
"""
|
||||
|
||||
# buildifier: disable=load
|
||||
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||
|
||||
# buildifier: disable=load
|
||||
load(
|
||||
"@rules_rust//rust:rust.bzl",
|
||||
"rust_binary",
|
||||
"rust_library",
|
||||
"rust_test",
|
||||
)
|
||||
|
||||
package(default_visibility = [
|
||||
# Public for visibility by "@raze__crate__version//" targets.
|
||||
#
|
||||
# Prefer access through "//cargo", which limits external
|
||||
# visibility to explicit Cargo.toml dependencies.
|
||||
"//visibility:public",
|
||||
])
|
||||
|
||||
licenses([
|
||||
"notice", # MIT from expression "MIT OR Apache-2.0"
|
||||
])
|
||||
|
||||
# Generated Targets
|
||||
|
||||
# Unsupported target "safe" with type "example" omitted
|
||||
|
||||
# Unsupported target "unsafe" with type "example" omitted
|
||||
|
||||
rust_library(
|
||||
name = "utf8_decode",
|
||||
srcs = glob(["**/*.rs"]),
|
||||
crate_features = [
|
||||
],
|
||||
crate_root = "src/lib.rs",
|
||||
crate_type = "lib",
|
||||
data = [],
|
||||
edition = "2018",
|
||||
rustc_flags = [
|
||||
"--cap-lints=allow",
|
||||
],
|
||||
tags = [
|
||||
"cargo-raze",
|
||||
"manual",
|
||||
],
|
||||
version = "1.0.1",
|
||||
# buildifier: leave-alone
|
||||
deps = [
|
||||
],
|
||||
)
|
|
@ -19,6 +19,8 @@ COMMITS_SHALLOW_SINCE = {
|
|||
"0cb6f7d14c62819e37cd221736f8b0555e823712": "1619519657 +1000",
|
||||
# tokio-io-timeout
|
||||
"1ee0892217e9a76bba4bb369ec5fab8854935a3c": "1619517354 +1000",
|
||||
# pct-str
|
||||
"4adccd8d4a222ab2672350a102f06ae832a0572d": "1605376517 +0100",
|
||||
}
|
||||
|
||||
import glob
|
||||
|
|
|
@ -21,6 +21,8 @@ service CardRenderingService {
|
|||
returns (RenderCardResponse);
|
||||
rpc StripAVTags(generic.String) returns (generic.String);
|
||||
rpc RenderMarkdown(RenderMarkdownRequest) returns (generic.String);
|
||||
rpc EncodeIriPaths(generic.String) returns (generic.String);
|
||||
rpc DecodeIriPaths(generic.String) returns (generic.String);
|
||||
}
|
||||
|
||||
message ExtractAVTagsRequest {
|
||||
|
|
|
@ -8,10 +8,7 @@ import pprint
|
|||
import re
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from typing import Any, Callable, List, Match, Optional, Tuple
|
||||
from typing import Any, Callable, List, Optional, Tuple
|
||||
|
||||
from anki import media_pb2
|
||||
from anki.consts import *
|
||||
|
@ -191,22 +188,10 @@ class MediaManager:
|
|||
|
||||
def escape_media_filenames(self, string: str, unescape: bool = False) -> str:
|
||||
"Apply or remove percent encoding to filenames in html tags (audio, image, object)."
|
||||
fn: Callable
|
||||
if unescape:
|
||||
fn = urllib.parse.unquote
|
||||
return self.col._backend.decode_iri_paths(string)
|
||||
else:
|
||||
fn = urllib.parse.quote
|
||||
|
||||
def repl(match: Match) -> str:
|
||||
tag = match.group(0)
|
||||
fname = match.group("fname")
|
||||
if re.match("(https?|ftp)://", fname):
|
||||
return tag
|
||||
return tag.replace(fname, fn(fname))
|
||||
|
||||
for reg in self.html_media_regexps:
|
||||
string = re.sub(reg, repl, string)
|
||||
return string
|
||||
return self.col._backend.encode_iri_paths(string)
|
||||
|
||||
# Checking media
|
||||
##########################################################################
|
||||
|
|
|
@ -116,6 +116,7 @@ rust_library(
|
|||
"//rslib/cargo:unicode_normalization",
|
||||
"//rslib/cargo:utime",
|
||||
"//rslib/cargo:zip",
|
||||
"//rslib/cargo:pct_str",
|
||||
"//rslib/i18n:anki_i18n",
|
||||
] + select({
|
||||
# rustls on Linux
|
||||
|
|
|
@ -92,3 +92,4 @@ pulldown-cmark = "0.8.0"
|
|||
fnv = "1.0.7"
|
||||
strum = { version = "0.21.0", features = ["derive"] }
|
||||
tokio-util = { version = "0.6.7", features = ["io"] }
|
||||
pct-str = { git="https://github.com/timothee-haudebourg/pct-str.git", rev="4adccd8d4a222ab2672350a102f06ae832a0572d" }
|
||||
|
|
|
@ -210,6 +210,15 @@ alias(
|
|||
],
|
||||
)
|
||||
|
||||
alias(
|
||||
name = "pct_str",
|
||||
actual = "@raze__pct_str__1_1_0//:pct_str",
|
||||
tags = [
|
||||
"cargo-raze",
|
||||
"manual",
|
||||
],
|
||||
)
|
||||
|
||||
alias(
|
||||
name = "pin_project",
|
||||
actual = "@raze__pin_project__1_0_7//:pin_project",
|
||||
|
|
|
@ -10,7 +10,10 @@ use crate::{
|
|||
notetype::{CardTemplateSchema11, RenderCardOutput},
|
||||
prelude::*,
|
||||
template::RenderedNode,
|
||||
text::{extract_av_tags, sanitize_html_no_images, strip_av_tags, AvTag},
|
||||
text::{
|
||||
decode_iri_paths, encode_iri_paths, extract_av_tags, sanitize_html_no_images,
|
||||
strip_av_tags, AvTag,
|
||||
},
|
||||
};
|
||||
|
||||
impl CardRenderingService for Backend {
|
||||
|
@ -150,6 +153,14 @@ impl CardRenderingService for Backend {
|
|||
}
|
||||
Ok(text.into())
|
||||
}
|
||||
|
||||
fn encode_iri_paths(&self, input: pb::String) -> Result<pb::String> {
|
||||
Ok(encode_iri_paths(&input.val).to_string().into())
|
||||
}
|
||||
|
||||
fn decode_iri_paths(&self, input: pb::String) -> Result<pb::String> {
|
||||
Ok(decode_iri_paths(&input.val).to_string().into())
|
||||
}
|
||||
}
|
||||
|
||||
fn rendered_nodes_to_proto(nodes: Vec<RenderedNode>) -> Vec<pb::RenderedTemplateNode> {
|
||||
|
|
|
@ -8,9 +8,6 @@ use std::{
|
|||
path::Path,
|
||||
};
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
|
||||
use crate::{
|
||||
collection::Collection,
|
||||
error::{AnkiError, DbErrorKind, Result},
|
||||
|
@ -25,13 +22,9 @@ use crate::{
|
|||
MediaManager,
|
||||
},
|
||||
notes::Note,
|
||||
text::{extract_media_refs, normalize_to_nfc, MediaRef},
|
||||
text::{extract_media_refs, normalize_to_nfc, MediaRef, REMOTE_FILENAME},
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
static ref REMOTE_FILENAME: Regex = Regex::new("(?i)^https?://").unwrap();
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct MediaCheckOutput {
|
||||
pub unused: Vec<String>,
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
use std::{borrow::Cow, ptr};
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use pct_str::{IriReserved, PctStr, PctString};
|
||||
use regex::{Captures, Regex};
|
||||
use unicase::eq as uni_eq;
|
||||
use unicode_normalization::{
|
||||
|
@ -424,6 +425,56 @@ pub(crate) fn matches_glob(text: &str, search: &str) -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
/// Matches text that starts with `http://` or `https://` (case-insensitive).
/// Used to recognise remote references, which are not local media filenames.
pub(crate) static ref REMOTE_FILENAME: Regex = Regex::new("(?i)^https?://").unwrap();
|
||||
}
|
||||
|
||||
/// IRI-encode unescaped local paths in HTML fragment.
|
||||
pub(crate) fn encode_iri_paths(unescaped_html: &str) -> Cow<str> {
|
||||
transform_html_paths(unescaped_html, |fname| {
|
||||
PctString::encode(fname.chars(), IriReserved::Segment)
|
||||
.into_string()
|
||||
.into()
|
||||
})
|
||||
}
|
||||
|
||||
/// URI-decode unescaped local paths in HTML fragment.
|
||||
pub(crate) fn decode_iri_paths(unescaped_html: &str) -> Cow<str> {
|
||||
transform_html_paths(unescaped_html, |fname| {
|
||||
match PctStr::new(fname) {
|
||||
Ok(s) => s.decode().into(),
|
||||
Err(_e) => {
|
||||
// invalid percent encoding; return unchanged
|
||||
fname.into()
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Apply a transform to local filename references in tags like IMG.
|
||||
/// Required to display time, as Anki unfortunately stores the references
|
||||
/// in unencoded form in the database.
|
||||
fn transform_html_paths<F>(html: &str, transform: F) -> Cow<str>
|
||||
where
|
||||
F: Fn(&str) -> Cow<str>,
|
||||
{
|
||||
HTML_MEDIA_TAGS.replace_all(html, |caps: &Captures| {
|
||||
let fname = caps
|
||||
.get(1)
|
||||
.or_else(|| caps.get(2))
|
||||
.or_else(|| caps.get(3))
|
||||
.unwrap()
|
||||
.as_str()
|
||||
.trim();
|
||||
let full = caps.get(0).unwrap().as_str();
|
||||
if REMOTE_FILENAME.is_match(fname) {
|
||||
full.into()
|
||||
} else {
|
||||
full.replace(fname, &transform(fname))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::borrow::Cow;
|
||||
|
|
Loading…
Reference in a new issue