mirror of
https://github.com/ankitects/anki.git
synced 2025-09-25 01:06:35 -04:00
move from Python's URI escaping to IRI escaping in Rust
Should make non-Latin text readable in the HTML editor, without the breakages reverted in the previous change.
This commit is contained in:
parent
e97c381a6f
commit
bf507cca98
14 changed files with 252 additions and 27 deletions
15
Cargo.lock
generated
15
Cargo.lock
generated
|
@ -76,6 +76,7 @@ dependencies = [
|
||||||
"num-integer",
|
"num-integer",
|
||||||
"num_enum",
|
"num_enum",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
"pct-str",
|
||||||
"pin-project",
|
"pin-project",
|
||||||
"proc-macro-nested",
|
"proc-macro-nested",
|
||||||
"prost",
|
"prost",
|
||||||
|
@ -1466,6 +1467,14 @@ dependencies = [
|
||||||
"proc-macro-hack",
|
"proc-macro-hack",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pct-str"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "git+https://github.com/timothee-haudebourg/pct-str.git?rev=4adccd8d4a222ab2672350a102f06ae832a0572d#4adccd8d4a222ab2672350a102f06ae832a0572d"
|
||||||
|
dependencies = [
|
||||||
|
"utf8-decode",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "percent-encoding"
|
name = "percent-encoding"
|
||||||
version = "2.1.0"
|
version = "2.1.0"
|
||||||
|
@ -2719,6 +2728,12 @@ version = "0.7.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8-decode"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ca61eb27fa339aa08826a29f03e87b99b4d8f0fc2255306fd266bb1b6a9de498"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utime"
|
name = "utime"
|
||||||
version = "0.3.1"
|
version = "0.3.1"
|
||||||
|
|
|
@ -1471,6 +1471,16 @@ def raze_fetch_remote_crates():
|
||||||
build_file = Label("//cargo/remote:BUILD.paste-impl-0.1.18.bazel"),
|
build_file = Label("//cargo/remote:BUILD.paste-impl-0.1.18.bazel"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
maybe(
|
||||||
|
new_git_repository,
|
||||||
|
name = "raze__pct_str__1_1_0",
|
||||||
|
remote = "https://github.com/timothee-haudebourg/pct-str.git",
|
||||||
|
shallow_since = "1605376517 +0100",
|
||||||
|
commit = "4adccd8d4a222ab2672350a102f06ae832a0572d",
|
||||||
|
build_file = Label("//cargo/remote:BUILD.pct-str-1.1.0.bazel"),
|
||||||
|
init_submodules = True,
|
||||||
|
)
|
||||||
|
|
||||||
maybe(
|
maybe(
|
||||||
http_archive,
|
http_archive,
|
||||||
name = "raze__percent_encoding__2_1_0",
|
name = "raze__percent_encoding__2_1_0",
|
||||||
|
@ -2741,6 +2751,16 @@ def raze_fetch_remote_crates():
|
||||||
build_file = Label("//cargo/remote:BUILD.utf-8-0.7.6.bazel"),
|
build_file = Label("//cargo/remote:BUILD.utf-8-0.7.6.bazel"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
maybe(
|
||||||
|
http_archive,
|
||||||
|
name = "raze__utf8_decode__1_0_1",
|
||||||
|
url = "https://crates.io/api/v1/crates/utf8-decode/1.0.1/download",
|
||||||
|
type = "tar.gz",
|
||||||
|
sha256 = "ca61eb27fa339aa08826a29f03e87b99b4d8f0fc2255306fd266bb1b6a9de498",
|
||||||
|
strip_prefix = "utf8-decode-1.0.1",
|
||||||
|
build_file = Label("//cargo/remote:BUILD.utf8-decode-1.0.1.bazel"),
|
||||||
|
)
|
||||||
|
|
||||||
maybe(
|
maybe(
|
||||||
http_archive,
|
http_archive,
|
||||||
name = "raze__utime__0_3_1",
|
name = "raze__utime__0_3_1",
|
||||||
|
|
|
@ -1340,6 +1340,15 @@
|
||||||
"license_file": null,
|
"license_file": null,
|
||||||
"description": "Implementation detail of the `paste` crate"
|
"description": "Implementation detail of the `paste` crate"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "pct-str",
|
||||||
|
"version": "1.1.0",
|
||||||
|
"authors": "Timothée Haudebourg <author@haudebourg.net>",
|
||||||
|
"repository": "https://github.com/timothee-haudebourg/pct-str",
|
||||||
|
"license": "Apache-2.0 OR MIT",
|
||||||
|
"license_file": null,
|
||||||
|
"description": "Percent-encoded strings for URL, URI, IRI, etc."
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "percent-encoding",
|
"name": "percent-encoding",
|
||||||
"version": "2.1.0",
|
"version": "2.1.0",
|
||||||
|
@ -2492,6 +2501,15 @@
|
||||||
"license_file": null,
|
"license_file": null,
|
||||||
"description": "Incremental, zero-copy UTF-8 decoding with error handling"
|
"description": "Incremental, zero-copy UTF-8 decoding with error handling"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "utf8-decode",
|
||||||
|
"version": "1.0.1",
|
||||||
|
"authors": "Timothée Haudebourg <author@haudebourg.net>",
|
||||||
|
"repository": "https://github.com/timothee-haudebourg/utf8-decode",
|
||||||
|
"license": "Apache-2.0 OR MIT",
|
||||||
|
"license_file": null,
|
||||||
|
"description": "UTF-8 incremental decoding iterators."
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "utime",
|
"name": "utime",
|
||||||
"version": "0.3.1",
|
"version": "0.3.1",
|
||||||
|
|
60
cargo/remote/BUILD.pct-str-1.1.0.bazel
vendored
Normal file
60
cargo/remote/BUILD.pct-str-1.1.0.bazel
vendored
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
"""
|
||||||
|
@generated
|
||||||
|
cargo-raze crate build file.
|
||||||
|
|
||||||
|
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||||
|
"""
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load(
|
||||||
|
"@rules_rust//rust:rust.bzl",
|
||||||
|
"rust_binary",
|
||||||
|
"rust_library",
|
||||||
|
"rust_test",
|
||||||
|
)
|
||||||
|
|
||||||
|
package(default_visibility = [
|
||||||
|
# Public for visibility by "@raze__crate__version//" targets.
|
||||||
|
#
|
||||||
|
# Prefer access through "//cargo", which limits external
|
||||||
|
# visibility to explicit Cargo.toml dependencies.
|
||||||
|
"//visibility:public",
|
||||||
|
])
|
||||||
|
|
||||||
|
licenses([
|
||||||
|
"notice", # MIT from expression "MIT OR Apache-2.0"
|
||||||
|
])
|
||||||
|
|
||||||
|
# Generated Targets
|
||||||
|
|
||||||
|
# Unsupported target "encode" with type "example" omitted
|
||||||
|
|
||||||
|
# Unsupported target "str" with type "example" omitted
|
||||||
|
|
||||||
|
# Unsupported target "string" with type "example" omitted
|
||||||
|
|
||||||
|
rust_library(
|
||||||
|
name = "pct_str",
|
||||||
|
srcs = glob(["**/*.rs"]),
|
||||||
|
crate_features = [
|
||||||
|
],
|
||||||
|
crate_root = "src/lib.rs",
|
||||||
|
crate_type = "lib",
|
||||||
|
data = [],
|
||||||
|
edition = "2018",
|
||||||
|
rustc_flags = [
|
||||||
|
"--cap-lints=allow",
|
||||||
|
],
|
||||||
|
tags = [
|
||||||
|
"cargo-raze",
|
||||||
|
"manual",
|
||||||
|
],
|
||||||
|
version = "1.1.0",
|
||||||
|
# buildifier: leave-alone
|
||||||
|
deps = [
|
||||||
|
"@raze__utf8_decode__1_0_1//:utf8_decode",
|
||||||
|
],
|
||||||
|
)
|
57
cargo/remote/BUILD.utf8-decode-1.0.1.bazel
vendored
Normal file
57
cargo/remote/BUILD.utf8-decode-1.0.1.bazel
vendored
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
"""
|
||||||
|
@generated
|
||||||
|
cargo-raze crate build file.
|
||||||
|
|
||||||
|
DO NOT EDIT! Replaced on runs of cargo-raze
|
||||||
|
"""
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load("@bazel_skylib//lib:selects.bzl", "selects")
|
||||||
|
|
||||||
|
# buildifier: disable=load
|
||||||
|
load(
|
||||||
|
"@rules_rust//rust:rust.bzl",
|
||||||
|
"rust_binary",
|
||||||
|
"rust_library",
|
||||||
|
"rust_test",
|
||||||
|
)
|
||||||
|
|
||||||
|
package(default_visibility = [
|
||||||
|
# Public for visibility by "@raze__crate__version//" targets.
|
||||||
|
#
|
||||||
|
# Prefer access through "//cargo", which limits external
|
||||||
|
# visibility to explicit Cargo.toml dependencies.
|
||||||
|
"//visibility:public",
|
||||||
|
])
|
||||||
|
|
||||||
|
licenses([
|
||||||
|
"notice", # MIT from expression "MIT OR Apache-2.0"
|
||||||
|
])
|
||||||
|
|
||||||
|
# Generated Targets
|
||||||
|
|
||||||
|
# Unsupported target "safe" with type "example" omitted
|
||||||
|
|
||||||
|
# Unsupported target "unsafe" with type "example" omitted
|
||||||
|
|
||||||
|
rust_library(
|
||||||
|
name = "utf8_decode",
|
||||||
|
srcs = glob(["**/*.rs"]),
|
||||||
|
crate_features = [
|
||||||
|
],
|
||||||
|
crate_root = "src/lib.rs",
|
||||||
|
crate_type = "lib",
|
||||||
|
data = [],
|
||||||
|
edition = "2018",
|
||||||
|
rustc_flags = [
|
||||||
|
"--cap-lints=allow",
|
||||||
|
],
|
||||||
|
tags = [
|
||||||
|
"cargo-raze",
|
||||||
|
"manual",
|
||||||
|
],
|
||||||
|
version = "1.0.1",
|
||||||
|
# buildifier: leave-alone
|
||||||
|
deps = [
|
||||||
|
],
|
||||||
|
)
|
|
@ -19,6 +19,8 @@ COMMITS_SHALLOW_SINCE = {
|
||||||
"0cb6f7d14c62819e37cd221736f8b0555e823712": "1619519657 +1000",
|
"0cb6f7d14c62819e37cd221736f8b0555e823712": "1619519657 +1000",
|
||||||
# tokio-io-timeout
|
# tokio-io-timeout
|
||||||
"1ee0892217e9a76bba4bb369ec5fab8854935a3c": "1619517354 +1000",
|
"1ee0892217e9a76bba4bb369ec5fab8854935a3c": "1619517354 +1000",
|
||||||
|
# pct-str
|
||||||
|
"4adccd8d4a222ab2672350a102f06ae832a0572d": "1605376517 +0100",
|
||||||
}
|
}
|
||||||
|
|
||||||
import glob
|
import glob
|
||||||
|
|
|
@ -21,6 +21,8 @@ service CardRenderingService {
|
||||||
returns (RenderCardResponse);
|
returns (RenderCardResponse);
|
||||||
rpc StripAVTags(generic.String) returns (generic.String);
|
rpc StripAVTags(generic.String) returns (generic.String);
|
||||||
rpc RenderMarkdown(RenderMarkdownRequest) returns (generic.String);
|
rpc RenderMarkdown(RenderMarkdownRequest) returns (generic.String);
|
||||||
|
rpc EncodeIriPaths(generic.String) returns (generic.String);
|
||||||
|
rpc DecodeIriPaths(generic.String) returns (generic.String);
|
||||||
}
|
}
|
||||||
|
|
||||||
message ExtractAVTagsRequest {
|
message ExtractAVTagsRequest {
|
||||||
|
|
|
@ -8,10 +8,7 @@ import pprint
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import urllib.error
|
from typing import Any, Callable, List, Optional, Tuple
|
||||||
import urllib.parse
|
|
||||||
import urllib.request
|
|
||||||
from typing import Any, Callable, List, Match, Optional, Tuple
|
|
||||||
|
|
||||||
from anki import media_pb2
|
from anki import media_pb2
|
||||||
from anki.consts import *
|
from anki.consts import *
|
||||||
|
@ -191,22 +188,10 @@ class MediaManager:
|
||||||
|
|
||||||
def escape_media_filenames(self, string: str, unescape: bool = False) -> str:
|
def escape_media_filenames(self, string: str, unescape: bool = False) -> str:
|
||||||
"Apply or remove percent encoding to filenames in html tags (audio, image, object)."
|
"Apply or remove percent encoding to filenames in html tags (audio, image, object)."
|
||||||
fn: Callable
|
|
||||||
if unescape:
|
if unescape:
|
||||||
fn = urllib.parse.unquote
|
return self.col._backend.decode_iri_paths(string)
|
||||||
else:
|
else:
|
||||||
fn = urllib.parse.quote
|
return self.col._backend.encode_iri_paths(string)
|
||||||
|
|
||||||
def repl(match: Match) -> str:
|
|
||||||
tag = match.group(0)
|
|
||||||
fname = match.group("fname")
|
|
||||||
if re.match("(https?|ftp)://", fname):
|
|
||||||
return tag
|
|
||||||
return tag.replace(fname, fn(fname))
|
|
||||||
|
|
||||||
for reg in self.html_media_regexps:
|
|
||||||
string = re.sub(reg, repl, string)
|
|
||||||
return string
|
|
||||||
|
|
||||||
# Checking media
|
# Checking media
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
|
@ -116,6 +116,7 @@ rust_library(
|
||||||
"//rslib/cargo:unicode_normalization",
|
"//rslib/cargo:unicode_normalization",
|
||||||
"//rslib/cargo:utime",
|
"//rslib/cargo:utime",
|
||||||
"//rslib/cargo:zip",
|
"//rslib/cargo:zip",
|
||||||
|
"//rslib/cargo:pct_str",
|
||||||
"//rslib/i18n:anki_i18n",
|
"//rslib/i18n:anki_i18n",
|
||||||
] + select({
|
] + select({
|
||||||
# rustls on Linux
|
# rustls on Linux
|
||||||
|
|
|
@ -92,3 +92,4 @@ pulldown-cmark = "0.8.0"
|
||||||
fnv = "1.0.7"
|
fnv = "1.0.7"
|
||||||
strum = { version = "0.21.0", features = ["derive"] }
|
strum = { version = "0.21.0", features = ["derive"] }
|
||||||
tokio-util = { version = "0.6.7", features = ["io"] }
|
tokio-util = { version = "0.6.7", features = ["io"] }
|
||||||
|
pct-str = { git="https://github.com/timothee-haudebourg/pct-str.git", rev="4adccd8d4a222ab2672350a102f06ae832a0572d" }
|
||||||
|
|
|
@ -210,6 +210,15 @@ alias(
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
alias(
|
||||||
|
name = "pct_str",
|
||||||
|
actual = "@raze__pct_str__1_1_0//:pct_str",
|
||||||
|
tags = [
|
||||||
|
"cargo-raze",
|
||||||
|
"manual",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
alias(
|
alias(
|
||||||
name = "pin_project",
|
name = "pin_project",
|
||||||
actual = "@raze__pin_project__1_0_7//:pin_project",
|
actual = "@raze__pin_project__1_0_7//:pin_project",
|
||||||
|
|
|
@ -10,7 +10,10 @@ use crate::{
|
||||||
notetype::{CardTemplateSchema11, RenderCardOutput},
|
notetype::{CardTemplateSchema11, RenderCardOutput},
|
||||||
prelude::*,
|
prelude::*,
|
||||||
template::RenderedNode,
|
template::RenderedNode,
|
||||||
text::{extract_av_tags, sanitize_html_no_images, strip_av_tags, AvTag},
|
text::{
|
||||||
|
decode_iri_paths, encode_iri_paths, extract_av_tags, sanitize_html_no_images,
|
||||||
|
strip_av_tags, AvTag,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
impl CardRenderingService for Backend {
|
impl CardRenderingService for Backend {
|
||||||
|
@ -150,6 +153,14 @@ impl CardRenderingService for Backend {
|
||||||
}
|
}
|
||||||
Ok(text.into())
|
Ok(text.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn encode_iri_paths(&self, input: pb::String) -> Result<pb::String> {
|
||||||
|
Ok(encode_iri_paths(&input.val).to_string().into())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_iri_paths(&self, input: pb::String) -> Result<pb::String> {
|
||||||
|
Ok(decode_iri_paths(&input.val).to_string().into())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn rendered_nodes_to_proto(nodes: Vec<RenderedNode>) -> Vec<pb::RenderedTemplateNode> {
|
fn rendered_nodes_to_proto(nodes: Vec<RenderedNode>) -> Vec<pb::RenderedTemplateNode> {
|
||||||
|
|
|
@ -8,9 +8,6 @@ use std::{
|
||||||
path::Path,
|
path::Path,
|
||||||
};
|
};
|
||||||
|
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use regex::Regex;
|
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
collection::Collection,
|
collection::Collection,
|
||||||
error::{AnkiError, DbErrorKind, Result},
|
error::{AnkiError, DbErrorKind, Result},
|
||||||
|
@ -25,13 +22,9 @@ use crate::{
|
||||||
MediaManager,
|
MediaManager,
|
||||||
},
|
},
|
||||||
notes::Note,
|
notes::Note,
|
||||||
text::{extract_media_refs, normalize_to_nfc, MediaRef},
|
text::{extract_media_refs, normalize_to_nfc, MediaRef, REMOTE_FILENAME},
|
||||||
};
|
};
|
||||||
|
|
||||||
lazy_static! {
|
|
||||||
static ref REMOTE_FILENAME: Regex = Regex::new("(?i)^https?://").unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Clone)]
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
pub struct MediaCheckOutput {
|
pub struct MediaCheckOutput {
|
||||||
pub unused: Vec<String>,
|
pub unused: Vec<String>,
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
use std::{borrow::Cow, ptr};
|
use std::{borrow::Cow, ptr};
|
||||||
|
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
|
use pct_str::{IriReserved, PctStr, PctString};
|
||||||
use regex::{Captures, Regex};
|
use regex::{Captures, Regex};
|
||||||
use unicase::eq as uni_eq;
|
use unicase::eq as uni_eq;
|
||||||
use unicode_normalization::{
|
use unicode_normalization::{
|
||||||
|
@ -424,6 +425,56 @@ pub(crate) fn matches_glob(text: &str, search: &str) -> bool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
pub(crate) static ref REMOTE_FILENAME: Regex = Regex::new("(?i)^https?://").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// IRI-encode unescaped local paths in HTML fragment.
|
||||||
|
pub(crate) fn encode_iri_paths(unescaped_html: &str) -> Cow<str> {
|
||||||
|
transform_html_paths(unescaped_html, |fname| {
|
||||||
|
PctString::encode(fname.chars(), IriReserved::Segment)
|
||||||
|
.into_string()
|
||||||
|
.into()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// IRI-decode percent-escaped local paths in an HTML fragment.
|
||||||
|
pub(crate) fn decode_iri_paths(unescaped_html: &str) -> Cow<str> {
|
||||||
|
transform_html_paths(unescaped_html, |fname| {
|
||||||
|
match PctStr::new(fname) {
|
||||||
|
Ok(s) => s.decode().into(),
|
||||||
|
Err(_e) => {
|
||||||
|
// invalid percent encoding; return unchanged
|
||||||
|
fname.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Apply a transform to local filename references in tags like IMG.
|
||||||
|
/// Required at display time, as Anki unfortunately stores the references
|
||||||
|
/// in unencoded form in the database.
|
||||||
|
fn transform_html_paths<F>(html: &str, transform: F) -> Cow<str>
|
||||||
|
where
|
||||||
|
F: Fn(&str) -> Cow<str>,
|
||||||
|
{
|
||||||
|
HTML_MEDIA_TAGS.replace_all(html, |caps: &Captures| {
|
||||||
|
let fname = caps
|
||||||
|
.get(1)
|
||||||
|
.or_else(|| caps.get(2))
|
||||||
|
.or_else(|| caps.get(3))
|
||||||
|
.unwrap()
|
||||||
|
.as_str()
|
||||||
|
.trim();
|
||||||
|
let full = caps.get(0).unwrap().as_str();
|
||||||
|
if REMOTE_FILENAME.is_match(fname) {
|
||||||
|
full.into()
|
||||||
|
} else {
|
||||||
|
full.replace(fname, &transform(fname))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
Loading…
Reference in a new issue