Anki/qt/aqt/import_export/importing.py
RumovZ 42cbe42f06
Plaintext import/export (#1850)
* Add crate csv

* Add start of csv importing on backend

* Add Mnemosyne serializer

* Add csv and json importing on backend

* Add plaintext importing on frontend

* Add csv metadata extraction on backend

* Add csv importing with GUI

* Fix missing dfa file in build

Added compile_data_attr, then re-ran cargo/update.py.

* Don't use doubly buffered reader in csv

* Escape HTML entities if CSV is not HTML

Also use name 'is_html' consistently.

* Use decimal number as foreign ease (like '2.5')

* ForeignCard.ivl → ForeignCard.interval

* Only allow fixed set of CSV delimiters

* Map timestamp of ForeignCard to native due time

* Don't trim CSV records

* Document use of empty strings for defaults

* Avoid creating CardGenContexts for every note

This requires CardGenContext to be generic, so it works both with an
owned and borrowed notetype.

* Show all accepted file types in import file picker

* Add import_json_file()

* factor → ease_factor

* delimter_from_value → delimiter_from_value

* Map columns to fields, not the other way around

* Fallback to current config for csv metadata

* Add start of new import csv screen

* Temporary fix for compilation issue on Linux/Mac

* Disable jest bazel action for import-csv

Jest fails with an error code if no tests are available, but this would
not be noticeable on Windows as Jest is not run there.

* Fix field mapping issue

* Revert "Temporary fix for compilation issue on Linux/Mac"

This reverts commit 21f8a26140.

* Add HtmlSwitch and move Switch to components

* Fix spacing and make selectors consistent

* Fix shortcut tooltip

* Place import button at the top with path

* Fix meta column indices

* Remove NotetypeForString

* Fix queue and type of foreign cards

* Support different dupe resolution strategies

* Allow dupe resolution selection when importing CSV

* Test import of unnormalized text

Close #1863.

* Fix logging of foreign notes

* Implement CSV exports

* Use db_scalar() in notes_table_len()

* Rework CSV metadata

- Notetypes and decks are either defined by a global id or by a column.
- If a notetype id is provided, its field map must also be specified.
- If a notetype column is provided, fields are now mapped by index
instead of name at import time. So the first non-meta column is used for
the first field of every note, regardless of notetype. This makes
importing easier and should improve compatibility with files without a
notetype column.
- Ensure first field can be mapped to a column.
- Meta columns must be defined as `#[meta name]:[column index]` instead
of in the `#columns` tag.
- Column labels contain the raw names defined by the file and must be
prettified by the frontend.

* Adjust frontend to new backend column mapping

* Add force flags for is_html and delimiter

* Detect if CSV is HTML by field content

* Update dupe resolution labels

* Simplify selectors

* Fix coalescence of oneofs in TS

* Disable meta columns from selection

Plus a lot of refactoring.

* Make import button stick to the bottom

* Write delimiter and html flag into csv

* Refetch field map after notetype change

* Fix log labels for csv import

* Log notes whose deck/notetype was missing

* Fix hiding of empty log queues

* Implement adding tags to all notes of a csv

* Fix dupe resolution not being set in log

* Implement adding tags to updated notes of a csv

* Check first note field is not empty

* Temporary fix for build on Linux/Mac

* Fix inverted html check (dae)

* Remove unused ftl string

* Delimiter → Separator

* Remove commented-out line

* Don't accept .json files

* Tweak tag ftl strings

* Remove redundant blur call

* Strip sound and add spaces in csv export

* Export HTML by default

* Fix unset deck in Mnemosyne import

Also accept both numbers and strings for notetypes and decks in JSON.

* Make DupeResolution::Update the default

* Fix missing dot in extension

* Make column indices 1-based

* Remove StickContainer from TagEditor

Fixes line breaking, border and z index on ImportCsvPage.

* Assign different key combos to tag editors

* Log all updated duplicates

Add a log field for the true number of found notes.

* Show identical notes as skipped

* Split tag-editor into separate ts module (dae)

* Add progress for CSV export

* Add progress for text import

* Tidy-ups after tag-editor split (dae)

- import-csv no longer depends on editor
- remove some commented lines
2022-06-01 20:26:16 +10:00

295 lines
8.9 KiB
Python

# Copyright: Ankitects Pty Ltd and contributors
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from itertools import chain
from typing import Any, Tuple, Type
import aqt.main
from anki.collection import (
Collection,
DupeResolution,
ImportCsvRequest,
ImportLogWithChanges,
Progress,
)
from anki.errors import Interrupted
from anki.foreign_data import mnemosyne
from anki.lang import without_unicode_isolation
from aqt.import_export.import_csv_dialog import ImportCsvDialog
from aqt.operations import CollectionOp, QueryOp
from aqt.progress import ProgressUpdate
from aqt.qt import *
from aqt.utils import askUser, getFile, showText, showWarning, tooltip, tr
class Importer(ABC):
    """Base class for importers dispatched on a file's extension."""

    # Lowercase file endings (including the dot) this importer handles.
    accepted_file_endings: list[str]

    @classmethod
    def can_import(cls, lowercase_filename: str) -> bool:
        """True if the filename ends with one of the accepted endings."""
        # str.endswith accepts a tuple of suffixes, replacing the any() loop.
        return lowercase_filename.endswith(tuple(cls.accepted_file_endings))

    @classmethod
    @abstractmethod
    def do_import(cls, mw: aqt.main.AnkiQt, path: str) -> None:
        ...
class ColpkgImporter(Importer):
    """Imports a full collection package, replacing the user's current
    collection after confirmation and a fresh backup."""

    accepted_file_endings = [".apkg", ".colpkg"]

    @staticmethod
    def can_import(filename: str) -> bool:
        # Overrides the base-class suffix check: a plain .apkg is only
        # treated as a full collection when named like a backup; other
        # .apkg files fall through to ApkgImporter in IMPORTERS order.
        return (
            filename == "collection.apkg"
            or (filename.startswith("backup-") and filename.endswith(".apkg"))
            or filename.endswith(".colpkg")
        )

    @staticmethod
    def do_import(mw: aqt.main.AnkiQt, path: str) -> None:
        # Replacing the collection is destructive, so confirm first
        # (defaulting to "no").
        if askUser(
            tr.importing_this_will_delete_your_existing_collection(),
            msgfunc=QMessageBox.warning,
            defaultno=True,
        ):
            ColpkgImporter._import(mw, path)

    @staticmethod
    def _import(mw: aqt.main.AnkiQt, file: str) -> None:
        def on_success() -> None:
            # Reopen the newly-imported collection and notify the user.
            mw.loadCollection()
            tooltip(tr.importing_importing_complete())

        def on_failure(err: Exception) -> None:
            # Reload the existing collection even on failure; only show a
            # warning for unexpected errors (Interrupted means the user
            # aborted).
            mw.loadCollection()
            if not isinstance(err, Interrupted):
                showWarning(str(err))

        # Order matters: back up the current collection first, then close
        # it, then import the package in its place, then reload it.
        QueryOp(
            parent=mw,
            op=lambda _: mw.create_backup_now(),
            success=lambda _: mw.unloadCollection(
                lambda: import_collection_package_op(mw, file, on_success)
                .failure(on_failure)
                .run_in_background()
            ),
        ).with_progress().run_in_background()
class ApkgImporter(Importer):
    """Imports notes from a deck package into the open collection."""

    accepted_file_endings = [".apkg", ".zip"]

    @staticmethod
    def do_import(mw: aqt.main.AnkiQt, path: str) -> None:
        # Run the backend import in the background, then show the log.
        (
            CollectionOp(parent=mw, op=lambda col: col.import_anki_package(path))
            .with_backend_progress(import_progress_update)
            .success(show_import_log)
            .run_in_background()
        )
class MnemosyneImporter(Importer):
    """Imports a Mnemosyne database by first converting it to JSON."""

    accepted_file_endings = [".db"]

    @staticmethod
    def do_import(mw: aqt.main.AnkiQt, path: str) -> None:
        def serialize(col: Collection) -> str:
            # Convert the Mnemosyne DB to foreign-data JSON, targeting the
            # currently selected deck.
            return mnemosyne.serialize(path, col.decks.current()["id"])

        QueryOp(
            parent=mw,
            op=serialize,
            success=lambda json: import_json_string(mw, json),
        ).with_progress().run_in_background()
class CsvImporter(Importer):
    """Imports notes from delimited text via the CSV import dialog."""

    accepted_file_endings = [".csv", ".tsv", ".txt"]

    @staticmethod
    def do_import(mw: aqt.main.AnkiQt, path: str) -> None:
        def on_accepted(request: ImportCsvRequest) -> None:
            # Runs once the user has confirmed their settings in the dialog.
            (
                CollectionOp(parent=mw, op=lambda col: col.import_csv(request))
                .with_backend_progress(import_progress_update)
                .success(show_import_log)
                .run_in_background()
            )

        ImportCsvDialog(mw, path, on_accepted)
class JsonImporter(Importer):
    """Imports foreign-data JSON files.

    NOTE(review): not registered in IMPORTERS below, so it is not reachable
    from the file picker.
    """

    accepted_file_endings = [".anki-json"]

    @staticmethod
    def do_import(mw: aqt.main.AnkiQt, path: str) -> None:
        (
            CollectionOp(parent=mw, op=lambda col: col.import_json_file(path))
            .with_backend_progress(import_progress_update)
            .success(show_import_log)
            .run_in_background()
        )
# Importers are tried in order by import_file(); the first whose can_import()
# matches handles the file. JsonImporter is deliberately not registered
# (see "Don't accept .json files" in the change history).
IMPORTERS: list[Type[Importer]] = [
    ColpkgImporter,
    ApkgImporter,
    MnemosyneImporter,
    CsvImporter,
]
def import_file(mw: aqt.main.AnkiQt, path: str) -> None:
    """Dispatch *path* to the first importer accepting its extension,
    warning the user if none does."""
    lowered = os.path.basename(path).lower()
    importer = next((imp for imp in IMPORTERS if imp.can_import(lowered)), None)
    if importer is None:
        showWarning("Unsupported file type.")
        return
    importer.do_import(mw, path)
def prompt_for_file_then_import(mw: aqt.main.AnkiQt) -> None:
    """Ask the user to pick a file; import it if one was chosen."""
    path = get_file_path(mw)
    if path:
        import_file(mw, path)
def get_file_path(mw: aqt.main.AnkiQt) -> str | None:
    """Show the import file picker; return the chosen path, or None."""
    # Build a filter string like "(*.apkg *.colpkg *.csv ...)".
    endings = " ".join(f"*{ending}" for ending in all_accepted_file_endings())
    name_filter = without_unicode_isolation(
        tr.importing_all_supported_formats(val=f"({endings})")
    )
    file = getFile(mw, tr.actions_import(), None, key="import", filter=name_filter)
    if file:
        return str(file)
    return None
def all_accepted_file_endings() -> set[str]:
    """Return the union of file endings accepted by registered importers."""
    # chain.from_iterable is the idiomatic lazy form; chain(*gen) would
    # first unpack the whole generator into call arguments.
    return set(
        chain.from_iterable(importer.accepted_file_endings for importer in IMPORTERS)
    )
def import_collection_package_op(
    mw: aqt.main.AnkiQt, path: str, success: Callable[[], None]
) -> QueryOp[None]:
    """Build a QueryOp that replaces the collection files with the package
    at *path*, reporting backend progress as it runs."""

    def replace_collection(_: Collection) -> None:
        target_col = mw.pm.collectionPath()
        media_dir = os.path.join(mw.pm.profileFolder(), "collection.media")
        media_db_path = os.path.join(mw.pm.profileFolder(), "collection.media.db2")
        mw.backend.import_collection_package(
            col_path=target_col,
            backup_path=path,
            media_folder=media_dir,
            media_db=media_db_path,
        )

    op = QueryOp(parent=mw, op=replace_collection, success=lambda _: success())
    return op.with_backend_progress(import_progress_update)
def import_json_string(mw: aqt.main.AnkiQt, json: str) -> None:
    """Import foreign data given as a JSON string, then show the log."""
    (
        CollectionOp(parent=mw, op=lambda col: col.import_json_string(json))
        .with_backend_progress(import_progress_update)
        .success(show_import_log)
        .run_in_background()
    )
def show_import_log(log_with_changes: ImportLogWithChanges) -> None:
    """Present the results of an import in a plain-text dialog."""
    text = stringify_log(log_with_changes.log)
    showText(text, plain_text_edit=True)
def stringify_log(log: ImportLogWithChanges.Log) -> str:
    """Render an import log as text: a found-notes header, a per-queue
    summary for each non-empty queue, a blank line, then one line per note."""
    queues = log_queues(log)
    lines: list[str] = [tr.importing_notes_found_in_file(val=log.found_notes)]
    lines.extend(
        queue.summary_template(val=len(queue.notes))
        for queue in queues
        if queue.notes
    )
    lines.append("")
    for queue in queues:
        lines.extend(
            f"[{queue.action_string}] {', '.join(note.fields)}"
            for note in queue.notes
        )
    return "\n".join(lines)
def import_progress_update(progress: Progress, update: ProgressUpdate) -> None:
if not progress.HasField("importing"):
return
update.label = progress.importing
if update.user_wants_abort:
update.abort = True
@dataclass
class LogQueue:
    """One category of notes from an import log, plus how to present it."""

    # Sequence of imported notes; each note exposes a `fields` list.
    notes: Any
    # Callable[[Union[str, int, float]], str] (if mypy understood kwargs);
    # called with val=len(notes) to produce the summary line.
    summary_template: Any
    # Prefix shown in brackets before each note line of the log output.
    action_string: str
def first_field_queue(log: ImportLogWithChanges.Log) -> LogQueue:
    """Build the queue for notes whose first field matched an existing
    note, wording it according to the dupe resolution strategy."""
    strategy = log.dupe_resolution
    if strategy == DupeResolution.ADD:
        template = tr.importing_added_duplicate_with_first_field
        action = tr.adding_added()
    elif strategy == DupeResolution.IGNORE:
        template = tr.importing_first_field_matched
        action = tr.importing_skipped()
    else:
        # Remaining strategy: matching notes were updated in place.
        template = tr.importing_first_field_matched
        action = tr.importing_updated()
    return LogQueue(log.first_field_match, template, action)
def log_queues(log: ImportLogWithChanges.Log) -> Tuple[LogQueue, ...]:
    """Return every log queue in the order it should be displayed."""
    queues: list[LogQueue] = []
    queues.append(
        LogQueue(
            log.conflicting,
            tr.importing_notes_that_could_not_be_imported,
            tr.importing_skipped(),
        )
    )
    queues.append(
        LogQueue(
            log.updated,
            tr.importing_notes_updated_as_file_had_newer,
            tr.importing_updated(),
        )
    )
    queues.append(
        LogQueue(log.new, tr.importing_notes_added_from_file, tr.adding_added())
    )
    queues.append(
        LogQueue(
            log.duplicate,
            tr.importing_notes_skipped_as_theyre_already_in,
            tr.importing_identical(),
        )
    )
    queues.append(first_field_queue(log))
    # The two queues below use untranslated fallback strings.
    queues.append(
        LogQueue(
            log.missing_notetype,
            lambda val: f"Notes skipped, as their notetype was missing: {val}",
            tr.importing_skipped(),
        )
    )
    queues.append(
        LogQueue(
            log.missing_deck,
            lambda val: f"Notes skipped, as their deck was missing: {val}",
            tr.importing_skipped(),
        )
    )
    queues.append(
        LogQueue(
            log.empty_first_field,
            tr.importing_empty_first_field,
            tr.importing_skipped(),
        )
    )
    return tuple(queues)