mirror of
https://github.com/ankitects/anki.git
synced 2025-09-19 06:22:22 -04:00

* Add crate csv
* Add start of csv importing on backend
* Add Mnemosyne serializer
* Add csv and json importing on backend
* Add plaintext importing on frontend
* Add csv metadata extraction on backend
* Add csv importing with GUI
* Fix missing dfa file in build
Added compile_data_attr, then re-ran cargo/update.py.
* Don't use doubly buffered reader in csv
* Escape HTML entities if CSV is not HTML
Also use name 'is_html' consistently.
* Use decimal number as foreign ease (like '2.5')
* ForeignCard.ivl → ForeignCard.interval
* Only allow fixed set of CSV delimiters
* Map timestamp of ForeignCard to native due time
* Don't trim CSV records
* Document use of empty strings for defaults
* Avoid creating CardGenContexts for every note
This requires CardGenContext to be generic, so it works both with an
owned and borrowed notetype.
* Show all accepted file types in import file picker
* Add import_json_file()
* factor → ease_factor
* delimter_from_value → delimiter_from_value
* Map columns to fields, not the other way around
* Fallback to current config for csv metadata
* Add start of new import csv screen
* Temporary fix for compilation issue on Linux/Mac
* Disable jest bazel action for import-csv
Jest fails with an error code if no tests are available, but this would
not be noticeable on Windows as Jest is not run there.
* Fix field mapping issue
* Revert "Temporary fix for compilation issue on Linux/Mac"
This reverts commit 21f8a26140.
* Add HtmlSwitch and move Switch to components
* Fix spacing and make selectors consistent
* Fix shortcut tooltip
* Place import button at the top with path
* Fix meta column indices
* Remove NotetypeForString
* Fix queue and type of foreign cards
* Support different dupe resolution strategies
* Allow dupe resolution selection when importing CSV
* Test import of unnormalized text
Close #1863.
* Fix logging of foreign notes
* Implement CSV exports
* Use db_scalar() in notes_table_len()
* Rework CSV metadata
- Notetypes and decks are either defined by a global id or by a column.
- If a notetype id is provided, its field map must also be specified.
- If a notetype column is provided, fields are now mapped by index
instead of name at import time. So the first non-meta column is used for
the first field of every note, regardless of notetype. This makes
importing easier and should improve compatibility with files without a
notetype column.
- Ensure first field can be mapped to a column.
- Meta columns must be defined as `#[meta name]:[column index]` instead
of in the `#columns` tag.
- Column labels contain the raw names defined by the file and must be
prettified by the frontend.
* Adjust frontend to new backend column mapping
* Add force flags for is_html and delimiter
* Detect if CSV is HTML by field content
* Update dupe resolution labels
* Simplify selectors
* Fix coalescence of oneofs in TS
* Disable meta columns from selection
Plus a lot of refactoring.
* Make import button stick to the bottom
* Write delimiter and html flag into csv
* Refetch field map after notetype change
* Fix log labels for csv import
* Log notes whose deck/notetype was missing
* Fix hiding of empty log queues
* Implement adding tags to all notes of a csv
* Fix dupe resolution not being set in log
* Implement adding tags to updated notes of a csv
* Check first note field is not empty
* Temporary fix for build on Linux/Mac
* Fix inverted html check (dae)
* Remove unused ftl string
* Delimiter → Separator
* Remove commented-out line
* Don't accept .json files
* Tweak tag ftl strings
* Remove redundant blur call
* Strip sound and add spaces in csv export
* Export HTML by default
* Fix unset deck in Mnemosyne import
Also accept both numbers and strings for notetypes and decks in JSON.
* Make DupeResolution::Update the default
* Fix missing dot in extension
* Make column indices 1-based
* Remove StickContainer from TagEditor
Fixes line breaking, border and z index on ImportCsvPage.
* Assign different key combos to tag editors
* Log all updated duplicates
Add a log field for the true number of found notes.
* Show identical notes as skipped
* Split tag-editor into separate ts module (dae)
* Add progress for CSV export
* Add progress for text import
* Tidy-ups after tag-editor split (dae)
- import-csv no longer depends on editor
- remove some commented lines
252 lines
7 KiB
Python
252 lines
7 KiB
Python
# Copyright: Ankitects Pty Ltd and contributors
|
|
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
|
|
|
"""Serializer for Mnemosyne collections.
|
|
|
|
Some notes about their structure:
|
|
https://github.com/mnemosyne-proj/mnemosyne/blob/master/mnemosyne/libmnemosyne/docs/source/index.rst
|
|
|
|
Anki | Mnemosyne
|
|
----------+-----------
|
|
Note | Fact
|
|
Card Type | Fact View
|
|
Card | Card
|
|
Notetype | Card Type
|
|
"""
|
|
|
|
import re
|
|
from abc import ABC, abstractmethod
|
|
from dataclasses import dataclass, field
|
|
from typing import Tuple, Type
|
|
|
|
from anki.db import DB
|
|
from anki.decks import DeckId
|
|
from anki.foreign_data import (
|
|
ForeignCard,
|
|
ForeignCardType,
|
|
ForeignData,
|
|
ForeignNote,
|
|
ForeignNotetype,
|
|
)
|
|
|
|
|
|
def serialize(db_path: str, deck_id: DeckId) -> str:
    """Serialize the Mnemosyne database stored at `db_path`.

    The database is opened with open_mnemosyne_db(), its contents are
    collected by gather_data() into a ForeignData object built with
    `deck_id`, and the string produced by that object's serialize() method
    is returned.
    """
    mnemosyne_db = open_mnemosyne_db(db_path)
    foreign_data = gather_data(mnemosyne_db, deck_id)
    return foreign_data.serialize()
|
|
|
|
|
|
def gather_data(db: DB, deck_id: DeckId) -> ForeignData:
    """Collect the notes of a Mnemosyne database and the notetypes they use.

    Returns a ForeignData built from the converted notes, one notetype per
    fact view that at least one fact used, and `deck_id`.
    """
    facts = gather_facts(db)
    gather_cards_into_facts(db, facts)

    # Used as an insertion-ordered set: each fact records its fact view here
    # while it is converted, so this loop must run before notetypes are built.
    seen_views: dict[Type[MnemoFactView], bool] = {}
    notes = []
    for fact in facts.values():
        notes.append(fact.foreign_note(seen_views))

    notetypes = [view.foreign_notetype() for view in seen_views]
    return ForeignData(notes, notetypes, deck_id)
|
|
|
|
|
|
def open_mnemosyne_db(db_path: str) -> DB:
    """Open the Mnemosyne database at `db_path` and check its version.

    The version is read from the `global_variables` table. An unrecognized
    version only prints a warning; the database is returned regardless so
    that the import can still be attempted.
    """
    db = DB(db_path)
    ver = db.scalar("SELECT value FROM global_variables WHERE key='version'")
    # The scalar may not be a string (presumably None when the table has no
    # 'version' row - verify against DB.scalar). Treat that as an unknown
    # version instead of failing on `.startswith`.
    is_known = isinstance(ver, str) and (
        ver.startswith("Mnemosyne SQL 1") or ver in ("2", "3")
    )
    if not is_known:
        print("Mnemosyne version unknown, trying to import anyway")
    return db
|
|
|
|
|
|
class MnemoFactView(ABC):
    """Abstract description of a Mnemosyne fact view.

    Concrete subclasses supply the class attributes below and are used
    through the class objects themselves.
    """

    # Name given to the notetype created for this fact view.
    notetype: str
    # Keys looked up in a fact's data, in the order of the note's fields.
    field_keys: Tuple[str, ...]

    @classmethod
    @abstractmethod
    def foreign_notetype(cls) -> ForeignNotetype:
        """Return the ForeignNotetype that represents this fact view."""
|
|
|
|
|
|
class FrontOnly(MnemoFactView):
    """Fact view represented by a basic notetype."""

    notetype = "Mnemosyne-FrontOnly"
    # Fact data keys read, in order, to fill the note's fields.
    field_keys = ("f", "b")

    @classmethod
    def foreign_notetype(cls) -> ForeignNotetype:
        """Build the notetype with ForeignNotetype.basic()."""
        name = cls.notetype
        return ForeignNotetype.basic(name)
|
|
|
|
|
|
class FrontBack(MnemoFactView):
    """Fact view represented by a basic notetype with a reverse card."""

    notetype = "Mnemosyne-FrontBack"
    # Fact data keys read, in order, to fill the note's fields.
    field_keys = ("f", "b")

    @classmethod
    def foreign_notetype(cls) -> ForeignNotetype:
        """Build the notetype with ForeignNotetype.basic_reverse()."""
        name = cls.notetype
        return ForeignNotetype.basic_reverse(name)
|
|
|
|
|
|
class Vocabulary(MnemoFactView):
    """Fact view with Expression, Pronunciation, Meaning and Notes fields.

    Its notetype has two card types: "Recognition" (expression on the
    question side) and "Production" (meaning on the question side).
    """

    notetype = "Mnemosyne-Vocabulary"
    # Fact data keys, in the same order as the field names passed to
    # ForeignNotetype in foreign_notetype().
    field_keys = ("f", "p_1", "m_1", "n")

    @classmethod
    def foreign_notetype(cls) -> ForeignNotetype:
        """Return the four-field notetype with both card types."""
        return ForeignNotetype(
            cls.notetype,
            ["Expression", "Pronunciation", "Meaning", "Notes"],
            [cls._recognition_card_type(), cls._production_card_type()],
        )

    @staticmethod
    def _recognition_card_type() -> ForeignCardType:
        """Return the card type that asks for the expression."""
        # These are plain string literals, not f-strings, so a field
        # reference is written with exactly two braces on each side.
        return ForeignCardType(
            name="Recognition",
            qfmt="{{Expression}}",
            afmt="{{Expression}}\n\n<hr id=answer>\n\n{{Pronunciation}}"
            "<br>\n{{Meaning}}<br>\n{{Notes}}",
        )

    @staticmethod
    def _production_card_type() -> ForeignCardType:
        """Return the card type that asks for the meaning."""
        # Plain string literals: two braces per side, as in qfmt.
        return ForeignCardType(
            name="Production",
            qfmt="{{Meaning}}",
            afmt="{{Meaning}}\n\n<hr id=answer>\n\n{{Expression}}"
            "<br>\n{{Pronunciation}}<br>\n{{Notes}}",
        )
|
|
|
|
|
|
class Cloze(MnemoFactView):
    """Fact view represented by a cloze notetype."""

    notetype = "Mnemosyne-Cloze"
    # Single fact data key holding the note's only mapped field.
    field_keys = ("text",)

    @classmethod
    def foreign_notetype(cls) -> ForeignNotetype:
        """Build the notetype with ForeignNotetype.cloze()."""
        name = cls.notetype
        return ForeignNotetype.cloze(name)
|
|
|
|
|
|
@dataclass
class MnemoCard:
    """Scheduling data of one Mnemosyne card.

    The field order matters: instances are built positionally from database
    rows.
    """

    fact_view_id: str
    tags: str
    # presumably Unix timestamps in seconds (intervals are derived by
    # dividing their difference by 86400) - confirm against Mnemosyne docs
    next_rep: int
    last_rep: int
    easiness: float
    reps: int
    lapses: int

    def card_ord(self) -> int:
        """Return the zero-based ordinal taken from the fact view id.

        The ordinal is the number following the last "." of `fact_view_id`,
        minus one. Raises Exception if that part is not an integer.
        """
        _, _, suffix = self.fact_view_id.rpartition(".")
        try:
            one_based = int(suffix)
        except ValueError as err:
            raise Exception(
                f"Fact view id '{self.fact_view_id}' has unknown format"
            ) from err
        return one_based - 1

    def is_new(self) -> bool:
        """Return True if `last_rep` holds the marker value -1."""
        return self.last_rep == -1

    def foreign_card(self) -> ForeignCard:
        """Convert the scheduling data into a ForeignCard."""
        return ForeignCard(
            ease_factor=self.easiness,
            reps=self.reps,
            lapses=self.lapses,
            interval=self.anki_interval(),
            due=self.next_rep,
        )

    def anki_interval(self) -> int:
        """Return the whole days between last and next repetition, at least 1."""
        whole_days = (self.next_rep - self.last_rep) // 86400
        return whole_days if whole_days > 1 else 1
|
|
|
|
|
|
@dataclass
class MnemoFact:
    """A Mnemosyne fact with its key/value data and the cards made from it."""

    id: int
    # Fact data, keyed by the names listed in the fact views' `field_keys`.
    fields: dict[str, str] = field(default_factory=dict)
    cards: list[MnemoCard] = field(default_factory=list)

    def foreign_note(
        self, used_fact_views: dict[Type[MnemoFactView], bool]
    ) -> ForeignNote:
        """Convert the fact into a ForeignNote.

        As a side effect, the fact's view is recorded as a key of
        `used_fact_views`. Raises Exception if the fact view cannot be
        determined (see fact_view()).
        """
        fact_view = self.fact_view()
        used_fact_views[fact_view] = True
        return ForeignNote(
            fields=self.anki_fields(fact_view),
            tags=self.anki_tags(),
            notetype=fact_view.notetype,
            cards=self.foreign_cards(),
        )

    def fact_view(self) -> Type[MnemoFactView]:
        """Return the fact view class matching the first card's fact view id.

        Raises Exception if the fact has no cards or the id has an unknown
        prefix.
        """
        try:
            fact_view = self.cards[0].fact_view_id
        except IndexError as err:
            # `self.id`, not the builtin `id`, identifies the fact.
            raise Exception(f"Fact {self.id} has no cards") from err

        if fact_view.startswith(("1.", "1::")):
            return FrontOnly
        if fact_view.startswith(("2.", "2::")):
            return FrontBack
        if fact_view.startswith(("3.", "3::")):
            return Vocabulary
        if fact_view.startswith("5.1"):
            return Cloze

        raise Exception(f"Fact {self.id} has unknown fact view: {fact_view}")

    def anki_fields(self, fact_view: Type[MnemoFactView]) -> list[str]:
        """Return the munged field values in the order of the view's keys.

        Keys absent from the fact's data yield an empty string.
        """
        return [munge_field(self.fields.get(k, "")) for k in fact_view.field_keys]

    def anki_tags(self) -> list[str]:
        """Return the tags of all cards, in card order.

        Each card's tag string is split on ", "; spaces and ideographic
        spaces (U+3000) inside a tag are replaced by underscores. Cards
        with an empty tag string contribute nothing.
        """
        tags: list[str] = []
        for card in self.cards:
            if not card.tags:
                continue
            tags.extend(
                t.replace(" ", "_").replace("\u3000", "_")
                for t in card.tags.split(", ")
            )
        return tags

    def foreign_cards(self) -> list[ForeignCard]:
        """Return ForeignCards for every card that is not new."""
        # generate defaults for new cards
        return [card.foreign_card() for card in self.cards if not card.is_new()]
|
|
|
|
|
|
def munge_field(field: str) -> str:
    """Rewrite Mnemosyne field markup into the form used by Anki.

    Applied in order: newlines become <br>, angle-bracketed LaTeX tags
    become square-bracketed ones, and <audio> tags become [sound:...].
    """
    substitutions = (
        # \n -> br
        ("\r?\n", "<br>"),
        # latex differences
        (r"(?i)<(/?(\$|\$\$|latex))>", "[\\1]"),
        # audio differences
        ('<audio src="(.+?)">(</audio>)?', "[sound:\\1]"),
    )
    for pattern, replacement in substitutions:
        field = re.sub(pattern, replacement, field)
    return field
|
|
|
|
|
|
def gather_facts(db: DB) -> dict[int, MnemoFact]:
    """Load all facts that have data, keyed by their `_id`.

    Every (fact id, key, value) row of the joined `facts` and
    `data_for_fact` tables becomes one entry in the fact's `fields` dict.
    """
    rows = db.execute(
        """
SELECT _id, key, value
FROM facts, data_for_fact
WHERE facts._id=data_for_fact._fact_id"""
    )
    facts: dict[int, MnemoFact] = {}
    for fact_id, key, value in rows:
        fact = facts.get(fact_id)
        if not fact:
            # first row seen for this fact
            fact = facts[fact_id] = MnemoFact(fact_id)
        fact.fields[key] = value
    return facts
|
|
|
|
|
|
def gather_cards_into_facts(db: DB, facts: dict[int, MnemoFact]) -> None:
    """Attach every row of the `cards` table to its fact in `facts`.

    Afterwards each fact's cards are sorted by their ordinal. Raises
    KeyError if a card refers to a fact id that is not a key of `facts`.
    """
    rows = db.execute(
        """
SELECT
_fact_id,
fact_view_id,
tags,
next_rep,
last_rep,
easiness,
acq_reps + ret_reps,
lapses
FROM cards"""
    )
    for row in rows:
        # The remaining columns match MnemoCard's fields positionally.
        fact_id, *card_values = row
        facts[fact_id].cards.append(MnemoCard(*card_values))

    for fact in facts.values():
        fact.cards.sort(key=MnemoCard.card_ord)
|