from __future__ import annotations

import csv
import os
from collections import defaultdict
from enum import Enum
from typing import Dict, List, Union


class _CodesColumn(Enum):
    """Header names of the table loaded for ``_Table.CODES``."""

    ID = "Id"
    PART2B = "Part2b"
    PART2T = "Part2t"
    PART1 = "Part1"
    SCOPE = "Scope"
    TYPE = "Language_Type"
    REF_NAME = "Ref_Name"
    COMMENT = "Comment"


class _NameIndexColumn(Enum):
    """Header names of the table loaded for ``_Table.NAME_INDEX``."""

    ID = "Id"
    PRINT_NAME = "Print_Name"
    INVERTED_NAME = "Inverted_Name"


class _RetirementsColumn(Enum):
    """Header names of the table loaded for ``_Table.RETIREMENTS``."""

    ID = "Id"
    REF_NAME = "Ref_Name"
    RET_REASON = "Ret_Reason"
    CHANGE_TO = "Change_To"
    REMEDY = "Ret_Remedy"
    EFFECTIVE = "Effective"


class _MacrolanguagesColumn(Enum):
    """Header names of the table loaded for ``_Table.MACROLANGUAGES``."""

    ID = "I_Id"
    MID = "M_Id"
    STATUS = "I_Status"


# Any column enum member; used as the key type of a parsed row.
_COLUMN_TYPE = Union[
    _CodesColumn,
    _NameIndexColumn,
    _RetirementsColumn,
    _MacrolanguagesColumn,
]

# One parsed data row: column enum member -> cell text.
_ROW_TYPE = Dict[_COLUMN_TYPE, str]


class _Table(Enum):
    """The data tables; each member's value is the enum describing its columns."""

    CODES = _CodesColumn
    NAME_INDEX = _NameIndexColumn
    RETIREMENTS = _RetirementsColumn
    MACROLANGUAGES = _MacrolanguagesColumn


# The data files are looked up in the same directory as this module.
_THIS_DIR = os.path.dirname(__file__)

_DATA_TSV_PATHS = {
    _Table.CODES: os.path.join(_THIS_DIR, "iso-639-3.tab"),
    _Table.NAME_INDEX: os.path.join(_THIS_DIR, "iso-639-3_Name_Index.tab"),
    _Table.MACROLANGUAGES: os.path.join(_THIS_DIR, "iso-639-3-macrolanguages.tab"),
    _Table.RETIREMENTS: os.path.join(_THIS_DIR, "iso-639-3_Retirements.tab"),
}


def _load_tsv(table: _Table) -> List[_ROW_TYPE]:
    """Read the tab-separated file registered for ``table``.

    Each header string in the file is converted to the matching member of the
    table's column enum (``table.value``), so a header that is not a value of
    that enum raises ``ValueError``.

    Args:
        table: The table to load; its file path comes from ``_DATA_TSV_PATHS``.

    Returns:
        One dict per data row, keyed by column enum member, in file order.
    """
    to_column = table.value
    parsed_rows: List[_ROW_TYPE] = []
    with open(_DATA_TSV_PATHS[table], encoding="utf-8", newline="") as handle:
        for record in csv.DictReader(handle, delimiter="\t"):
            parsed_rows.append(
                {to_column(header): cell for header, cell in record.items()}
            )
    return parsed_rows


def _invert_codes_column(column: _CodesColumn) -> Dict[str, str]:
    """Map every non-empty ``column`` value in ``_PART3_TO_CODES`` to its key.

    When several rows share a value, the key of the last such row wins.
    """
    inverted: Dict[str, str] = {}
    for part3, codes_row in _PART3_TO_CODES.items():
        value = codes_row[column]
        if value:
            inverted[value] = part3
    return inverted


def _invert_name_index_column(column: _NameIndexColumn) -> Dict[str, str]:
    """Map every non-empty ``column`` value in ``_PART3_TO_NAME_INDEX`` to its key.

    When several rows share a value, the key of the last such row wins.
    """
    inverted: Dict[str, str] = {}
    for part3, name_rows in _PART3_TO_NAME_INDEX.items():
        for name_row in name_rows:
            value = name_row[column]
            if value:
                inverted[value] = part3
    return inverted


# Code table rows keyed by their ``Id`` cell.
_PART3_TO_CODES: Dict[str, _ROW_TYPE] = dict(
    (entry[_CodesColumn.ID], entry) for entry in _load_tsv(_Table.CODES)
)

# The name index may hold several rows per ``Id``, so rows are grouped into
# lists first and then frozen into a plain dict (no implicit key creation).
_part3_to_name_index: defaultdict[str, List[_ROW_TYPE]] = defaultdict(list)
for row in _load_tsv(_Table.NAME_INDEX):
    _part3_to_name_index[row[_NameIndexColumn.ID]].append(row)
_PART3_TO_NAME_INDEX: Dict[str, List[_ROW_TYPE]] = dict(_part3_to_name_index)

# Retirement rows keyed by their ``Id`` cell.
_PART3_TO_RETIREMENTS: Dict[str, _ROW_TYPE] = dict(
    (entry[_RetirementsColumn.ID], entry)
    for entry in _load_tsv(_Table.RETIREMENTS)
)

# Macrolanguage rows keyed by their ``I_Id`` cell.
_PART3_TO_MACROLANGUAGES: Dict[str, _ROW_TYPE] = dict(
    (entry[_MacrolanguagesColumn.ID], entry)
    for entry in _load_tsv(_Table.MACROLANGUAGES)
)

# Reverse lookups from other code-table columns back to the ``Id`` key.
_PART2B_TO_PART3: Dict[str, str] = _invert_codes_column(_CodesColumn.PART2B)
_PART2T_TO_PART3: Dict[str, str] = _invert_codes_column(_CodesColumn.PART2T)
_PART1_TO_PART3: Dict[str, str] = _invert_codes_column(_CodesColumn.PART1)
_REF_NAME_TO_PART3: Dict[str, str] = _invert_codes_column(_CodesColumn.REF_NAME)

# Reverse lookups from name-index columns back to the ``Id`` key.
_PRINT_NAME_TO_PART3: Dict[str, str] = _invert_name_index_column(
    _NameIndexColumn.PRINT_NAME
)
_INVERTED_NAME_TO_PART3: Dict[str, str] = _invert_name_index_column(
    _NameIndexColumn.INVERTED_NAME
)
Memory