from dataclasses import dataclass
from typing import Union


@dataclass
class SimpleTableCell:
    """Rectangular table cell described by an origin, a size and its text.

    Attributes:
        x: Column position of the cell.
        y: Row position of the cell.
        w: Number of columns the cell covers.
        h: Number of rows the cell covers.
        content: Text stored in the cell; empty string by default.
    """

    x: int
    y: int
    w: int
    h: int
    content: str = ""

    def to_dict(self):
        """Return the cell as a plain dict keyed "x", "y", "w", "h", "content"."""
        field_names = ("x", "y", "w", "h", "content")
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_table_transformer_cell(cls, tatr_table_cell: dict[str, Union[list[int], str]]):
        """Build a cell from a Table Transformer style cell mapping.

        Args:
            tatr_table_cell (dict): Mapping with the row indices under
                "row_nums", the column indices under "column_nums" and,
                optionally, the text under "cell text". Example:
                {
                    "row_nums": [1, 2, 3],
                    "column_nums": [2],
                    "cell text": "Text inside cell"
                }

        Returns:
            A new instance whose origin is the smallest column/row index and
            whose size is the number of column/row indices supplied.

        Raises:
            ValueError: If "row_nums" or "column_nums" is absent or empty.
                "row_nums" is checked first.
        """
        rows = tatr_table_cell.get("row_nums", [])
        columns = tatr_table_cell.get("column_nums", [])

        # Guard clauses: both index lists are required to derive the geometry.
        if not rows:
            raise ValueError(f'Cell {tatr_table_cell} has missing values under "row_nums" key')
        if not columns:
            raise ValueError(f'Cell {tatr_table_cell} has missing values under "column_nums" key')

        left = min(columns)
        top = min(rows)
        # Size is the count of listed indices; contiguity/uniqueness of the
        # indices is not verified here.
        width = len(columns)
        height = len(rows)
        text = tatr_table_cell.get("cell text", "")

        return cls(x=left, y=top, w=width, h=height, content=text)
Memory