# This is an implementation of the Unicode Standard Annex #9
# Unicode bidirectional algorithm - Revision 48 for Unicode 15.1.0
# https://unicode.org/reports/tr9/
import unicodedata
from collections import deque
from dataclasses import dataclass, replace
from operator import itemgetter
from typing import List, Tuple
from .enums import TextDirection
# Maximum explicit embedding level accepted when processing rules X2 to X5
MAX_DEPTH = 125

# BidiBrackets 15.1.0 2023-01-18
# Loaded from https://www.unicode.org/Public/UNIDATA/BidiBrackets.txt
# This table can be dropped when the information is added on "unicodedata"
#
# Each entry is an (opening bracket, closing bracket) pair. The characters are
# written as escapes on purpose: several of them (fullwidth forms, U+2329/U+232A)
# are compatibility or canonical equivalents of other brackets in the table, and
# any normalization applied to this file would silently collapse the literals
# into duplicate dictionary keys.
_BIDI_BRACKET_PAIRS = (
    ("\u0028", "\u0029"),
    ("\u005b", "\u005d"),
    ("\u007b", "\u007d"),
    ("\u0f3a", "\u0f3b"),
    ("\u0f3c", "\u0f3d"),
    ("\u169b", "\u169c"),
    ("\u2045", "\u2046"),
    ("\u207d", "\u207e"),
    ("\u208d", "\u208e"),
    ("\u2308", "\u2309"),
    ("\u230a", "\u230b"),
    ("\u2329", "\u232a"),
    ("\u2768", "\u2769"),
    ("\u276a", "\u276b"),
    ("\u276c", "\u276d"),
    ("\u276e", "\u276f"),
    ("\u2770", "\u2771"),
    ("\u2772", "\u2773"),
    ("\u2774", "\u2775"),
    ("\u27c5", "\u27c6"),
    ("\u27e6", "\u27e7"),
    ("\u27e8", "\u27e9"),
    ("\u27ea", "\u27eb"),
    ("\u27ec", "\u27ed"),
    ("\u27ee", "\u27ef"),
    ("\u2983", "\u2984"),
    ("\u2985", "\u2986"),
    ("\u2987", "\u2988"),
    ("\u2989", "\u298a"),
    ("\u298b", "\u298c"),
    # The next two pairs are intentionally "crossed" in BidiBrackets.txt
    ("\u298d", "\u2990"),
    ("\u298f", "\u298e"),
    ("\u2991", "\u2992"),
    ("\u2993", "\u2994"),
    ("\u2995", "\u2996"),
    ("\u2997", "\u2998"),
    ("\u29d8", "\u29d9"),
    ("\u29da", "\u29db"),
    ("\u29fc", "\u29fd"),
    ("\u2e22", "\u2e23"),
    ("\u2e24", "\u2e25"),
    ("\u2e26", "\u2e27"),
    ("\u2e28", "\u2e29"),
    ("\u2e55", "\u2e56"),
    ("\u2e57", "\u2e58"),
    ("\u2e59", "\u2e5a"),
    ("\u2e5b", "\u2e5c"),
    ("\u3008", "\u3009"),
    ("\u300a", "\u300b"),
    ("\u300c", "\u300d"),
    ("\u300e", "\u300f"),
    ("\u3010", "\u3011"),
    ("\u3014", "\u3015"),
    ("\u3016", "\u3017"),
    ("\u3018", "\u3019"),
    ("\u301a", "\u301b"),
    ("\ufe59", "\ufe5a"),
    ("\ufe5b", "\ufe5c"),
    ("\ufe5d", "\ufe5e"),
    ("\uff08", "\uff09"),
    ("\uff3b", "\uff3d"),
    ("\uff5b", "\uff5d"),
    ("\uff5f", "\uff60"),
    ("\uff62", "\uff63"),
)

# Maps every bracket character to its counterpart ("pair") and to its
# type: "o" for an opening bracket, "c" for a closing bracket.
BIDI_BRACKETS = {
    bracket: {"pair": counterpart, "type": bracket_type}
    for opening, closing in _BIDI_BRACKET_PAIRS
    for bracket, counterpart, bracket_type in (
        (opening, closing, "o"),
        (closing, opening, "c"),
    )
}
class BidiCharacter:
    """
    A single character of a paragraph, together with the state the
    bidirectional algorithm keeps for it.

    Attributes:
        character_index: index value received at construction time.
        character: the character itself.
        bidi_class: current bidirectional class; rewritten while rules are applied.
        original_bidi_class: bidirectional class assigned at construction, never rewritten here.
        embedding_level: current embedding level (an integer; odd means right-to-left).
        direction: initialised to None; not assigned anywhere else in this class.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ):
        self.character_index = character_index
        self.character = character
        # In debug mode uppercase characters stand in for right-to-left text;
        # everything else gets its class from the Unicode database.
        if debug and character.isupper():
            self.bidi_class = "R"
        else:
            self.bidi_class = unicodedata.bidirectional(character)
        self.original_bidi_class = self.bidi_class
        self.embedding_level = embedding_level
        self.direction = None

    def get_direction_from_level(self):
        """Return "R" for an odd embedding level and "L" for an even one."""
        return "R" if self.embedding_level % 2 else "L"

    def set_class(self, cls):
        """Replace the current bidirectional class (original_bidi_class is left untouched)."""
        self.bidi_class = cls

    def __repr__(self):
        return (
            f"character_index: {self.character_index} character: {self.character}"
            f" bidi_class: {self.bidi_class} original_bidi_class: {self.original_bidi_class}"
            f" embedding_level: {self.embedding_level} direction: {self.direction}"
        )
@dataclass
class DirectionalStatus:
    """One entry of the directional status stack used while resolving explicit levels."""

    __slots__ = (
        "embedding_level",
        "directional_override_status",
        "directional_isolate_status",
    )

    # between 0 and MAX_DEPTH
    embedding_level: int
    # "N" (Neutral), "L" (Left) or "R" (Right)
    directional_override_status: str
    directional_isolate_status: bool
class IsolatingRun:
    """
    An isolating run sequence: a list of characters plus the directions found
    at its start (sos) and at its end (eos).

    Building an instance immediately resolves weak types (W1-W7), neutral
    types (N0-N2) and implicit levels (I1-I2), updating the characters in place.
    """

    __slots__ = ["characters", "previous_direction", "next_direction"]

    def __init__(self, characters: "List[BidiCharacter]", sos: str, eos: str):
        self.characters = characters
        self.previous_direction = sos
        self.next_direction = eos
        self.resolve_weak_types()
        self.resolve_neutral_types()
        self.resolve_implicit_levels()

    def resolve_weak_types(self) -> None:
        """Apply rules W1 to W7 to the characters of this run, in place."""
        characters = self.characters
        total = len(characters)

        # W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and change the type of the NSM to Other Neutral
        #     if the previous character is an isolate initiator or PDI, and to the type of the previous character otherwise.
        #     If the NSM is at the start of the isolating run sequence, it will get the type of sos.
        for i, bidi_char in enumerate(characters):
            if bidi_char.bidi_class != "NSM":
                continue
            if i == 0:
                bidi_char.set_class(self.previous_direction)
            else:
                previous_class = characters[i - 1].bidi_class
                bidi_char.set_class(
                    "ON"
                    if previous_class in ("LRI", "RLI", "FSI", "PDI")
                    else previous_class
                )

        # W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos) is found.
        #     If an AL is found, change the type of the European number to Arabic number.
        # W3. Change all ALs to R.
        last_strong_type = self.previous_direction
        for bidi_char in characters:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "AL":
                bidi_char.set_class("R")
            if bidi_char.bidi_class == "EN" and last_strong_type == "AL":
                bidi_char.set_class("AN")

        # W4. A single European separator between two European numbers changes to a European number.
        #     A single common separator between two numbers of the same type changes to that type.
        #     The first and last characters have no neighbour on one side and are skipped.
        for i in range(1, total - 1):
            bidi_char = characters[i]
            before = characters[i - 1].bidi_class
            after = characters[i + 1].bidi_class
            if bidi_char.bidi_class == "ES" and before == "EN" and after == "EN":
                bidi_char.set_class("EN")
            if (
                bidi_char.bidi_class == "CS"
                and before in ("AN", "EN")
                and after == before
            ):
                bidi_char.set_class(before)

        # W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
        # W6. All remaining separators and terminators (after the application of W4 and W5) change to Other Neutral.
        # Each run of consecutive ET is handled as a whole, iteratively: a
        # per-character recursive scan would overflow the interpreter stack
        # on long runs and would take quadratic time.
        i = 0
        while i < total:
            bidi_class = characters[i].bidi_class
            if bidi_class == "ET":
                run_end = i
                while run_end < total and characters[run_end].bidi_class == "ET":
                    run_end += 1
                next_to_number = (i > 0 and characters[i - 1].bidi_class == "EN") or (
                    run_end < total and characters[run_end].bidi_class == "EN"
                )
                resolved = "EN" if next_to_number else "ON"
                for position in range(i, run_end):
                    characters[position].set_class(resolved)
                i = run_end
                continue
            if bidi_class in ("ES", "CS"):
                characters[i].set_class("ON")
            i += 1

        # W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
        #     If an L is found, then change the type of the European number to L.
        last_strong_type = self.previous_direction
        for bidi_char in characters:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "EN" and last_strong_type == "L":
                bidi_char.set_class("L")

    def pair_brackets(self) -> List[Tuple[int, int]]:
        """
        Calculate all the bracket pairs on an isolate run, to be used on rule N0
        How to calculate bracket pairs:
        - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14
        - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/

        Returns a list of (opening index, closing index) tuples sorted by
        opening index, or an empty list when more than 63 brackets are open
        at the same time.
        """
        # U+232A and U+3009 are canonically equivalent closing angle brackets:
        # either of them closes an opening bracket expecting the other one.
        equivalent_closing_angles = ("\u232a", "\u3009")
        open_brackets = []  # stack of (opening character, index)
        bracket_pairs = []
        for index, char in enumerate(self.characters):
            # Only characters still classified as ON take part in pairing
            if char.bidi_class != "ON" or char.character not in BIDI_BRACKETS:
                continue
            if BIDI_BRACKETS[char.character]["type"] == "o":
                if len(open_brackets) >= 63:
                    return []
                open_brackets.append((char.character, index))
                continue
            # Closing bracket: look for the innermost matching opening bracket
            for position in range(len(open_brackets) - 1, -1, -1):
                open_char, open_index = open_brackets[position]
                expected = BIDI_BRACKETS[open_char]["pair"]
                if expected == char.character or (
                    expected in equivalent_closing_angles
                    and char.character in equivalent_closing_angles
                ):
                    bracket_pairs.append((open_index, index))
                    # Drop the matched bracket and every bracket opened after it
                    del open_brackets[position:]
                    break
        return sorted(bracket_pairs, key=itemgetter(0))

    def resolve_neutral_types(self) -> None:
        """Apply rules N0 to N2 to the characters of this run, in place."""
        characters = self.characters

        def strong_direction(bidi_class: str):
            # For N0-N2, European and Arabic numbers count as R
            if bidi_class == "L":
                return "L"
            if bidi_class in ("R", "AN", "EN"):
                return "R"
            return None

        def previous_strong(index: int):
            # First strong direction found before `index`, or sos if there is none
            for position in range(index - 1, -1, -1):
                direction = strong_direction(characters[position].bidi_class)
                if direction:
                    return direction
            return self.previous_direction

        # N0: paired brackets take the direction of their content / context
        brackets = self.pair_brackets()
        if brackets:
            embedding_direction = characters[0].get_direction_from_level()
            opposite_direction = "L" if embedding_direction == "R" else "R"
            for open_index, close_index in brackets:
                strong_same_direction = False
                strong_opposite_direction = False
                for index in range(open_index, close_index):
                    direction = strong_direction(characters[index].bidi_class)
                    if direction == embedding_direction:
                        strong_same_direction = True
                        break
                    if direction == opposite_direction:
                        strong_opposite_direction = True
                resulting_direction = None
                if strong_same_direction:
                    resulting_direction = embedding_direction
                elif strong_opposite_direction:
                    # Only opposite-direction content: the context before the
                    # opening bracket decides
                    if previous_strong(open_index) == opposite_direction:
                        resulting_direction = opposite_direction
                    else:
                        resulting_direction = embedding_direction
                if resulting_direction:
                    characters[open_index].bidi_class = resulting_direction
                    characters[close_index].bidi_class = resulting_direction
                    if len(characters) > close_index + 1:
                        # An NSM right after the closing bracket follows the bracket
                        next_char = characters[close_index + 1]
                        if (
                            next_char.original_bidi_class == "NSM"
                            and next_char.bidi_class == "ON"
                        ):
                            next_char.bidi_class = resulting_direction

        # N1-N2
        # The strong direction following each position is computed once with a
        # backward pass; this loop only rewrites characters at or before the
        # current position, so those values stay valid while it runs.
        following_strong = [self.next_direction] * len(characters)
        upcoming = self.next_direction
        for index in range(len(characters) - 1, -1, -1):
            following_strong[index] = upcoming
            upcoming = strong_direction(characters[index].bidi_class) or upcoming

        preceding = self.previous_direction
        for index, bidi_char in enumerate(characters):
            if bidi_char.bidi_class in (
                "B",
                "S",
                "WS",
                "ON",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if preceding == following_strong[index]:
                    # N1: same direction on both sides
                    bidi_char.bidi_class = preceding
                else:
                    # N2: fall back to the embedding direction
                    bidi_char.bidi_class = bidi_char.get_direction_from_level()
            preceding = strong_direction(bidi_char.bidi_class) or preceding

    def resolve_implicit_levels(self) -> None:
        """Apply rules I1 and I2, raising embedding levels in place."""
        for bidi_char in self.characters:
            if bidi_char.embedding_level % 2 == 0:
                # I1. For all characters with an even (left-to-right) embedding level,
                #     those of type R go up one level and those of type AN or EN go up two levels.
                if bidi_char.bidi_class == "R":
                    bidi_char.embedding_level += 1
                elif bidi_char.bidi_class in ("AN", "EN"):
                    bidi_char.embedding_level += 2
            elif bidi_char.bidi_class in ("L", "EN", "AN"):
                # I2. For all characters with an odd (right-to-left) embedding level,
                #     those of type L, EN or AN go up one level.
                bidi_char.embedding_level += 1
def auto_detect_base_direction(
    string: str, stop_at_pdi: bool = False, debug: bool = False
) -> TextDirection:
    """
    This function applies rules P2 and P3 to detect the direction of a paragraph, returning
    the first strong direction and skipping over isolate sequences.
    P1 must be applied before calling this function (breaking into paragraphs)
    stop_at_pdi can be set to True to get the direction of a single isolate sequence
    debug can be set to True to treat uppercase characters as right-to-left ones,
    the same convention applied by BidiCharacter.
    """
    # Auto-LTR (standard BIDI) uses the first L/R/AL character, and is LTR if none is found.
    isolate = 0  # number of isolate initiators currently open
    for char in string:
        # The debug convention looks at the character itself: Bidi_Class codes
        # are always uppercase, so testing the class would match everything.
        if debug and char.isupper():
            bidi_class = "R"
        else:
            bidi_class = unicodedata.bidirectional(char)
        if bidi_class in ("LRI", "RLI", "FSI"):
            isolate += 1
        elif bidi_class == "PDI":
            if isolate > 0:
                isolate -= 1
            elif stop_at_pdi:
                # PDI closing the sequence being inspected: no strong character found
                return TextDirection.LTR
        elif isolate == 0:
            if bidi_class in ("R", "AL"):
                return TextDirection.RTL
            if bidi_class == "L":
                return TextDirection.LTR
    return TextDirection.LTR
def calculate_isolate_runs(paragraph: List[BidiCharacter]) -> List[IsolatingRun]:
    """
    Split a paragraph into level runs and combine them into isolating run
    sequences (BD13 and X10).

    Every IsolatingRun is built with its sos / eos directions, and building it
    resolves the characters it holds. The paragraph must not be empty.
    """
    isolate_initiators = ("LRI", "RLI", "FSI")

    def direction_of(level: int) -> str:
        return "R" if level % 2 else "L"

    # Level runs: maximal sequences of characters sharing an embedding level
    current_run = {"level": paragraph[0].embedding_level, "text": [], "complete": False}
    level_runs = [current_run]
    for bidi_char in paragraph:
        if bidi_char.embedding_level != current_run["level"]:
            current_run = {
                "level": bidi_char.embedding_level,
                "text": [],
                "complete": False,
            }
            level_runs.append(current_run)
        current_run["text"].append(bidi_char)

    # compute sos, eos for each level run
    last_index = len(level_runs) - 1
    for index, run in enumerate(level_runs):
        level = run["level"]
        sos_level = level if index == 0 else max(level, level_runs[index - 1]["level"])
        if (
            index == last_index
            or run["text"][-1].original_bidi_class in isolate_initiators
        ):
            # X10 - last run, or last char is an isolate initiator: use the run's own level
            eos_level = level
        else:
            eos_level = max(level, level_runs[index + 1]["level"])
        run["sos"] = direction_of(sos_level)
        run["eos"] = direction_of(eos_level)

    # combine level runs to create isolate runs
    isolate_runs = []
    for index, run in enumerate(level_runs):
        if run["complete"]:
            continue
        run["complete"] = True
        characters = run["text"]
        eos = run["eos"]
        if characters[-1].original_bidi_class in isolate_initiators:
            for later_run in level_runs[index + 1 :]:
                continues_sequence = (
                    later_run["level"] == run["level"]
                    and later_run["text"][0].original_bidi_class == "PDI"
                )
                if not continues_sequence:
                    continue
                # Same level and starting with PDI: append it to this sequence
                characters.extend(later_run["text"])
                later_run["complete"] = True
                eos = later_run["eos"]
                if later_run["text"][-1].original_bidi_class not in isolate_initiators:
                    break
        isolate_runs.append(IsolatingRun(characters=characters, sos=run["sos"], eos=eos))
    return isolate_runs
class BidiParagraph:
    """
    A paragraph of text processed by the bidirectional algorithm.

    The constructor resolves the embedding level of every character; the
    getters expose the characters in logical order, in visual (reordered)
    order, or grouped in fragments sharing a direction.

    Attributes:
        text: the paragraph text.
        base_direction: the given direction, or the auto-detected one when none is given.
        debug: when True, uppercase characters are treated as right-to-left.
        base_embedding_level: 0 for a left-to-right base direction, 1 otherwise.
        characters: resolved characters, without the ones removed by rule X9.
    """

    __slots__ = (
        "text",
        "base_direction",
        "debug",
        "base_embedding_level",
        "characters",
    )

    def __init__(
        self, text: str, base_direction: TextDirection = None, debug: bool = False
    ):
        self.text = text
        # debug has to be passed by keyword: the second positional parameter
        # of auto_detect_base_direction is stop_at_pdi.
        self.base_direction = (
            base_direction
            if base_direction
            else auto_detect_base_direction(self.text, debug=debug)
        )
        self.debug = debug
        self.base_embedding_level = (
            0 if self.base_direction == TextDirection.LTR else 1
        )  # base level
        self.characters: List[BidiCharacter] = []
        self.get_bidi_characters()

    def get_characters(self) -> List[BidiCharacter]:
        """Return the resolved characters in logical order."""
        return self.characters

    def get_characters_with_embedding_level(self) -> List[BidiCharacter]:
        """Return the characters in logical order after rule L1 has reset their levels."""
        # Calculate embedding level for each character after breaking isolating runs.
        # Only used on conformance testing
        self.reorder_resolved_levels()
        return self.characters

    def get_reordered_characters(self) -> List[BidiCharacter]:
        """Return the characters in visual order (a tuple, as built by reorder_resolved_levels)."""
        return self.reorder_resolved_levels()

    def get_all(self):
        """Return the characters in logical order and in visual order."""
        return self.characters, self.reorder_resolved_levels()

    def get_reordered_string(self):
        "Used for conformance validation"
        return "".join(c.character for c in self.reorder_resolved_levels())

    def get_bidi_fragments(self):
        """Return the text split in (fragment, TextDirection) tuples."""
        return self.split_bidi_fragments()

    def get_bidi_characters(self) -> None:
        """
        Resolve explicit levels (rules X1 to X9) for every character of the
        text, store the surviving characters in self.characters and resolve
        their isolating run sequences.
        """
        # Explicit levels and directions. Rule X1
        stack: "deque[DirectionalStatus]" = deque()
        current_status = DirectionalStatus(
            embedding_level=self.base_embedding_level,
            directional_override_status="N",
            directional_isolate_status=False,
        )
        stack.append(replace(current_status))
        overflow_isolate_count = 0
        overflow_embedding_count = 0
        valid_isolate_count = 0
        results = []

        # Explicit embeddings. Process each character individually applying rules X2 through X8
        for index, char in enumerate(self.text):
            bidi_char = BidiCharacter(
                index, char, current_status.embedding_level, self.debug
            )
            new_bidi_class = None

            if bidi_char.bidi_class == "FSI":
                # X5c: FSI behaves as LRI or RLI depending on the text it isolates
                isolated_direction = auto_detect_base_direction(
                    self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                )
                bidi_char.bidi_class = (
                    "LRI" if isolated_direction == TextDirection.LTR else "RLI"
                )

            if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
                # X2 - X5: calculate explicit embeddings and explicit overrides
                is_isolate = bidi_char.bidi_class[2] == "I"
                if bidi_char.bidi_class[0] == "R":
                    new_embedding_level = (
                        current_status.embedding_level + 1
                    ) | 1  # least greater odd
                else:
                    new_embedding_level = (
                        current_status.embedding_level + 2
                    ) & ~1  # least greater even
                if is_isolate and current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status
                if (
                    new_embedding_level <= MAX_DEPTH
                    and overflow_isolate_count == 0
                    and overflow_embedding_count == 0
                ):
                    current_status.embedding_level = new_embedding_level
                    current_status.directional_override_status = (
                        bidi_char.bidi_class[0]
                        if bidi_char.bidi_class[2] == "O"
                        else "N"
                    )
                    if is_isolate:
                        valid_isolate_count += 1
                    current_status.directional_isolate_status = is_isolate
                    stack.append(replace(current_status))
                elif is_isolate:
                    overflow_isolate_count += 1
                elif overflow_isolate_count == 0:
                    overflow_embedding_count += 1

            if bidi_char.bidi_class not in (
                "B",
                "BN",
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "FSI",
                "PDI",
            ):  # X6
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDI":  # X6a
                if overflow_isolate_count > 0:
                    overflow_isolate_count -= 1
                elif valid_isolate_count > 0:
                    overflow_embedding_count = 0
                    # Discard the embeddings opened inside the isolate,
                    # then the isolate entry itself
                    while not stack[-1].directional_isolate_status:
                        stack.pop()
                    stack.pop()
                    current_status = replace(stack[-1])
                    valid_isolate_count -= 1
                assert isinstance(current_status, DirectionalStatus)
                bidi_char.embedding_level = current_status.embedding_level
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDF":  # X7
                if overflow_isolate_count == 0:
                    if overflow_embedding_count > 0:
                        overflow_embedding_count -= 1
                    elif (
                        not current_status.directional_isolate_status
                        and len(stack) > 1
                    ):
                        stack.pop()
                        current_status = replace(stack[-1])

            if new_bidi_class:
                bidi_char.bidi_class = new_bidi_class

            if bidi_char.bidi_class not in (
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "BN",
            ):  # X9
                if bidi_char.bidi_class == "B":
                    bidi_char.embedding_level = self.base_embedding_level
                elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                    # Isolate initiators keep the level they were created with
                    bidi_char.embedding_level = current_status.embedding_level
                results.append(bidi_char)

        self.characters = results
        if results:
            calculate_isolate_runs(results)

    def split_bidi_fragments(self):
        """
        Group consecutive characters sharing a direction.

        Returns a tuple of (text fragment, TextDirection) tuples in logical
        order, or an empty tuple when there are no characters.
        """
        if len(self.characters) == 0:
            return ()

        def to_text_direction(direction: str):
            return TextDirection.RTL if direction == "R" else TextDirection.LTR

        bidi_fragments = []
        fragment_chars = []
        current_direction = ""
        for c in self.characters:
            direction = c.get_direction_from_level()
            if direction != current_direction:
                if fragment_chars:
                    bidi_fragments.append(
                        ("".join(fragment_chars), to_text_direction(current_direction))
                    )
                    fragment_chars = []
                current_direction = direction
            fragment_chars.append(c.character)
        if fragment_chars:
            bidi_fragments.append(
                ("".join(fragment_chars), to_text_direction(current_direction))
            )
        return tuple(bidi_fragments)

    def reorder_resolved_levels(self):
        """
        Apply rules L1 and L2.

        L1 updates the embedding level of the stored characters in place;
        L2 returns a tuple with the characters in visual order, leaving
        self.characters in logical order.
        """
        before_separator = True
        end_of_line = True
        max_level = 0
        min_odd_level = 999
        # Walk backwards so trailing whitespace is seen before the text it follows
        for bidi_char in reversed(self.characters):
            # Rule L1. Reset the embedding level of segment separators, paragraph separators,
            #          and any adjacent whitespace.
            if bidi_char.original_bidi_class in ("S", "B"):
                bidi_char.embedding_level = self.base_embedding_level
                before_separator = True
            elif bidi_char.original_bidi_class in (
                "BN",
                "WS",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if before_separator or end_of_line:
                    bidi_char.embedding_level = self.base_embedding_level
            else:
                before_separator = False
                end_of_line = False

            if bidi_char.embedding_level > max_level:
                max_level = bidi_char.embedding_level
            if (
                bidi_char.embedding_level % 2 != 0
                and bidi_char.embedding_level < min_odd_level
            ):
                min_odd_level = bidi_char.embedding_level

        # Rule L2. From the highest level found in the text to the lowest odd level on each line,
        #          reverse any contiguous sequence of characters that are at that level or higher.
        # When no odd level exists min_odd_level stays at 999 and nothing is reversed.
        reordered_paragraph = self.characters.copy()
        for level in range(max_level, min_odd_level - 1, -1):
            temp_results = []
            rev = []
            for bidi_char in reordered_paragraph:
                if bidi_char.embedding_level >= level:
                    rev.append(bidi_char)
                else:
                    if rev:
                        rev.reverse()
                        temp_results += rev
                        rev = []
                    temp_results.append(bidi_char)
            if rev:
                rev.reverse()
                temp_results += rev
            reordered_paragraph = temp_results
        return tuple(reordered_paragraph)