Module fpdf.bidi
Functions
def auto_detect_base_direction(string: str, stop_at_pdi: bool = False, debug: bool = False) ‑> TextDirection
-
Expand source code Browse git
def auto_detect_base_direction(
    string: str, stop_at_pdi: bool = False, debug: bool = False
) -> TextDirection:
    """
    Detect the base direction of a paragraph by applying rules P2 and P3.

    The first strong character (bidi class L, R or AL) found outside of any
    isolate sequence decides the direction. P1 (breaking the text into
    paragraphs) must be applied before calling this function.

    Args:
        string: the paragraph text to inspect.
        stop_at_pdi: when True, the scan ends at the first PDI that does not
            close an isolate opened inside `string`; this gives the direction
            of a single isolate sequence.
        debug: when True, upper-case characters are treated as bidi class "R",
            the same convention BidiCharacter applies in debug mode.

    Returns:
        TextDirection.RTL if the first strong character is R or AL,
        TextDirection.LTR if it is L or if no strong character is found.
    """
    # Auto-LTR (standard BIDI) uses the first L/R/AL character, and is LTR if none is found.
    isolate = 0
    for char in string:
        # The debug override looks at the character itself. Bidi class names
        # are always upper-case, so testing the class name would turn every
        # character into "R".
        if debug and char.isupper():
            bidi_class = "R"
        else:
            bidi_class = unicodedata.bidirectional(char)
        if bidi_class == "PDI" and isolate == 0 and stop_at_pdi:
            return TextDirection.LTR
        if bidi_class in ("LRI", "RLI", "FSI"):
            isolate += 1
        if bidi_class == "PDI" and isolate > 0:
            isolate -= 1
        # Strong characters inside an isolate sequence are skipped.
        if bidi_class in ("R", "AL") and isolate == 0:
            return TextDirection.RTL
        if bidi_class == "L" and isolate == 0:
            return TextDirection.LTR
    return TextDirection.LTR
This function applies rules P2 and P3 to detect the direction of a paragraph, returning the first strong direction and skipping over isolate sequences. P1 (breaking into paragraphs) must be applied before calling this function. stop_at_pdi can be set to True to get the direction of a single isolate sequence.
def calculate_isolate_runs(paragraph: List[BidiCharacter]) ‑> List[IsolatingRun]
-
Expand source code Browse git
def calculate_isolate_runs(paragraph: List[BidiCharacter]) -> List[IsolatingRun]:
    """
    Split a paragraph into isolating run sequences (BD13 and X10).

    The characters are first grouped into level runs (maximal sequences with
    the same embedding level). A level run ending with an isolate initiator is
    then joined with the following level runs of the same level that start
    with a PDI.

    Args:
        paragraph: the characters of one paragraph, with embedding levels set.

    Returns:
        One IsolatingRun per isolating run sequence, built with its
        characters and its sos / eos directions ("L" or "R"). An empty
        paragraph yields an empty list.
    """
    if not paragraph:
        return []

    isolate_initiators = ("LRI", "RLI", "FSI")

    # BD13 and X10: group consecutive characters with the same embedding level
    level_runs = []
    run_chars = []
    run_level = paragraph[0].embedding_level
    for bidi_char in paragraph:
        if bidi_char.embedding_level != run_level:
            level_runs.append(
                {"level": run_level, "text": run_chars, "complete": False}
            )
            run_chars = []
            run_level = bidi_char.embedding_level
        run_chars.append(bidi_char)
    level_runs.append({"level": run_level, "text": run_chars, "complete": False})

    def level_to_direction(level: int) -> str:
        return "L" if level % 2 == 0 else "R"

    # compute sos, eos for each level run
    last_index = len(level_runs) - 1
    for index, run in enumerate(level_runs):
        if index == 0:
            sos = level_to_direction(run["level"])
        else:
            sos = level_to_direction(
                max(run["level"], level_runs[index - 1]["level"])
            )
        if index == last_index:
            eos = level_to_direction(run["level"])
        elif run["text"][-1].original_bidi_class in isolate_initiators:
            # X10 - last char is an isolator without matching PDI - set EOS to embedding level
            eos = level_to_direction(run["level"])
        else:
            eos = level_to_direction(
                max(run["level"], level_runs[index + 1]["level"])
            )
        run["sos"] = sos
        run["eos"] = eos

    # combine level runs to create isolate runs
    isolate_runs = []
    for index, run in enumerate(level_runs):
        if run["complete"]:
            # already merged into an earlier isolating run sequence
            continue
        sos = run["sos"]
        eos = run["eos"]
        # Same list object as run["text"]: the in-place += below extends it too.
        ir_chars = run["text"]
        run["complete"] = True
        if run["text"][-1].original_bidi_class in isolate_initiators:
            for next_run in level_runs[index + 1 :]:
                if (
                    next_run["level"] == run["level"]
                    and next_run["text"][0].original_bidi_class == "PDI"
                ):
                    run["text"] += next_run["text"]
                    next_run["complete"] = True
                    eos = next_run["eos"]
                    # Keep chaining only while the joined run itself ends
                    # with an isolate initiator.
                    if next_run["text"][-1].original_bidi_class not in (
                        isolate_initiators
                    ):
                        break
        isolate_runs.append(IsolatingRun(characters=ir_chars, sos=sos, eos=eos))
    return isolate_runs
Classes
class BidiCharacter (character_index: int, character: str, embedding_level: str, debug: bool)
-
Expand source code Browse git
class BidiCharacter:
    """
    One character of a paragraph together with its bidi properties.

    Attributes are documented next to their assignment in __init__.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ):
        """
        Args:
            character_index: index stored on the instance as `character_index`.
            character: the character itself.
            embedding_level: initial embedding level; an integer, since
                get_direction_from_level() takes it modulo 2.
            debug: when True, an upper-case character is given bidi class "R"
                instead of the class reported by unicodedata.
        """
        self.character_index = character_index
        self.character = character
        if debug and character.isupper():
            self.bidi_class = "R"
        else:
            self.bidi_class = unicodedata.bidirectional(character)
        # Class assigned at construction; set_class() does not modify it.
        self.original_bidi_class = self.bidi_class
        self.embedding_level = embedding_level
        # Not computed here; starts as None.
        self.direction = None

    def get_direction_from_level(self):
        """Return "R" for an odd embedding level and "L" for an even one."""
        return "R" if self.embedding_level % 2 else "L"

    def set_class(self, cls):
        """Replace the current bidi class, leaving original_bidi_class untouched."""
        self.bidi_class = cls

    def __repr__(self):
        return (
            f"character_index: {self.character_index} character: {self.character}"
            + f" bidi_class: {self.bidi_class} original_bidi_class: {self.original_bidi_class}"
            + f" embedding_level: {self.embedding_level} direction: {self.direction}"
        )
Instance variables
var bidi_class
-
Expand source code Browse git
class BidiCharacter:
    """
    One character of a paragraph together with its bidi properties.

    Attributes are documented next to their assignment in __init__.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ):
        """
        Args:
            character_index: index stored on the instance as `character_index`.
            character: the character itself.
            embedding_level: initial embedding level; an integer, since
                get_direction_from_level() takes it modulo 2.
            debug: when True, an upper-case character is given bidi class "R"
                instead of the class reported by unicodedata.
        """
        self.character_index = character_index
        self.character = character
        if debug and character.isupper():
            self.bidi_class = "R"
        else:
            self.bidi_class = unicodedata.bidirectional(character)
        # Class assigned at construction; set_class() does not modify it.
        self.original_bidi_class = self.bidi_class
        self.embedding_level = embedding_level
        # Not computed here; starts as None.
        self.direction = None

    def get_direction_from_level(self):
        """Return "R" for an odd embedding level and "L" for an even one."""
        return "R" if self.embedding_level % 2 else "L"

    def set_class(self, cls):
        """Replace the current bidi class, leaving original_bidi_class untouched."""
        self.bidi_class = cls

    def __repr__(self):
        return (
            f"character_index: {self.character_index} character: {self.character}"
            + f" bidi_class: {self.bidi_class} original_bidi_class: {self.original_bidi_class}"
            + f" embedding_level: {self.embedding_level} direction: {self.direction}"
        )
var character
-
Expand source code Browse git
class BidiCharacter:
    """
    One character of a paragraph together with its bidi properties.

    Attributes are documented next to their assignment in __init__.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ):
        """
        Args:
            character_index: index stored on the instance as `character_index`.
            character: the character itself.
            embedding_level: initial embedding level; an integer, since
                get_direction_from_level() takes it modulo 2.
            debug: when True, an upper-case character is given bidi class "R"
                instead of the class reported by unicodedata.
        """
        self.character_index = character_index
        self.character = character
        if debug and character.isupper():
            self.bidi_class = "R"
        else:
            self.bidi_class = unicodedata.bidirectional(character)
        # Class assigned at construction; set_class() does not modify it.
        self.original_bidi_class = self.bidi_class
        self.embedding_level = embedding_level
        # Not computed here; starts as None.
        self.direction = None

    def get_direction_from_level(self):
        """Return "R" for an odd embedding level and "L" for an even one."""
        return "R" if self.embedding_level % 2 else "L"

    def set_class(self, cls):
        """Replace the current bidi class, leaving original_bidi_class untouched."""
        self.bidi_class = cls

    def __repr__(self):
        return (
            f"character_index: {self.character_index} character: {self.character}"
            + f" bidi_class: {self.bidi_class} original_bidi_class: {self.original_bidi_class}"
            + f" embedding_level: {self.embedding_level} direction: {self.direction}"
        )
var character_index
-
Expand source code Browse git
class BidiCharacter:
    """
    One character of a paragraph together with its bidi properties.

    Attributes are documented next to their assignment in __init__.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ):
        """
        Args:
            character_index: index stored on the instance as `character_index`.
            character: the character itself.
            embedding_level: initial embedding level; an integer, since
                get_direction_from_level() takes it modulo 2.
            debug: when True, an upper-case character is given bidi class "R"
                instead of the class reported by unicodedata.
        """
        self.character_index = character_index
        self.character = character
        if debug and character.isupper():
            self.bidi_class = "R"
        else:
            self.bidi_class = unicodedata.bidirectional(character)
        # Class assigned at construction; set_class() does not modify it.
        self.original_bidi_class = self.bidi_class
        self.embedding_level = embedding_level
        # Not computed here; starts as None.
        self.direction = None

    def get_direction_from_level(self):
        """Return "R" for an odd embedding level and "L" for an even one."""
        return "R" if self.embedding_level % 2 else "L"

    def set_class(self, cls):
        """Replace the current bidi class, leaving original_bidi_class untouched."""
        self.bidi_class = cls

    def __repr__(self):
        return (
            f"character_index: {self.character_index} character: {self.character}"
            + f" bidi_class: {self.bidi_class} original_bidi_class: {self.original_bidi_class}"
            + f" embedding_level: {self.embedding_level} direction: {self.direction}"
        )
var direction
-
Expand source code Browse git
class BidiCharacter:
    """
    One character of a paragraph together with its bidi properties.

    Attributes are documented next to their assignment in __init__.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ):
        """
        Args:
            character_index: index stored on the instance as `character_index`.
            character: the character itself.
            embedding_level: initial embedding level; an integer, since
                get_direction_from_level() takes it modulo 2.
            debug: when True, an upper-case character is given bidi class "R"
                instead of the class reported by unicodedata.
        """
        self.character_index = character_index
        self.character = character
        if debug and character.isupper():
            self.bidi_class = "R"
        else:
            self.bidi_class = unicodedata.bidirectional(character)
        # Class assigned at construction; set_class() does not modify it.
        self.original_bidi_class = self.bidi_class
        self.embedding_level = embedding_level
        # Not computed here; starts as None.
        self.direction = None

    def get_direction_from_level(self):
        """Return "R" for an odd embedding level and "L" for an even one."""
        return "R" if self.embedding_level % 2 else "L"

    def set_class(self, cls):
        """Replace the current bidi class, leaving original_bidi_class untouched."""
        self.bidi_class = cls

    def __repr__(self):
        return (
            f"character_index: {self.character_index} character: {self.character}"
            + f" bidi_class: {self.bidi_class} original_bidi_class: {self.original_bidi_class}"
            + f" embedding_level: {self.embedding_level} direction: {self.direction}"
        )
var embedding_level
-
Expand source code Browse git
class BidiCharacter:
    """
    One character of a paragraph together with its bidi properties.

    Attributes are documented next to their assignment in __init__.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ):
        """
        Args:
            character_index: index stored on the instance as `character_index`.
            character: the character itself.
            embedding_level: initial embedding level; an integer, since
                get_direction_from_level() takes it modulo 2.
            debug: when True, an upper-case character is given bidi class "R"
                instead of the class reported by unicodedata.
        """
        self.character_index = character_index
        self.character = character
        if debug and character.isupper():
            self.bidi_class = "R"
        else:
            self.bidi_class = unicodedata.bidirectional(character)
        # Class assigned at construction; set_class() does not modify it.
        self.original_bidi_class = self.bidi_class
        self.embedding_level = embedding_level
        # Not computed here; starts as None.
        self.direction = None

    def get_direction_from_level(self):
        """Return "R" for an odd embedding level and "L" for an even one."""
        return "R" if self.embedding_level % 2 else "L"

    def set_class(self, cls):
        """Replace the current bidi class, leaving original_bidi_class untouched."""
        self.bidi_class = cls

    def __repr__(self):
        return (
            f"character_index: {self.character_index} character: {self.character}"
            + f" bidi_class: {self.bidi_class} original_bidi_class: {self.original_bidi_class}"
            + f" embedding_level: {self.embedding_level} direction: {self.direction}"
        )
var original_bidi_class
-
Expand source code Browse git
class BidiCharacter:
    """
    One character of a paragraph together with its bidi properties.

    Attributes are documented next to their assignment in __init__.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ):
        """
        Args:
            character_index: index stored on the instance as `character_index`.
            character: the character itself.
            embedding_level: initial embedding level; an integer, since
                get_direction_from_level() takes it modulo 2.
            debug: when True, an upper-case character is given bidi class "R"
                instead of the class reported by unicodedata.
        """
        self.character_index = character_index
        self.character = character
        if debug and character.isupper():
            self.bidi_class = "R"
        else:
            self.bidi_class = unicodedata.bidirectional(character)
        # Class assigned at construction; set_class() does not modify it.
        self.original_bidi_class = self.bidi_class
        self.embedding_level = embedding_level
        # Not computed here; starts as None.
        self.direction = None

    def get_direction_from_level(self):
        """Return "R" for an odd embedding level and "L" for an even one."""
        return "R" if self.embedding_level % 2 else "L"

    def set_class(self, cls):
        """Replace the current bidi class, leaving original_bidi_class untouched."""
        self.bidi_class = cls

    def __repr__(self):
        return (
            f"character_index: {self.character_index} character: {self.character}"
            + f" bidi_class: {self.bidi_class} original_bidi_class: {self.original_bidi_class}"
            + f" embedding_level: {self.embedding_level} direction: {self.direction}"
        )
Methods
def get_direction_from_level(self)
-
Expand source code Browse git
def get_direction_from_level(self):
    """Map the embedding level to a direction: odd gives "R", even gives "L"."""
    if self.embedding_level % 2:
        return "R"
    return "L"
def set_class(self, cls)
-
Expand source code Browse git
def set_class(self, cls):
    """Overwrite the `bidi_class` attribute with `cls`."""
    self.bidi_class = cls
class BidiParagraph (text: str,
base_direction: TextDirection = None,
debug: bool = False)-
Expand source code Browse git
class BidiParagraph:
    """
    A paragraph of text with explicit embedding levels resolved per character.

    The constructor determines the base direction (auto-detected when none is
    given) and immediately runs get_bidi_characters() to fill `characters`.
    """

    __slots__ = (
        "text",
        "base_direction",
        "debug",
        "base_embedding_level",
        "characters",
    )

    def __init__(
        self, text: str, base_direction: TextDirection = None, debug: bool = False
    ):
        """
        Args:
            text: the paragraph text.
            base_direction: paragraph direction; auto-detected from `text`
                when falsy.
            debug: forwarded to auto_detect_base_direction() and to every
                BidiCharacter created for this paragraph.
        """
        self.text = text
        # `debug` must be passed by keyword: the second positional parameter
        # of auto_detect_base_direction() is `stop_at_pdi`, not `debug`.
        self.base_direction = (
            auto_detect_base_direction(self.text, debug=debug)
            if not base_direction
            else base_direction
        )
        self.debug = debug
        self.base_embedding_level = (
            0 if self.base_direction == TextDirection.LTR else 1
        )  # base level
        self.characters: List[BidiCharacter] = []
        self.get_bidi_characters()

    def get_characters(self) -> List[BidiCharacter]:
        """Return the characters in their original (logical) order."""
        return self.characters

    def get_characters_with_embedding_level(self) -> List[BidiCharacter]:
        """Return the characters after reorder_resolved_levels() has updated their levels."""
        # Calculate embedding level for each character after breaking isolating runs.
        # Only used on conformance testing
        self.reorder_resolved_levels()
        return self.characters

    def get_reordered_characters(self) -> List[BidiCharacter]:
        """Return the reordered characters (a tuple, see reorder_resolved_levels)."""
        return self.reorder_resolved_levels()

    def get_all(self):
        """Return a pair: (characters in original order, reordered characters)."""
        return self.characters, self.reorder_resolved_levels()

    def get_reordered_string(self):
        "Used for conformance validation"
        return "".join(c.character for c in self.reorder_resolved_levels())

    def get_bidi_fragments(self):
        """Return the result of split_bidi_fragments()."""
        return self.split_bidi_fragments()

    def get_bidi_characters(self) -> None:
        """
        Apply the explicit-level rules X1 to X9 to `self.text`.

        Stores the retained characters in `self.characters` and then calls
        calculate_isolate_runs() on them. Returns nothing.
        """
        # Explicit levels and directions. Rule X1
        stack: List[DirectionalStatus] = deque()
        current_status = DirectionalStatus(
            embedding_level=self.base_embedding_level,
            directional_override_status="N",
            directional_isolate_status=False,
        )
        # replace() pushes a copy, so later changes to current_status do not
        # alter the entries already on the stack.
        stack.append(replace(current_status))
        overflow_isolate_count = 0
        overflow_embedding_count = 0
        valid_isolate_count = 0
        results = []

        # Explicit embeddings. Process each character individually applying rules X2 through X8
        for index, char in enumerate(self.text):
            bidi_char = BidiCharacter(
                index, char, current_status.embedding_level, self.debug
            )
            new_bidi_class = None

            if bidi_char.bidi_class == "FSI":
                # An FSI behaves as LRI or RLI depending on the direction
                # detected in the text that follows it, up to its PDI.
                bidi_char.bidi_class = (
                    "LRI"
                    if auto_detect_base_direction(
                        self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                    )
                    == TextDirection.LTR
                    else "RLI"
                )

            if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
                # X2 - X5: calculate explicit embeddings and explicit overrides
                if bidi_char.bidi_class[0] == "R":
                    new_embedding_level = (
                        current_status.embedding_level + 1
                    ) | 1  # least greater odd
                else:
                    new_embedding_level = (
                        current_status.embedding_level + 2
                    ) & ~1  # least greater even

                if (
                    bidi_char.bidi_class[2] == "I"
                    and current_status.directional_override_status != "N"
                ):
                    new_bidi_class = current_status.directional_override_status

                if (
                    new_embedding_level <= MAX_DEPTH
                    and overflow_isolate_count == 0
                    and overflow_embedding_count == 0
                ):
                    current_status.embedding_level = new_embedding_level
                    current_status.directional_override_status = (
                        bidi_char.bidi_class[0]
                        if bidi_char.bidi_class[2] == "O"
                        else "N"
                    )
                    if bidi_char.bidi_class[2] == "I":
                        valid_isolate_count += 1
                        current_status.directional_isolate_status = True
                    else:
                        current_status.directional_isolate_status = False
                    stack.append(replace(current_status))
                else:
                    if bidi_char.bidi_class[2] == "I":
                        overflow_isolate_count += 1
                    else:
                        if overflow_isolate_count == 0:
                            overflow_embedding_count += 1

            if bidi_char.bidi_class not in (
                "B",
                "BN",
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "FSI",
                "PDI",
            ):  # X6
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDI":  # X6a
                if overflow_isolate_count > 0:
                    overflow_isolate_count -= 1
                elif valid_isolate_count > 0:
                    overflow_embedding_count = 0
                    # Drop embedding/override entries opened inside the
                    # isolate, then the isolate entry itself.
                    while True:
                        if not stack[-1].directional_isolate_status:
                            stack.pop()
                            continue
                        break
                    stack.pop()
                    current_status = replace(stack[-1])
                    valid_isolate_count -= 1
                assert isinstance(current_status, DirectionalStatus)
                bidi_char.embedding_level = current_status.embedding_level
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDF":  # X7
                if overflow_isolate_count == 0:
                    if overflow_embedding_count > 0:
                        overflow_embedding_count -= 1
                    else:
                        if (
                            not current_status.directional_isolate_status
                            and len(stack) > 1
                        ):
                            stack.pop()
                            current_status = replace(stack[-1])

            if new_bidi_class:
                bidi_char.bidi_class = new_bidi_class

            if bidi_char.bidi_class not in (
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "BN",
            ):  # X9
                if bidi_char.bidi_class == "B":
                    bidi_char.embedding_level = self.base_embedding_level
                elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                    bidi_char.embedding_level = current_status.embedding_level
                results.append(bidi_char)

        if not results:
            self.characters = []
            return
        self.characters = results
        calculate_isolate_runs(results)

    def split_bidi_fragments(self):
        """
        Group consecutive characters that share a direction.

        Returns:
            A tuple of (fragment text, TextDirection) pairs in original
            order, or an empty tuple when there are no characters.
        """
        bidi_fragments = []
        if len(self.characters) == 0:
            return ()
        current_fragment = ""
        current_direction = ""
        for c in self.characters:
            if c.get_direction_from_level() != current_direction:
                if current_fragment:
                    bidi_fragments.append(
                        (
                            current_fragment,
                            (
                                TextDirection.RTL
                                if current_direction == "R"
                                else TextDirection.LTR
                            ),
                        )
                    )
                current_fragment = ""
                current_direction = c.get_direction_from_level()
            current_fragment += c.character
        if current_fragment:
            bidi_fragments.append(
                (
                    current_fragment,
                    (
                        TextDirection.RTL
                        if current_direction == "R"
                        else TextDirection.LTR
                    ),
                )
            )
        return tuple(bidi_fragments)

    def reorder_resolved_levels(self):
        """
        Apply rules L1 and L2 and return the characters in reordered form.

        L1 updates `embedding_level` on the stored characters in place; L2
        works on a copy of the list, so `self.characters` keeps its order.

        Returns:
            A tuple of BidiCharacter in reordered sequence.
        """
        before_separator = True
        end_of_line = True
        max_level = 0
        min_odd_level = 999
        for bidi_char in reversed(self.characters):
            # Rule L1. Reset the embedding level of segment separators, paragraph separators,
            # and any adjacent whitespace.
            if bidi_char.original_bidi_class in ("S", "B"):
                bidi_char.embedding_level = self.base_embedding_level
                before_separator = True
            elif bidi_char.original_bidi_class in (
                "BN",
                "WS",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if before_separator or end_of_line:
                    bidi_char.embedding_level = self.base_embedding_level
            else:
                before_separator = False
                end_of_line = False

            if bidi_char.embedding_level > max_level:
                max_level = bidi_char.embedding_level
            if (
                bidi_char.embedding_level % 2 != 0
                and bidi_char.embedding_level < min_odd_level
            ):
                min_odd_level = bidi_char.embedding_level

        # Rule L2. From the highest level found in the text to the lowest odd level on each line,
        # reverse any contiguous sequence of characters that are at that level or higher.
        reordered_paragraph = self.characters.copy()
        for level in range(max_level, min_odd_level - 1, -1):
            temp_results = []
            rev = []
            for bidi_char in reordered_paragraph:
                if bidi_char.embedding_level >= level:
                    rev.append(bidi_char)
                else:
                    if rev:
                        rev.reverse()
                        temp_results += rev
                        rev = []
                    temp_results.append(bidi_char)
            if rev:
                rev.reverse()
                temp_results += rev
            reordered_paragraph = temp_results
        return tuple(reordered_paragraph)
Instance variables
var base_direction
-
Expand source code Browse git
class BidiParagraph:
    """
    A paragraph of text with explicit embedding levels resolved per character.

    The constructor determines the base direction (auto-detected when none is
    given) and immediately runs get_bidi_characters() to fill `characters`.
    """

    __slots__ = (
        "text",
        "base_direction",
        "debug",
        "base_embedding_level",
        "characters",
    )

    def __init__(
        self, text: str, base_direction: TextDirection = None, debug: bool = False
    ):
        """
        Args:
            text: the paragraph text.
            base_direction: paragraph direction; auto-detected from `text`
                when falsy.
            debug: forwarded to auto_detect_base_direction() and to every
                BidiCharacter created for this paragraph.
        """
        self.text = text
        # `debug` must be passed by keyword: the second positional parameter
        # of auto_detect_base_direction() is `stop_at_pdi`, not `debug`.
        self.base_direction = (
            auto_detect_base_direction(self.text, debug=debug)
            if not base_direction
            else base_direction
        )
        self.debug = debug
        self.base_embedding_level = (
            0 if self.base_direction == TextDirection.LTR else 1
        )  # base level
        self.characters: List[BidiCharacter] = []
        self.get_bidi_characters()

    def get_characters(self) -> List[BidiCharacter]:
        """Return the characters in their original (logical) order."""
        return self.characters

    def get_characters_with_embedding_level(self) -> List[BidiCharacter]:
        """Return the characters after reorder_resolved_levels() has updated their levels."""
        # Calculate embedding level for each character after breaking isolating runs.
        # Only used on conformance testing
        self.reorder_resolved_levels()
        return self.characters

    def get_reordered_characters(self) -> List[BidiCharacter]:
        """Return the reordered characters (a tuple, see reorder_resolved_levels)."""
        return self.reorder_resolved_levels()

    def get_all(self):
        """Return a pair: (characters in original order, reordered characters)."""
        return self.characters, self.reorder_resolved_levels()

    def get_reordered_string(self):
        "Used for conformance validation"
        return "".join(c.character for c in self.reorder_resolved_levels())

    def get_bidi_fragments(self):
        """Return the result of split_bidi_fragments()."""
        return self.split_bidi_fragments()

    def get_bidi_characters(self) -> None:
        """
        Apply the explicit-level rules X1 to X9 to `self.text`.

        Stores the retained characters in `self.characters` and then calls
        calculate_isolate_runs() on them. Returns nothing.
        """
        # Explicit levels and directions. Rule X1
        stack: List[DirectionalStatus] = deque()
        current_status = DirectionalStatus(
            embedding_level=self.base_embedding_level,
            directional_override_status="N",
            directional_isolate_status=False,
        )
        # replace() pushes a copy, so later changes to current_status do not
        # alter the entries already on the stack.
        stack.append(replace(current_status))
        overflow_isolate_count = 0
        overflow_embedding_count = 0
        valid_isolate_count = 0
        results = []

        # Explicit embeddings. Process each character individually applying rules X2 through X8
        for index, char in enumerate(self.text):
            bidi_char = BidiCharacter(
                index, char, current_status.embedding_level, self.debug
            )
            new_bidi_class = None

            if bidi_char.bidi_class == "FSI":
                # An FSI behaves as LRI or RLI depending on the direction
                # detected in the text that follows it, up to its PDI.
                bidi_char.bidi_class = (
                    "LRI"
                    if auto_detect_base_direction(
                        self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                    )
                    == TextDirection.LTR
                    else "RLI"
                )

            if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
                # X2 - X5: calculate explicit embeddings and explicit overrides
                if bidi_char.bidi_class[0] == "R":
                    new_embedding_level = (
                        current_status.embedding_level + 1
                    ) | 1  # least greater odd
                else:
                    new_embedding_level = (
                        current_status.embedding_level + 2
                    ) & ~1  # least greater even

                if (
                    bidi_char.bidi_class[2] == "I"
                    and current_status.directional_override_status != "N"
                ):
                    new_bidi_class = current_status.directional_override_status

                if (
                    new_embedding_level <= MAX_DEPTH
                    and overflow_isolate_count == 0
                    and overflow_embedding_count == 0
                ):
                    current_status.embedding_level = new_embedding_level
                    current_status.directional_override_status = (
                        bidi_char.bidi_class[0]
                        if bidi_char.bidi_class[2] == "O"
                        else "N"
                    )
                    if bidi_char.bidi_class[2] == "I":
                        valid_isolate_count += 1
                        current_status.directional_isolate_status = True
                    else:
                        current_status.directional_isolate_status = False
                    stack.append(replace(current_status))
                else:
                    if bidi_char.bidi_class[2] == "I":
                        overflow_isolate_count += 1
                    else:
                        if overflow_isolate_count == 0:
                            overflow_embedding_count += 1

            if bidi_char.bidi_class not in (
                "B",
                "BN",
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "FSI",
                "PDI",
            ):  # X6
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDI":  # X6a
                if overflow_isolate_count > 0:
                    overflow_isolate_count -= 1
                elif valid_isolate_count > 0:
                    overflow_embedding_count = 0
                    # Drop embedding/override entries opened inside the
                    # isolate, then the isolate entry itself.
                    while True:
                        if not stack[-1].directional_isolate_status:
                            stack.pop()
                            continue
                        break
                    stack.pop()
                    current_status = replace(stack[-1])
                    valid_isolate_count -= 1
                assert isinstance(current_status, DirectionalStatus)
                bidi_char.embedding_level = current_status.embedding_level
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDF":  # X7
                if overflow_isolate_count == 0:
                    if overflow_embedding_count > 0:
                        overflow_embedding_count -= 1
                    else:
                        if (
                            not current_status.directional_isolate_status
                            and len(stack) > 1
                        ):
                            stack.pop()
                            current_status = replace(stack[-1])

            if new_bidi_class:
                bidi_char.bidi_class = new_bidi_class

            if bidi_char.bidi_class not in (
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "BN",
            ):  # X9
                if bidi_char.bidi_class == "B":
                    bidi_char.embedding_level = self.base_embedding_level
                elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                    bidi_char.embedding_level = current_status.embedding_level
                results.append(bidi_char)

        if not results:
            self.characters = []
            return
        self.characters = results
        calculate_isolate_runs(results)

    def split_bidi_fragments(self):
        """
        Group consecutive characters that share a direction.

        Returns:
            A tuple of (fragment text, TextDirection) pairs in original
            order, or an empty tuple when there are no characters.
        """
        bidi_fragments = []
        if len(self.characters) == 0:
            return ()
        current_fragment = ""
        current_direction = ""
        for c in self.characters:
            if c.get_direction_from_level() != current_direction:
                if current_fragment:
                    bidi_fragments.append(
                        (
                            current_fragment,
                            (
                                TextDirection.RTL
                                if current_direction == "R"
                                else TextDirection.LTR
                            ),
                        )
                    )
                current_fragment = ""
                current_direction = c.get_direction_from_level()
            current_fragment += c.character
        if current_fragment:
            bidi_fragments.append(
                (
                    current_fragment,
                    (
                        TextDirection.RTL
                        if current_direction == "R"
                        else TextDirection.LTR
                    ),
                )
            )
        return tuple(bidi_fragments)

    def reorder_resolved_levels(self):
        """
        Apply rules L1 and L2 and return the characters in reordered form.

        L1 updates `embedding_level` on the stored characters in place; L2
        works on a copy of the list, so `self.characters` keeps its order.

        Returns:
            A tuple of BidiCharacter in reordered sequence.
        """
        before_separator = True
        end_of_line = True
        max_level = 0
        min_odd_level = 999
        for bidi_char in reversed(self.characters):
            # Rule L1. Reset the embedding level of segment separators, paragraph separators,
            # and any adjacent whitespace.
            if bidi_char.original_bidi_class in ("S", "B"):
                bidi_char.embedding_level = self.base_embedding_level
                before_separator = True
            elif bidi_char.original_bidi_class in (
                "BN",
                "WS",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if before_separator or end_of_line:
                    bidi_char.embedding_level = self.base_embedding_level
            else:
                before_separator = False
                end_of_line = False

            if bidi_char.embedding_level > max_level:
                max_level = bidi_char.embedding_level
            if (
                bidi_char.embedding_level % 2 != 0
                and bidi_char.embedding_level < min_odd_level
            ):
                min_odd_level = bidi_char.embedding_level

        # Rule L2. From the highest level found in the text to the lowest odd level on each line,
        # reverse any contiguous sequence of characters that are at that level or higher.
        reordered_paragraph = self.characters.copy()
        for level in range(max_level, min_odd_level - 1, -1):
            temp_results = []
            rev = []
            for bidi_char in reordered_paragraph:
                if bidi_char.embedding_level >= level:
                    rev.append(bidi_char)
                else:
                    if rev:
                        rev.reverse()
                        temp_results += rev
                        rev = []
                    temp_results.append(bidi_char)
            if rev:
                rev.reverse()
                temp_results += rev
            reordered_paragraph = temp_results
        return tuple(reordered_paragraph)
var base_embedding_level
-
Expand source code Browse git
class BidiParagraph:
    """
    A paragraph of text with explicit embedding levels resolved per character.

    The constructor determines the base direction (auto-detected when none is
    given) and immediately runs get_bidi_characters() to fill `characters`.
    """

    __slots__ = (
        "text",
        "base_direction",
        "debug",
        "base_embedding_level",
        "characters",
    )

    def __init__(
        self, text: str, base_direction: TextDirection = None, debug: bool = False
    ):
        """
        Args:
            text: the paragraph text.
            base_direction: paragraph direction; auto-detected from `text`
                when falsy.
            debug: forwarded to auto_detect_base_direction() and to every
                BidiCharacter created for this paragraph.
        """
        self.text = text
        # `debug` must be passed by keyword: the second positional parameter
        # of auto_detect_base_direction() is `stop_at_pdi`, not `debug`.
        self.base_direction = (
            auto_detect_base_direction(self.text, debug=debug)
            if not base_direction
            else base_direction
        )
        self.debug = debug
        self.base_embedding_level = (
            0 if self.base_direction == TextDirection.LTR else 1
        )  # base level
        self.characters: List[BidiCharacter] = []
        self.get_bidi_characters()

    def get_characters(self) -> List[BidiCharacter]:
        """Return the characters in their original (logical) order."""
        return self.characters

    def get_characters_with_embedding_level(self) -> List[BidiCharacter]:
        """Return the characters after reorder_resolved_levels() has updated their levels."""
        # Calculate embedding level for each character after breaking isolating runs.
        # Only used on conformance testing
        self.reorder_resolved_levels()
        return self.characters

    def get_reordered_characters(self) -> List[BidiCharacter]:
        """Return the reordered characters (a tuple, see reorder_resolved_levels)."""
        return self.reorder_resolved_levels()

    def get_all(self):
        """Return a pair: (characters in original order, reordered characters)."""
        return self.characters, self.reorder_resolved_levels()

    def get_reordered_string(self):
        "Used for conformance validation"
        return "".join(c.character for c in self.reorder_resolved_levels())

    def get_bidi_fragments(self):
        """Return the result of split_bidi_fragments()."""
        return self.split_bidi_fragments()

    def get_bidi_characters(self) -> None:
        """
        Apply the explicit-level rules X1 to X9 to `self.text`.

        Stores the retained characters in `self.characters` and then calls
        calculate_isolate_runs() on them. Returns nothing.
        """
        # Explicit levels and directions. Rule X1
        stack: List[DirectionalStatus] = deque()
        current_status = DirectionalStatus(
            embedding_level=self.base_embedding_level,
            directional_override_status="N",
            directional_isolate_status=False,
        )
        # replace() pushes a copy, so later changes to current_status do not
        # alter the entries already on the stack.
        stack.append(replace(current_status))
        overflow_isolate_count = 0
        overflow_embedding_count = 0
        valid_isolate_count = 0
        results = []

        # Explicit embeddings. Process each character individually applying rules X2 through X8
        for index, char in enumerate(self.text):
            bidi_char = BidiCharacter(
                index, char, current_status.embedding_level, self.debug
            )
            new_bidi_class = None

            if bidi_char.bidi_class == "FSI":
                # An FSI behaves as LRI or RLI depending on the direction
                # detected in the text that follows it, up to its PDI.
                bidi_char.bidi_class = (
                    "LRI"
                    if auto_detect_base_direction(
                        self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                    )
                    == TextDirection.LTR
                    else "RLI"
                )

            if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
                # X2 - X5: calculate explicit embeddings and explicit overrides
                if bidi_char.bidi_class[0] == "R":
                    new_embedding_level = (
                        current_status.embedding_level + 1
                    ) | 1  # least greater odd
                else:
                    new_embedding_level = (
                        current_status.embedding_level + 2
                    ) & ~1  # least greater even

                if (
                    bidi_char.bidi_class[2] == "I"
                    and current_status.directional_override_status != "N"
                ):
                    new_bidi_class = current_status.directional_override_status

                if (
                    new_embedding_level <= MAX_DEPTH
                    and overflow_isolate_count == 0
                    and overflow_embedding_count == 0
                ):
                    current_status.embedding_level = new_embedding_level
                    current_status.directional_override_status = (
                        bidi_char.bidi_class[0]
                        if bidi_char.bidi_class[2] == "O"
                        else "N"
                    )
                    if bidi_char.bidi_class[2] == "I":
                        valid_isolate_count += 1
                        current_status.directional_isolate_status = True
                    else:
                        current_status.directional_isolate_status = False
                    stack.append(replace(current_status))
                else:
                    if bidi_char.bidi_class[2] == "I":
                        overflow_isolate_count += 1
                    else:
                        if overflow_isolate_count == 0:
                            overflow_embedding_count += 1

            if bidi_char.bidi_class not in (
                "B",
                "BN",
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "FSI",
                "PDI",
            ):  # X6
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDI":  # X6a
                if overflow_isolate_count > 0:
                    overflow_isolate_count -= 1
                elif valid_isolate_count > 0:
                    overflow_embedding_count = 0
                    # Drop embedding/override entries opened inside the
                    # isolate, then the isolate entry itself.
                    while True:
                        if not stack[-1].directional_isolate_status:
                            stack.pop()
                            continue
                        break
                    stack.pop()
                    current_status = replace(stack[-1])
                    valid_isolate_count -= 1
                assert isinstance(current_status, DirectionalStatus)
                bidi_char.embedding_level = current_status.embedding_level
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDF":  # X7
                if overflow_isolate_count == 0:
                    if overflow_embedding_count > 0:
                        overflow_embedding_count -= 1
                    else:
                        if (
                            not current_status.directional_isolate_status
                            and len(stack) > 1
                        ):
                            stack.pop()
                            current_status = replace(stack[-1])

            if new_bidi_class:
                bidi_char.bidi_class = new_bidi_class

            if bidi_char.bidi_class not in (
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "BN",
            ):  # X9
                if bidi_char.bidi_class == "B":
                    bidi_char.embedding_level = self.base_embedding_level
                elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                    bidi_char.embedding_level = current_status.embedding_level
                results.append(bidi_char)

        if not results:
            self.characters = []
            return
        self.characters = results
        calculate_isolate_runs(results)

    def split_bidi_fragments(self):
        """
        Group consecutive characters that share a direction.

        Returns:
            A tuple of (fragment text, TextDirection) pairs in original
            order, or an empty tuple when there are no characters.
        """
        bidi_fragments = []
        if len(self.characters) == 0:
            return ()
        current_fragment = ""
        current_direction = ""
        for c in self.characters:
            if c.get_direction_from_level() != current_direction:
                if current_fragment:
                    bidi_fragments.append(
                        (
                            current_fragment,
                            (
                                TextDirection.RTL
                                if current_direction == "R"
                                else TextDirection.LTR
                            ),
                        )
                    )
                current_fragment = ""
                current_direction = c.get_direction_from_level()
            current_fragment += c.character
        if current_fragment:
            bidi_fragments.append(
                (
                    current_fragment,
                    (
                        TextDirection.RTL
                        if current_direction == "R"
                        else TextDirection.LTR
                    ),
                )
            )
        return tuple(bidi_fragments)

    def reorder_resolved_levels(self):
        """
        Apply rules L1 and L2 and return the characters in reordered form.

        L1 updates `embedding_level` on the stored characters in place; L2
        works on a copy of the list, so `self.characters` keeps its order.

        Returns:
            A tuple of BidiCharacter in reordered sequence.
        """
        before_separator = True
        end_of_line = True
        max_level = 0
        min_odd_level = 999
        for bidi_char in reversed(self.characters):
            # Rule L1. Reset the embedding level of segment separators, paragraph separators,
            # and any adjacent whitespace.
            if bidi_char.original_bidi_class in ("S", "B"):
                bidi_char.embedding_level = self.base_embedding_level
                before_separator = True
            elif bidi_char.original_bidi_class in (
                "BN",
                "WS",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if before_separator or end_of_line:
                    bidi_char.embedding_level = self.base_embedding_level
            else:
                before_separator = False
                end_of_line = False

            if bidi_char.embedding_level > max_level:
                max_level = bidi_char.embedding_level
            if (
                bidi_char.embedding_level % 2 != 0
                and bidi_char.embedding_level < min_odd_level
            ):
                min_odd_level = bidi_char.embedding_level

        # Rule L2. From the highest level found in the text to the lowest odd level on each line,
        # reverse any contiguous sequence of characters that are at that level or higher.
        reordered_paragraph = self.characters.copy()
        for level in range(max_level, min_odd_level - 1, -1):
            temp_results = []
            rev = []
            for bidi_char in reordered_paragraph:
                if bidi_char.embedding_level >= level:
                    rev.append(bidi_char)
                else:
                    if rev:
                        rev.reverse()
                        temp_results += rev
                        rev = []
                    temp_results.append(bidi_char)
            if rev:
                rev.reverse()
                temp_results += rev
            reordered_paragraph = temp_results
        return tuple(reordered_paragraph)
var characters
-
Expand source code Browse git
class BidiParagraph:
    """
    One paragraph of text processed with the Unicode Bidirectional Algorithm.

    On construction the base direction is resolved (auto-detected when it is
    not supplied), every character of ``text`` is wrapped in a
    ``BidiCharacter`` and the explicit-level rules are applied. The processed
    characters are kept, in logical order, in ``characters``.
    """

    __slots__ = (
        "text",
        "base_direction",
        "debug",
        "base_embedding_level",
        "characters",
    )

    def __init__(
        self, text: str, base_direction: TextDirection = None, debug: bool = False
    ):
        """
        Args:
            text: the paragraph to process.
            base_direction: paragraph direction; when falsy it is detected
                from ``text`` with ``auto_detect_base_direction``.
            debug: forwarded to ``auto_detect_base_direction`` and to every
                ``BidiCharacter`` that is created.
        """
        self.text = text
        # `debug` has to be passed by keyword: the second positional parameter
        # of auto_detect_base_direction is `stop_at_pdi`, not `debug`.
        self.base_direction = (
            auto_detect_base_direction(self.text, debug=debug)
            if not base_direction
            else base_direction
        )
        self.debug = debug
        self.base_embedding_level = (
            0 if self.base_direction == TextDirection.LTR else 1
        )  # base level
        self.characters: List[BidiCharacter] = []
        self.get_bidi_characters()

    def get_characters(self) -> List[BidiCharacter]:
        """Return the processed characters in logical order."""
        return self.characters

    def get_characters_with_embedding_level(self) -> List[BidiCharacter]:
        """
        Return the logical-order characters after rule L1 has been applied.

        Only used on conformance testing: ``reorder_resolved_levels`` is called
        for its side effect on the characters' embedding levels and its
        reordered result is discarded.
        """
        self.reorder_resolved_levels()
        return self.characters

    def get_reordered_characters(self) -> List[BidiCharacter]:
        """Return the characters in display order (a tuple)."""
        return self.reorder_resolved_levels()

    def get_all(self):
        """Return ``(logical-order characters, display-order characters)``."""
        return self.characters, self.reorder_resolved_levels()

    def get_reordered_string(self):
        "Used for conformance validation"
        return "".join(c.character for c in self.reorder_resolved_levels())

    def get_bidi_fragments(self):
        """Return the result of ``split_bidi_fragments``."""
        return self.split_bidi_fragments()

    def get_bidi_characters(self) -> List[BidiCharacter]:
        """
        Apply the explicit-level rules (X1 to X9) to ``self.text``.

        Every character is wrapped in a ``BidiCharacter``; characters whose
        class is RLE, LRE, RLO, LRO, PDF or BN are dropped (X9). The remaining
        characters are stored in ``self.characters`` and handed to
        ``calculate_isolate_runs``.

        Returns:
            ``self.characters`` (an empty list when nothing is retained).
        """
        # Explicit levels and directions. Rule X1
        stack = deque()  # directional status stack, top of stack is stack[-1]
        current_status = DirectionalStatus(
            embedding_level=self.base_embedding_level,
            directional_override_status="N",
            directional_isolate_status=False,
        )
        stack.append(replace(current_status))
        overflow_isolate_count = 0
        overflow_embedding_count = 0
        valid_isolate_count = 0
        results = []

        # Explicit embeddings. Process each character individually applying rules X2 through X8
        for index, char in enumerate(self.text):
            bidi_char = BidiCharacter(
                index, char, current_status.embedding_level, self.debug
            )
            new_bidi_class = None

            if bidi_char.bidi_class == "FSI":
                # Resolve FSI to LRI/RLI from the first strong character that
                # follows it, stopping at the matching PDI.
                bidi_char.bidi_class = (
                    "LRI"
                    if auto_detect_base_direction(
                        self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                    )
                    == TextDirection.LTR
                    else "RLI"
                )
            if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
                # X2 - X5: calculate explicit embeddings and explicit overrides
                if bidi_char.bidi_class[0] == "R":
                    new_embedding_level = (
                        current_status.embedding_level + 1
                    ) | 1  # least greater odd
                else:
                    new_embedding_level = (
                        current_status.embedding_level + 2
                    ) & ~1  # least greater even

                if (
                    bidi_char.bidi_class[2] == "I"
                    and current_status.directional_override_status != "N"
                ):
                    new_bidi_class = current_status.directional_override_status

                if (
                    new_embedding_level <= MAX_DEPTH
                    and overflow_isolate_count == 0
                    and overflow_embedding_count == 0
                ):
                    current_status.embedding_level = new_embedding_level
                    current_status.directional_override_status = (
                        bidi_char.bidi_class[0]
                        if bidi_char.bidi_class[2] == "O"
                        else "N"
                    )
                    if bidi_char.bidi_class[2] == "I":
                        valid_isolate_count += 1
                        current_status.directional_isolate_status = True
                    else:
                        current_status.directional_isolate_status = False
                    stack.append(replace(current_status))
                else:
                    if bidi_char.bidi_class[2] == "I":
                        overflow_isolate_count += 1
                    else:
                        if overflow_isolate_count == 0:
                            overflow_embedding_count += 1

            if bidi_char.bidi_class not in (
                "B",
                "BN",
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "FSI",
                "PDI",
            ):  # X6
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDI":  # X6a
                if overflow_isolate_count > 0:
                    overflow_isolate_count -= 1
                elif valid_isolate_count > 0:
                    overflow_embedding_count = 0
                    # Discard embedding/override entries pushed inside the
                    # isolate, then the isolate entry itself.
                    while not stack[-1].directional_isolate_status:
                        stack.pop()
                    stack.pop()
                    current_status = replace(stack[-1])
                    valid_isolate_count -= 1
                assert isinstance(current_status, DirectionalStatus)
                bidi_char.embedding_level = current_status.embedding_level
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDF":  # X7
                if overflow_isolate_count == 0:
                    if overflow_embedding_count > 0:
                        overflow_embedding_count -= 1
                    else:
                        if (
                            not current_status.directional_isolate_status
                            and len(stack) > 1
                        ):
                            stack.pop()
                            current_status = replace(stack[-1])

            if new_bidi_class:
                bidi_char.bidi_class = new_bidi_class
            if bidi_char.bidi_class not in (
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "BN",
            ):  # X9
                if bidi_char.bidi_class == "B":
                    bidi_char.embedding_level = self.base_embedding_level
                elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                    bidi_char.embedding_level = current_status.embedding_level
                results.append(bidi_char)

        if not results:
            self.characters = []
            return self.characters
        self.characters = results
        calculate_isolate_runs(results)
        return self.characters

    def split_bidi_fragments(self):
        """
        Group consecutive characters that share a direction.

        Returns:
            A tuple of ``(text, TextDirection)`` pairs in logical order, or an
            empty tuple when there are no characters.
        """
        bidi_fragments = []
        if len(self.characters) == 0:
            return ()
        current_fragment = ""
        current_direction = ""
        for c in self.characters:
            if c.get_direction_from_level() != current_direction:
                if current_fragment:
                    bidi_fragments.append(
                        (
                            current_fragment,
                            (
                                TextDirection.RTL
                                if current_direction == "R"
                                else TextDirection.LTR
                            ),
                        )
                    )
                current_fragment = ""
                current_direction = c.get_direction_from_level()
            current_fragment += c.character
        if current_fragment:
            bidi_fragments.append(
                (
                    current_fragment,
                    (
                        TextDirection.RTL
                        if current_direction == "R"
                        else TextDirection.LTR
                    ),
                )
            )
        return tuple(bidi_fragments)

    def reorder_resolved_levels(self):
        """
        Apply rules L1 and L2 and return the characters in display order.

        Note that L1 rewrites ``embedding_level`` on the objects stored in
        ``self.characters``; the list itself keeps its logical order.

        Returns:
            A tuple with the same character objects, reordered.
        """
        before_separator = True
        end_of_line = True
        max_level = 0
        min_odd_level = 999  # sentinel: no odd level seen -> the L2 loop is empty
        for bidi_char in reversed(self.characters):
            # Rule L1. Reset the embedding level of segment separators, paragraph separators,
            # and any adjacent whitespace.
            if bidi_char.original_bidi_class in ("S", "B"):
                bidi_char.embedding_level = self.base_embedding_level
                before_separator = True
            elif bidi_char.original_bidi_class in (
                "BN",
                "WS",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if before_separator or end_of_line:
                    bidi_char.embedding_level = self.base_embedding_level
            else:
                before_separator = False
                end_of_line = False

            if bidi_char.embedding_level > max_level:
                max_level = bidi_char.embedding_level
            if (
                bidi_char.embedding_level % 2 != 0
                and bidi_char.embedding_level < min_odd_level
            ):
                min_odd_level = bidi_char.embedding_level

        # Rule L2. From the highest level found in the text to the lowest odd level on each line,
        # reverse any contiguous sequence of characters that are at that level or higher.
        reordered_paragraph = self.characters.copy()
        for level in range(max_level, min_odd_level - 1, -1):
            temp_results = []
            rev = []
            for bidi_char in reordered_paragraph:
                if bidi_char.embedding_level >= level:
                    rev.append(bidi_char)
                else:
                    if rev:
                        rev.reverse()
                        temp_results += rev
                        rev = []
                    temp_results.append(bidi_char)
            if rev:
                rev.reverse()
                temp_results += rev
            reordered_paragraph = temp_results
        return tuple(reordered_paragraph)
var debug
-
Expand source code Browse git
class BidiParagraph:
    """
    One paragraph of text processed with the Unicode Bidirectional Algorithm.

    On construction the base direction is resolved (auto-detected when it is
    not supplied), every character of ``text`` is wrapped in a
    ``BidiCharacter`` and the explicit-level rules are applied. The processed
    characters are kept, in logical order, in ``characters``.
    """

    __slots__ = (
        "text",
        "base_direction",
        "debug",
        "base_embedding_level",
        "characters",
    )

    def __init__(
        self, text: str, base_direction: TextDirection = None, debug: bool = False
    ):
        """
        Args:
            text: the paragraph to process.
            base_direction: paragraph direction; when falsy it is detected
                from ``text`` with ``auto_detect_base_direction``.
            debug: forwarded to ``auto_detect_base_direction`` and to every
                ``BidiCharacter`` that is created.
        """
        self.text = text
        # `debug` has to be passed by keyword: the second positional parameter
        # of auto_detect_base_direction is `stop_at_pdi`, not `debug`.
        self.base_direction = (
            auto_detect_base_direction(self.text, debug=debug)
            if not base_direction
            else base_direction
        )
        self.debug = debug
        self.base_embedding_level = (
            0 if self.base_direction == TextDirection.LTR else 1
        )  # base level
        self.characters: List[BidiCharacter] = []
        self.get_bidi_characters()

    def get_characters(self) -> List[BidiCharacter]:
        """Return the processed characters in logical order."""
        return self.characters

    def get_characters_with_embedding_level(self) -> List[BidiCharacter]:
        """
        Return the logical-order characters after rule L1 has been applied.

        Only used on conformance testing: ``reorder_resolved_levels`` is called
        for its side effect on the characters' embedding levels and its
        reordered result is discarded.
        """
        self.reorder_resolved_levels()
        return self.characters

    def get_reordered_characters(self) -> List[BidiCharacter]:
        """Return the characters in display order (a tuple)."""
        return self.reorder_resolved_levels()

    def get_all(self):
        """Return ``(logical-order characters, display-order characters)``."""
        return self.characters, self.reorder_resolved_levels()

    def get_reordered_string(self):
        "Used for conformance validation"
        return "".join(c.character for c in self.reorder_resolved_levels())

    def get_bidi_fragments(self):
        """Return the result of ``split_bidi_fragments``."""
        return self.split_bidi_fragments()

    def get_bidi_characters(self) -> List[BidiCharacter]:
        """
        Apply the explicit-level rules (X1 to X9) to ``self.text``.

        Every character is wrapped in a ``BidiCharacter``; characters whose
        class is RLE, LRE, RLO, LRO, PDF or BN are dropped (X9). The remaining
        characters are stored in ``self.characters`` and handed to
        ``calculate_isolate_runs``.

        Returns:
            ``self.characters`` (an empty list when nothing is retained).
        """
        # Explicit levels and directions. Rule X1
        stack = deque()  # directional status stack, top of stack is stack[-1]
        current_status = DirectionalStatus(
            embedding_level=self.base_embedding_level,
            directional_override_status="N",
            directional_isolate_status=False,
        )
        stack.append(replace(current_status))
        overflow_isolate_count = 0
        overflow_embedding_count = 0
        valid_isolate_count = 0
        results = []

        # Explicit embeddings. Process each character individually applying rules X2 through X8
        for index, char in enumerate(self.text):
            bidi_char = BidiCharacter(
                index, char, current_status.embedding_level, self.debug
            )
            new_bidi_class = None

            if bidi_char.bidi_class == "FSI":
                # Resolve FSI to LRI/RLI from the first strong character that
                # follows it, stopping at the matching PDI.
                bidi_char.bidi_class = (
                    "LRI"
                    if auto_detect_base_direction(
                        self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                    )
                    == TextDirection.LTR
                    else "RLI"
                )
            if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
                # X2 - X5: calculate explicit embeddings and explicit overrides
                if bidi_char.bidi_class[0] == "R":
                    new_embedding_level = (
                        current_status.embedding_level + 1
                    ) | 1  # least greater odd
                else:
                    new_embedding_level = (
                        current_status.embedding_level + 2
                    ) & ~1  # least greater even

                if (
                    bidi_char.bidi_class[2] == "I"
                    and current_status.directional_override_status != "N"
                ):
                    new_bidi_class = current_status.directional_override_status

                if (
                    new_embedding_level <= MAX_DEPTH
                    and overflow_isolate_count == 0
                    and overflow_embedding_count == 0
                ):
                    current_status.embedding_level = new_embedding_level
                    current_status.directional_override_status = (
                        bidi_char.bidi_class[0]
                        if bidi_char.bidi_class[2] == "O"
                        else "N"
                    )
                    if bidi_char.bidi_class[2] == "I":
                        valid_isolate_count += 1
                        current_status.directional_isolate_status = True
                    else:
                        current_status.directional_isolate_status = False
                    stack.append(replace(current_status))
                else:
                    if bidi_char.bidi_class[2] == "I":
                        overflow_isolate_count += 1
                    else:
                        if overflow_isolate_count == 0:
                            overflow_embedding_count += 1

            if bidi_char.bidi_class not in (
                "B",
                "BN",
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "FSI",
                "PDI",
            ):  # X6
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDI":  # X6a
                if overflow_isolate_count > 0:
                    overflow_isolate_count -= 1
                elif valid_isolate_count > 0:
                    overflow_embedding_count = 0
                    # Discard embedding/override entries pushed inside the
                    # isolate, then the isolate entry itself.
                    while not stack[-1].directional_isolate_status:
                        stack.pop()
                    stack.pop()
                    current_status = replace(stack[-1])
                    valid_isolate_count -= 1
                assert isinstance(current_status, DirectionalStatus)
                bidi_char.embedding_level = current_status.embedding_level
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDF":  # X7
                if overflow_isolate_count == 0:
                    if overflow_embedding_count > 0:
                        overflow_embedding_count -= 1
                    else:
                        if (
                            not current_status.directional_isolate_status
                            and len(stack) > 1
                        ):
                            stack.pop()
                            current_status = replace(stack[-1])

            if new_bidi_class:
                bidi_char.bidi_class = new_bidi_class
            if bidi_char.bidi_class not in (
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "BN",
            ):  # X9
                if bidi_char.bidi_class == "B":
                    bidi_char.embedding_level = self.base_embedding_level
                elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                    bidi_char.embedding_level = current_status.embedding_level
                results.append(bidi_char)

        if not results:
            self.characters = []
            return self.characters
        self.characters = results
        calculate_isolate_runs(results)
        return self.characters

    def split_bidi_fragments(self):
        """
        Group consecutive characters that share a direction.

        Returns:
            A tuple of ``(text, TextDirection)`` pairs in logical order, or an
            empty tuple when there are no characters.
        """
        bidi_fragments = []
        if len(self.characters) == 0:
            return ()
        current_fragment = ""
        current_direction = ""
        for c in self.characters:
            if c.get_direction_from_level() != current_direction:
                if current_fragment:
                    bidi_fragments.append(
                        (
                            current_fragment,
                            (
                                TextDirection.RTL
                                if current_direction == "R"
                                else TextDirection.LTR
                            ),
                        )
                    )
                current_fragment = ""
                current_direction = c.get_direction_from_level()
            current_fragment += c.character
        if current_fragment:
            bidi_fragments.append(
                (
                    current_fragment,
                    (
                        TextDirection.RTL
                        if current_direction == "R"
                        else TextDirection.LTR
                    ),
                )
            )
        return tuple(bidi_fragments)

    def reorder_resolved_levels(self):
        """
        Apply rules L1 and L2 and return the characters in display order.

        Note that L1 rewrites ``embedding_level`` on the objects stored in
        ``self.characters``; the list itself keeps its logical order.

        Returns:
            A tuple with the same character objects, reordered.
        """
        before_separator = True
        end_of_line = True
        max_level = 0
        min_odd_level = 999  # sentinel: no odd level seen -> the L2 loop is empty
        for bidi_char in reversed(self.characters):
            # Rule L1. Reset the embedding level of segment separators, paragraph separators,
            # and any adjacent whitespace.
            if bidi_char.original_bidi_class in ("S", "B"):
                bidi_char.embedding_level = self.base_embedding_level
                before_separator = True
            elif bidi_char.original_bidi_class in (
                "BN",
                "WS",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if before_separator or end_of_line:
                    bidi_char.embedding_level = self.base_embedding_level
            else:
                before_separator = False
                end_of_line = False

            if bidi_char.embedding_level > max_level:
                max_level = bidi_char.embedding_level
            if (
                bidi_char.embedding_level % 2 != 0
                and bidi_char.embedding_level < min_odd_level
            ):
                min_odd_level = bidi_char.embedding_level

        # Rule L2. From the highest level found in the text to the lowest odd level on each line,
        # reverse any contiguous sequence of characters that are at that level or higher.
        reordered_paragraph = self.characters.copy()
        for level in range(max_level, min_odd_level - 1, -1):
            temp_results = []
            rev = []
            for bidi_char in reordered_paragraph:
                if bidi_char.embedding_level >= level:
                    rev.append(bidi_char)
                else:
                    if rev:
                        rev.reverse()
                        temp_results += rev
                        rev = []
                    temp_results.append(bidi_char)
            if rev:
                rev.reverse()
                temp_results += rev
            reordered_paragraph = temp_results
        return tuple(reordered_paragraph)
var text
-
Expand source code Browse git
class BidiParagraph:
    """
    One paragraph of text processed with the Unicode Bidirectional Algorithm.

    On construction the base direction is resolved (auto-detected when it is
    not supplied), every character of ``text`` is wrapped in a
    ``BidiCharacter`` and the explicit-level rules are applied. The processed
    characters are kept, in logical order, in ``characters``.
    """

    __slots__ = (
        "text",
        "base_direction",
        "debug",
        "base_embedding_level",
        "characters",
    )

    def __init__(
        self, text: str, base_direction: TextDirection = None, debug: bool = False
    ):
        """
        Args:
            text: the paragraph to process.
            base_direction: paragraph direction; when falsy it is detected
                from ``text`` with ``auto_detect_base_direction``.
            debug: forwarded to ``auto_detect_base_direction`` and to every
                ``BidiCharacter`` that is created.
        """
        self.text = text
        # `debug` has to be passed by keyword: the second positional parameter
        # of auto_detect_base_direction is `stop_at_pdi`, not `debug`.
        self.base_direction = (
            auto_detect_base_direction(self.text, debug=debug)
            if not base_direction
            else base_direction
        )
        self.debug = debug
        self.base_embedding_level = (
            0 if self.base_direction == TextDirection.LTR else 1
        )  # base level
        self.characters: List[BidiCharacter] = []
        self.get_bidi_characters()

    def get_characters(self) -> List[BidiCharacter]:
        """Return the processed characters in logical order."""
        return self.characters

    def get_characters_with_embedding_level(self) -> List[BidiCharacter]:
        """
        Return the logical-order characters after rule L1 has been applied.

        Only used on conformance testing: ``reorder_resolved_levels`` is called
        for its side effect on the characters' embedding levels and its
        reordered result is discarded.
        """
        self.reorder_resolved_levels()
        return self.characters

    def get_reordered_characters(self) -> List[BidiCharacter]:
        """Return the characters in display order (a tuple)."""
        return self.reorder_resolved_levels()

    def get_all(self):
        """Return ``(logical-order characters, display-order characters)``."""
        return self.characters, self.reorder_resolved_levels()

    def get_reordered_string(self):
        "Used for conformance validation"
        return "".join(c.character for c in self.reorder_resolved_levels())

    def get_bidi_fragments(self):
        """Return the result of ``split_bidi_fragments``."""
        return self.split_bidi_fragments()

    def get_bidi_characters(self) -> List[BidiCharacter]:
        """
        Apply the explicit-level rules (X1 to X9) to ``self.text``.

        Every character is wrapped in a ``BidiCharacter``; characters whose
        class is RLE, LRE, RLO, LRO, PDF or BN are dropped (X9). The remaining
        characters are stored in ``self.characters`` and handed to
        ``calculate_isolate_runs``.

        Returns:
            ``self.characters`` (an empty list when nothing is retained).
        """
        # Explicit levels and directions. Rule X1
        stack = deque()  # directional status stack, top of stack is stack[-1]
        current_status = DirectionalStatus(
            embedding_level=self.base_embedding_level,
            directional_override_status="N",
            directional_isolate_status=False,
        )
        stack.append(replace(current_status))
        overflow_isolate_count = 0
        overflow_embedding_count = 0
        valid_isolate_count = 0
        results = []

        # Explicit embeddings. Process each character individually applying rules X2 through X8
        for index, char in enumerate(self.text):
            bidi_char = BidiCharacter(
                index, char, current_status.embedding_level, self.debug
            )
            new_bidi_class = None

            if bidi_char.bidi_class == "FSI":
                # Resolve FSI to LRI/RLI from the first strong character that
                # follows it, stopping at the matching PDI.
                bidi_char.bidi_class = (
                    "LRI"
                    if auto_detect_base_direction(
                        self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                    )
                    == TextDirection.LTR
                    else "RLI"
                )
            if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
                # X2 - X5: calculate explicit embeddings and explicit overrides
                if bidi_char.bidi_class[0] == "R":
                    new_embedding_level = (
                        current_status.embedding_level + 1
                    ) | 1  # least greater odd
                else:
                    new_embedding_level = (
                        current_status.embedding_level + 2
                    ) & ~1  # least greater even

                if (
                    bidi_char.bidi_class[2] == "I"
                    and current_status.directional_override_status != "N"
                ):
                    new_bidi_class = current_status.directional_override_status

                if (
                    new_embedding_level <= MAX_DEPTH
                    and overflow_isolate_count == 0
                    and overflow_embedding_count == 0
                ):
                    current_status.embedding_level = new_embedding_level
                    current_status.directional_override_status = (
                        bidi_char.bidi_class[0]
                        if bidi_char.bidi_class[2] == "O"
                        else "N"
                    )
                    if bidi_char.bidi_class[2] == "I":
                        valid_isolate_count += 1
                        current_status.directional_isolate_status = True
                    else:
                        current_status.directional_isolate_status = False
                    stack.append(replace(current_status))
                else:
                    if bidi_char.bidi_class[2] == "I":
                        overflow_isolate_count += 1
                    else:
                        if overflow_isolate_count == 0:
                            overflow_embedding_count += 1

            if bidi_char.bidi_class not in (
                "B",
                "BN",
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "FSI",
                "PDI",
            ):  # X6
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDI":  # X6a
                if overflow_isolate_count > 0:
                    overflow_isolate_count -= 1
                elif valid_isolate_count > 0:
                    overflow_embedding_count = 0
                    # Discard embedding/override entries pushed inside the
                    # isolate, then the isolate entry itself.
                    while not stack[-1].directional_isolate_status:
                        stack.pop()
                    stack.pop()
                    current_status = replace(stack[-1])
                    valid_isolate_count -= 1
                assert isinstance(current_status, DirectionalStatus)
                bidi_char.embedding_level = current_status.embedding_level
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDF":  # X7
                if overflow_isolate_count == 0:
                    if overflow_embedding_count > 0:
                        overflow_embedding_count -= 1
                    else:
                        if (
                            not current_status.directional_isolate_status
                            and len(stack) > 1
                        ):
                            stack.pop()
                            current_status = replace(stack[-1])

            if new_bidi_class:
                bidi_char.bidi_class = new_bidi_class
            if bidi_char.bidi_class not in (
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "BN",
            ):  # X9
                if bidi_char.bidi_class == "B":
                    bidi_char.embedding_level = self.base_embedding_level
                elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                    bidi_char.embedding_level = current_status.embedding_level
                results.append(bidi_char)

        if not results:
            self.characters = []
            return self.characters
        self.characters = results
        calculate_isolate_runs(results)
        return self.characters

    def split_bidi_fragments(self):
        """
        Group consecutive characters that share a direction.

        Returns:
            A tuple of ``(text, TextDirection)`` pairs in logical order, or an
            empty tuple when there are no characters.
        """
        bidi_fragments = []
        if len(self.characters) == 0:
            return ()
        current_fragment = ""
        current_direction = ""
        for c in self.characters:
            if c.get_direction_from_level() != current_direction:
                if current_fragment:
                    bidi_fragments.append(
                        (
                            current_fragment,
                            (
                                TextDirection.RTL
                                if current_direction == "R"
                                else TextDirection.LTR
                            ),
                        )
                    )
                current_fragment = ""
                current_direction = c.get_direction_from_level()
            current_fragment += c.character
        if current_fragment:
            bidi_fragments.append(
                (
                    current_fragment,
                    (
                        TextDirection.RTL
                        if current_direction == "R"
                        else TextDirection.LTR
                    ),
                )
            )
        return tuple(bidi_fragments)

    def reorder_resolved_levels(self):
        """
        Apply rules L1 and L2 and return the characters in display order.

        Note that L1 rewrites ``embedding_level`` on the objects stored in
        ``self.characters``; the list itself keeps its logical order.

        Returns:
            A tuple with the same character objects, reordered.
        """
        before_separator = True
        end_of_line = True
        max_level = 0
        min_odd_level = 999  # sentinel: no odd level seen -> the L2 loop is empty
        for bidi_char in reversed(self.characters):
            # Rule L1. Reset the embedding level of segment separators, paragraph separators,
            # and any adjacent whitespace.
            if bidi_char.original_bidi_class in ("S", "B"):
                bidi_char.embedding_level = self.base_embedding_level
                before_separator = True
            elif bidi_char.original_bidi_class in (
                "BN",
                "WS",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if before_separator or end_of_line:
                    bidi_char.embedding_level = self.base_embedding_level
            else:
                before_separator = False
                end_of_line = False

            if bidi_char.embedding_level > max_level:
                max_level = bidi_char.embedding_level
            if (
                bidi_char.embedding_level % 2 != 0
                and bidi_char.embedding_level < min_odd_level
            ):
                min_odd_level = bidi_char.embedding_level

        # Rule L2. From the highest level found in the text to the lowest odd level on each line,
        # reverse any contiguous sequence of characters that are at that level or higher.
        reordered_paragraph = self.characters.copy()
        for level in range(max_level, min_odd_level - 1, -1):
            temp_results = []
            rev = []
            for bidi_char in reordered_paragraph:
                if bidi_char.embedding_level >= level:
                    rev.append(bidi_char)
                else:
                    if rev:
                        rev.reverse()
                        temp_results += rev
                        rev = []
                    temp_results.append(bidi_char)
            if rev:
                rev.reverse()
                temp_results += rev
            reordered_paragraph = temp_results
        return tuple(reordered_paragraph)
Methods
def get_all(self)
-
Expand source code Browse git
def get_all(self):
    """Return a pair: the stored characters and their reordered counterpart."""
    logical_order = self.characters
    display_order = self.reorder_resolved_levels()
    return logical_order, display_order
def get_bidi_characters(self) ‑> List[BidiCharacter]
-
Expand source code Browse git
def get_bidi_characters(self) -> List[BidiCharacter]:
    """
    Apply the explicit-level rules (X1 to X9) to ``self.text``.

    Every character is wrapped in a ``BidiCharacter``; characters whose
    class is RLE, LRE, RLO, LRO, PDF or BN are dropped (X9). The remaining
    characters are stored in ``self.characters`` and handed to
    ``calculate_isolate_runs``.

    Returns:
        ``self.characters`` (an empty list when nothing is retained), so the
        result matches the declared return type.
    """
    # Explicit levels and directions. Rule X1
    stack = deque()  # directional status stack, top of stack is stack[-1]
    current_status = DirectionalStatus(
        embedding_level=self.base_embedding_level,
        directional_override_status="N",
        directional_isolate_status=False,
    )
    stack.append(replace(current_status))
    overflow_isolate_count = 0
    overflow_embedding_count = 0
    valid_isolate_count = 0
    results = []

    # Explicit embeddings. Process each character individually applying rules X2 through X8
    for index, char in enumerate(self.text):
        bidi_char = BidiCharacter(
            index, char, current_status.embedding_level, self.debug
        )
        new_bidi_class = None

        if bidi_char.bidi_class == "FSI":
            # Resolve FSI to LRI/RLI from the first strong character that
            # follows it, stopping at the matching PDI.
            bidi_char.bidi_class = (
                "LRI"
                if auto_detect_base_direction(
                    self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                )
                == TextDirection.LTR
                else "RLI"
            )
        if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
            # X2 - X5: calculate explicit embeddings and explicit overrides
            if bidi_char.bidi_class[0] == "R":
                new_embedding_level = (
                    current_status.embedding_level + 1
                ) | 1  # least greater odd
            else:
                new_embedding_level = (
                    current_status.embedding_level + 2
                ) & ~1  # least greater even

            if (
                bidi_char.bidi_class[2] == "I"
                and current_status.directional_override_status != "N"
            ):
                new_bidi_class = current_status.directional_override_status

            if (
                new_embedding_level <= MAX_DEPTH
                and overflow_isolate_count == 0
                and overflow_embedding_count == 0
            ):
                current_status.embedding_level = new_embedding_level
                current_status.directional_override_status = (
                    bidi_char.bidi_class[0]
                    if bidi_char.bidi_class[2] == "O"
                    else "N"
                )
                if bidi_char.bidi_class[2] == "I":
                    valid_isolate_count += 1
                    current_status.directional_isolate_status = True
                else:
                    current_status.directional_isolate_status = False
                stack.append(replace(current_status))
            else:
                if bidi_char.bidi_class[2] == "I":
                    overflow_isolate_count += 1
                else:
                    if overflow_isolate_count == 0:
                        overflow_embedding_count += 1

        if bidi_char.bidi_class not in (
            "B",
            "BN",
            "RLE",
            "LRE",
            "RLO",
            "LRO",
            "PDF",
            "FSI",
            "PDI",
        ):  # X6
            if current_status.directional_override_status != "N":
                new_bidi_class = current_status.directional_override_status

        if bidi_char.bidi_class == "PDI":  # X6a
            if overflow_isolate_count > 0:
                overflow_isolate_count -= 1
            elif valid_isolate_count > 0:
                overflow_embedding_count = 0
                # Discard embedding/override entries pushed inside the
                # isolate, then the isolate entry itself.
                while not stack[-1].directional_isolate_status:
                    stack.pop()
                stack.pop()
                current_status = replace(stack[-1])
                valid_isolate_count -= 1
            assert isinstance(current_status, DirectionalStatus)
            bidi_char.embedding_level = current_status.embedding_level
            if current_status.directional_override_status != "N":
                new_bidi_class = current_status.directional_override_status

        if bidi_char.bidi_class == "PDF":  # X7
            if overflow_isolate_count == 0:
                if overflow_embedding_count > 0:
                    overflow_embedding_count -= 1
                else:
                    if (
                        not current_status.directional_isolate_status
                        and len(stack) > 1
                    ):
                        stack.pop()
                        current_status = replace(stack[-1])

        if new_bidi_class:
            bidi_char.bidi_class = new_bidi_class
        if bidi_char.bidi_class not in (
            "RLE",
            "LRE",
            "RLO",
            "LRO",
            "PDF",
            "BN",
        ):  # X9
            if bidi_char.bidi_class == "B":
                bidi_char.embedding_level = self.base_embedding_level
            elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                bidi_char.embedding_level = current_status.embedding_level
            results.append(bidi_char)

    if not results:
        self.characters = []
        return self.characters
    self.characters = results
    calculate_isolate_runs(results)
    return self.characters
def get_bidi_fragments(self)
-
Expand source code Browse git
def get_bidi_fragments(self):
    """Public accessor: delegate to ``split_bidi_fragments`` and return its result."""
    fragments = self.split_bidi_fragments()
    return fragments
def get_characters(self) ‑> List[BidiCharacter]
-
Expand source code Browse git
def get_characters(self) -> List[BidiCharacter]:
    """Return the stored ``characters`` list itself (not a copy)."""
    stored = self.characters
    return stored
def get_characters_with_embedding_level(self) ‑> List[BidiCharacter]
-
Expand source code Browse git
def get_characters_with_embedding_level(self) -> List[BidiCharacter]:
    """
    Return the stored characters after running ``reorder_resolved_levels``.

    The reordering call is made only for its effect on the characters'
    embedding levels; its return value is thrown away and the stored
    (unreordered) list is returned. Only used on conformance testing.
    """
    _ = self.reorder_resolved_levels()  # result intentionally unused
    return self.characters
def get_reordered_characters(self) ‑> List[BidiCharacter]
-
Expand source code Browse git
def get_reordered_characters(self) -> List[BidiCharacter]:
    """Return whatever ``reorder_resolved_levels`` produces for this paragraph."""
    display_order = self.reorder_resolved_levels()
    return display_order
def get_reordered_string(self)
-
Expand source code Browse git
def get_reordered_string(self):
    """Concatenate the ``character`` of every reordered item. Used for conformance validation."""
    pieces = [item.character for item in self.reorder_resolved_levels()]
    return "".join(pieces)
Used for conformance validation
def reorder_resolved_levels(self)
-
Expand source code Browse git
def reorder_resolved_levels(self):
    """
    Run rules L1 and L2 over ``self.characters`` and return a reordered tuple.

    L1 overwrites ``embedding_level`` on the stored character objects; the
    stored list itself is left in its original order.
    """
    base_level = self.base_embedding_level
    resettable = ("BN", "WS", "FSI", "LRI", "RLI", "PDI")

    # Rule L1. Reset the embedding level of segment separators, paragraph separators,
    # and any adjacent whitespace. Walking backwards, `reset_pending` stays true
    # from the end of the line or from a separator until the first character that
    # is neither a separator nor whitespace-like.
    reset_pending = True
    for item in reversed(self.characters):
        kind = item.original_bidi_class
        if kind in ("S", "B"):
            item.embedding_level = base_level
            reset_pending = True
        elif kind not in resettable:
            reset_pending = False
        elif reset_pending:
            item.embedding_level = base_level

    levels = [item.embedding_level for item in self.characters]
    highest = max([0] + levels)
    # 999 acts as "no odd level present": the range below is then empty.
    lowest_odd = min([999] + [lv for lv in levels if lv % 2 != 0])

    # Rule L2. From the highest level found in the text to the lowest odd level on each line,
    # reverse any contiguous sequence of characters that are at that level or higher.
    ordered = self.characters.copy()
    for threshold in range(highest, lowest_odd - 1, -1):
        rebuilt = []
        run = []
        for item in ordered:
            if item.embedding_level >= threshold:
                run.append(item)
                continue
            rebuilt.extend(run[::-1])
            run = []
            rebuilt.append(item)
        rebuilt.extend(run[::-1])
        ordered = rebuilt
    return tuple(ordered)
def split_bidi_fragments(self)
-
Expand source code Browse git
def split_bidi_fragments(self):
    """
    Cut the stored characters into runs that share one direction.

    Returns a tuple of ``(text, TextDirection)`` pairs, in the order the
    characters are stored; an empty tuple when there are no characters.
    """
    if len(self.characters) == 0:
        return ()

    def as_fragment(text, direction):
        # Anything other than "R" is reported as left-to-right.
        if direction == "R":
            return (text, TextDirection.RTL)
        return (text, TextDirection.LTR)

    fragments = []
    pending_text = ""
    pending_direction = ""
    for item in self.characters:
        if item.get_direction_from_level() != pending_direction:
            # Direction changed: close the run collected so far, if any.
            if pending_text:
                fragments.append(as_fragment(pending_text, pending_direction))
            pending_text = ""
            pending_direction = item.get_direction_from_level()
        pending_text += item.character
    if pending_text:
        fragments.append(as_fragment(pending_text, pending_direction))
    return tuple(fragments)
class DirectionalStatus (embedding_level: int,
directional_override_status: str,
directional_isolate_status: bool)-
Expand source code Browse git
@dataclass
class DirectionalStatus:
    """One entry of the directional status stack kept while explicit levels are resolved."""

    # Slots are declared by hand, so instances have no per-instance __dict__.
    __slots__ = [
        "embedding_level",
        "directional_override_status",
        "directional_isolate_status",
    ]
    embedding_level: int  # between 0 and MAX_DEPTH
    directional_override_status: str  # "N" (Neutral), "L" (Left) or "R" (Right)
    # True when the entry was pushed for an isolate initiator, False for an
    # embedding or override.
    directional_isolate_status: bool
DirectionalStatus(embedding_level: int, directional_override_status: str, directional_isolate_status: bool)
Instance variables
var directional_isolate_status : bool
-
Expand source code Browse git
@dataclass
class DirectionalStatus:
    """One entry of the directional status stack kept while explicit levels are resolved."""

    # Slots are declared by hand, so instances have no per-instance __dict__.
    __slots__ = [
        "embedding_level",
        "directional_override_status",
        "directional_isolate_status",
    ]
    embedding_level: int  # between 0 and MAX_DEPTH
    directional_override_status: str  # "N" (Neutral), "L" (Left) or "R" (Right)
    # True when the entry was pushed for an isolate initiator, False for an
    # embedding or override.
    directional_isolate_status: bool
var directional_override_status : str
-
Expand source code Browse git
@dataclass
class DirectionalStatus:
    """One entry of the directional status stack kept while explicit levels are resolved."""

    # Slots are declared by hand, so instances have no per-instance __dict__.
    __slots__ = [
        "embedding_level",
        "directional_override_status",
        "directional_isolate_status",
    ]
    embedding_level: int  # between 0 and MAX_DEPTH
    directional_override_status: str  # "N" (Neutral), "L" (Left) or "R" (Right)
    # True when the entry was pushed for an isolate initiator, False for an
    # embedding or override.
    directional_isolate_status: bool
var embedding_level : int
-
Expand source code Browse git
@dataclass
class DirectionalStatus:
    """One entry of the directional status stack kept while explicit levels are resolved."""

    # Slots are declared by hand, so instances have no per-instance __dict__.
    __slots__ = [
        "embedding_level",
        "directional_override_status",
        "directional_isolate_status",
    ]
    embedding_level: int  # between 0 and MAX_DEPTH
    directional_override_status: str  # "N" (Neutral), "L" (Left) or "R" (Right)
    # True when the entry was pushed for an isolate initiator, False for an
    # embedding or override.
    directional_isolate_status: bool
class IsolatingRun (characters: List[BidiCharacter],
sos: str,
eos: str)-
Expand source code Browse git
class IsolatingRun:
    """
    Isolating run sequence whose characters get their bidi class and embedding
    level resolved in place as soon as the object is built.

    characters: BidiCharacter objects of the run (mutated in place)
    sos, eos: directions assumed before the first and after the last character;
        kept as previous_direction and next_direction
    """

    __slots__ = ["characters", "previous_direction", "next_direction"]

    def __init__(self, characters: List[BidiCharacter], sos: str, eos: str):
        self.characters = characters
        self.previous_direction = sos
        self.next_direction = eos
        self.resolve_weak_types()
        self.resolve_neutral_types()
        self.resolve_implicit_levels()

    def resolve_weak_types(self) -> None:
        """Apply rules W1 to W7, rewriting the bidi class of the characters."""
        chars = self.characters
        count = len(chars)

        # W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and change the type of the NSM to Other Neutral
        #     if the previous character is an isolate initiator or PDI, and to the type of the previous character otherwise.
        #     If the NSM is at the start of the isolating run sequence, it will get the type of sos.
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class == "NSM":
                if i == 0:
                    bidi_char.set_class(self.previous_direction)
                else:
                    bidi_char.set_class(
                        "ON"
                        if chars[i - 1].bidi_class in ("LRI", "RLI", "FSI", "PDI")
                        else chars[i - 1].bidi_class
                    )

        # W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos) is found.
        #     If an AL is found, change the type of the European number to Arabic number.
        # W3. Change all ALs to R.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "AL":
                bidi_char.set_class("R")
            if bidi_char.bidi_class == "EN" and last_strong_type == "AL":
                bidi_char.set_class("AN")

        # W4. A single European separator between two European numbers changes to a European number.
        #     A single common separator between two numbers of the same type changes to that type.
        # The first and last characters have no neighbour on one side and are skipped.
        for i in range(1, count - 1):
            bidi_char = chars[i]
            before = chars[i - 1].bidi_class
            after = chars[i + 1].bidi_class
            if bidi_char.bidi_class == "ES" and before == "EN" and after == "EN":
                bidi_char.set_class("EN")
            if (
                bidi_char.bidi_class == "CS"
                and before in ("AN", "EN")
                and after == before
            ):
                bidi_char.set_class(before)

        # W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
        # W6. All remaining separators and terminators (after the application of W4 and W5) change to Other Neutral.
        # en_follows[i] tells whether the first non-ET character after position i is an EN.
        # It is computed in a single backward sweep (no recursion, linear time); this is valid
        # because the loop below only rewrites the character it is currently visiting.
        en_follows = [False] * count
        seen_en = False
        for i in range(count - 1, -1, -1):
            en_follows[i] = seen_en
            bidi_class = chars[i].bidi_class
            if bidi_class != "ET":
                seen_en = bidi_class == "EN"
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class == "ET":
                # Look backward over any terminators for a European number
                j = i - 1
                while j >= 0 and chars[j].bidi_class == "ET":
                    j -= 1
                en_precedes = j >= 0 and chars[j].bidi_class == "EN"
                if en_precedes or en_follows[i]:
                    bidi_char.set_class("EN")
            if bidi_char.bidi_class in ("ET", "ES", "CS"):
                bidi_char.set_class("ON")

        # W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
        #     If an L is found, then change the type of the European number to L.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "EN" and last_strong_type == "L":
                bidi_char.set_class("L")

    def pair_brackets(self) -> List[Tuple[int, int]]:
        """
        Calculate all the bracket pairs on an isolate run, to be used on rule N0
        How to calculate bracket pairs:
        - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14
        - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/

        Returns the (opening index, closing index) pairs sorted by opening index,
        or an empty list when more than 63 opening brackets are pending.
        """
        # U+232A and U+3009 are canonically equivalent closing angle brackets and must
        # match each other's opener. They are written as escapes because the two
        # characters look identical and get merged by Unicode normalization.
        equivalent_closers = ("\u232a", "\u3009")
        pending = []  # stack of (opening character, index) still waiting for a match
        bracket_pairs = []
        for index, char in enumerate(self.characters):
            # Only brackets whose current class is still ON take part in pairing
            if char.bidi_class != "ON" or char.character not in BIDI_BRACKETS:
                continue
            bracket_type = BIDI_BRACKETS[char.character]["type"]
            if bracket_type == "o":
                if len(pending) >= 63:
                    return []
                pending.append((char.character, index))
            elif bracket_type == "c":
                # Search the stack from the most recent opener down to the oldest
                for depth in range(len(pending) - 1, -1, -1):
                    open_char, open_index = pending[depth]
                    expected = BIDI_BRACKETS[open_char]["pair"]
                    if expected == char.character or (
                        expected in equivalent_closers
                        and char.character in equivalent_closers
                    ):
                        bracket_pairs.append((open_index, index))
                        # Drop the matched opener and every opener stacked above it
                        del pending[depth:]
                        break
        return sorted(bracket_pairs, key=itemgetter(0))

    def resolve_neutral_types(self) -> None:
        """Apply rule N0 (paired brackets) and rules N1-N2 (remaining neutrals)."""
        chars = self.characters

        def strong_direction(bidi_class):
            # Numbers (AN, EN) count as R when resolving neutrals
            if bidi_class == "L":
                return "L"
            if bidi_class in ("R", "AN", "EN"):
                return "R"
            return None

        def previous_strong(index: int):
            # Direction of the closest strong character before index, or sos if none
            for position in range(index - 1, -1, -1):
                direction = strong_direction(chars[position].bidi_class)
                if direction:
                    return direction
            return self.previous_direction

        # N0-N2: Resolving neutral types
        # N0
        brackets = self.pair_brackets()
        if brackets:
            embedding_direction = chars[0].get_direction_from_level()
            for opening, closing in brackets:
                strong_same_direction = False
                strong_opposite_direction = False
                resulting_direction = None
                for index in range(opening, closing):
                    direction = strong_direction(chars[index].bidi_class)
                    if direction is None:
                        continue
                    if direction == embedding_direction:
                        strong_same_direction = True
                        break
                    strong_opposite_direction = True
                if strong_same_direction:
                    resulting_direction = embedding_direction
                elif strong_opposite_direction:
                    opposite_direction = "L" if embedding_direction == "R" else "R"
                    if previous_strong(opening) == opposite_direction:
                        resulting_direction = opposite_direction
                    else:
                        resulting_direction = embedding_direction
                if resulting_direction:
                    chars[opening].bidi_class = resulting_direction
                    chars[closing].bidi_class = resulting_direction
                    if len(chars) > closing + 1:
                        # A character that was originally an NSM right after the
                        # closing bracket follows the bracket's new type
                        next_char = chars[closing + 1]
                        if (
                            next_char.original_bidi_class == "NSM"
                            and next_char.bidi_class == "ON"
                        ):
                            next_char.bidi_class = resulting_direction

        # N1-N2
        # following[i] is the direction of the closest strong character after position i
        # (eos if none). One backward sweep replaces a recursive forward search that
        # overflowed the call stack on long neutral runs; it is valid because the loop
        # below never modifies a character located after the one it is visiting.
        count = len(chars)
        following = [self.next_direction] * count
        upcoming = self.next_direction
        for i in range(count - 1, -1, -1):
            following[i] = upcoming
            upcoming = strong_direction(chars[i].bidi_class) or upcoming
        preceding = self.previous_direction
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class in (
                "B",
                "S",
                "WS",
                "ON",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if preceding == following[i]:
                    bidi_char.bidi_class = preceding
                else:
                    bidi_char.bidi_class = bidi_char.get_direction_from_level()
            # Characters already resolved act as context for the ones that follow
            preceding = strong_direction(bidi_char.bidi_class) or preceding

    def resolve_implicit_levels(self) -> None:
        """Apply rules I1 and I2, raising the embedding level of the characters in place."""
        for bidi_char in self.characters:
            # I1. For all characters with an even (left-to-right) embedding level,
            #     those of type R go up one level and those of type AN or EN go up two levels.
            if bidi_char.embedding_level % 2 == 0:
                if bidi_char.bidi_class == "R":
                    bidi_char.embedding_level += 1
                if bidi_char.bidi_class in ("AN", "EN"):
                    bidi_char.embedding_level += 2
            # I2. For all characters with an odd (right-to-left) embedding level, those of type L, EN or AN go up one level.
            else:
                if bidi_char.bidi_class in ("L", "EN", "AN"):
                    bidi_char.embedding_level += 1
Instance variables
var characters
-
Expand source code Browse git
class IsolatingRun:
    """
    Isolating run sequence whose characters get their bidi class and embedding
    level resolved in place as soon as the object is built.

    characters: BidiCharacter objects of the run (mutated in place)
    sos, eos: directions assumed before the first and after the last character;
        kept as previous_direction and next_direction
    """

    __slots__ = ["characters", "previous_direction", "next_direction"]

    def __init__(self, characters: List[BidiCharacter], sos: str, eos: str):
        self.characters = characters
        self.previous_direction = sos
        self.next_direction = eos
        self.resolve_weak_types()
        self.resolve_neutral_types()
        self.resolve_implicit_levels()

    def resolve_weak_types(self) -> None:
        """Apply rules W1 to W7, rewriting the bidi class of the characters."""
        chars = self.characters
        count = len(chars)

        # W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and change the type of the NSM to Other Neutral
        #     if the previous character is an isolate initiator or PDI, and to the type of the previous character otherwise.
        #     If the NSM is at the start of the isolating run sequence, it will get the type of sos.
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class == "NSM":
                if i == 0:
                    bidi_char.set_class(self.previous_direction)
                else:
                    bidi_char.set_class(
                        "ON"
                        if chars[i - 1].bidi_class in ("LRI", "RLI", "FSI", "PDI")
                        else chars[i - 1].bidi_class
                    )

        # W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos) is found.
        #     If an AL is found, change the type of the European number to Arabic number.
        # W3. Change all ALs to R.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "AL":
                bidi_char.set_class("R")
            if bidi_char.bidi_class == "EN" and last_strong_type == "AL":
                bidi_char.set_class("AN")

        # W4. A single European separator between two European numbers changes to a European number.
        #     A single common separator between two numbers of the same type changes to that type.
        # The first and last characters have no neighbour on one side and are skipped.
        for i in range(1, count - 1):
            bidi_char = chars[i]
            before = chars[i - 1].bidi_class
            after = chars[i + 1].bidi_class
            if bidi_char.bidi_class == "ES" and before == "EN" and after == "EN":
                bidi_char.set_class("EN")
            if (
                bidi_char.bidi_class == "CS"
                and before in ("AN", "EN")
                and after == before
            ):
                bidi_char.set_class(before)

        # W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
        # W6. All remaining separators and terminators (after the application of W4 and W5) change to Other Neutral.
        # en_follows[i] tells whether the first non-ET character after position i is an EN.
        # It is computed in a single backward sweep (no recursion, linear time); this is valid
        # because the loop below only rewrites the character it is currently visiting.
        en_follows = [False] * count
        seen_en = False
        for i in range(count - 1, -1, -1):
            en_follows[i] = seen_en
            bidi_class = chars[i].bidi_class
            if bidi_class != "ET":
                seen_en = bidi_class == "EN"
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class == "ET":
                # Look backward over any terminators for a European number
                j = i - 1
                while j >= 0 and chars[j].bidi_class == "ET":
                    j -= 1
                en_precedes = j >= 0 and chars[j].bidi_class == "EN"
                if en_precedes or en_follows[i]:
                    bidi_char.set_class("EN")
            if bidi_char.bidi_class in ("ET", "ES", "CS"):
                bidi_char.set_class("ON")

        # W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
        #     If an L is found, then change the type of the European number to L.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "EN" and last_strong_type == "L":
                bidi_char.set_class("L")

    def pair_brackets(self) -> List[Tuple[int, int]]:
        """
        Calculate all the bracket pairs on an isolate run, to be used on rule N0
        How to calculate bracket pairs:
        - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14
        - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/

        Returns the (opening index, closing index) pairs sorted by opening index,
        or an empty list when more than 63 opening brackets are pending.
        """
        # U+232A and U+3009 are canonically equivalent closing angle brackets and must
        # match each other's opener. They are written as escapes because the two
        # characters look identical and get merged by Unicode normalization.
        equivalent_closers = ("\u232a", "\u3009")
        pending = []  # stack of (opening character, index) still waiting for a match
        bracket_pairs = []
        for index, char in enumerate(self.characters):
            # Only brackets whose current class is still ON take part in pairing
            if char.bidi_class != "ON" or char.character not in BIDI_BRACKETS:
                continue
            bracket_type = BIDI_BRACKETS[char.character]["type"]
            if bracket_type == "o":
                if len(pending) >= 63:
                    return []
                pending.append((char.character, index))
            elif bracket_type == "c":
                # Search the stack from the most recent opener down to the oldest
                for depth in range(len(pending) - 1, -1, -1):
                    open_char, open_index = pending[depth]
                    expected = BIDI_BRACKETS[open_char]["pair"]
                    if expected == char.character or (
                        expected in equivalent_closers
                        and char.character in equivalent_closers
                    ):
                        bracket_pairs.append((open_index, index))
                        # Drop the matched opener and every opener stacked above it
                        del pending[depth:]
                        break
        return sorted(bracket_pairs, key=itemgetter(0))

    def resolve_neutral_types(self) -> None:
        """Apply rule N0 (paired brackets) and rules N1-N2 (remaining neutrals)."""
        chars = self.characters

        def strong_direction(bidi_class):
            # Numbers (AN, EN) count as R when resolving neutrals
            if bidi_class == "L":
                return "L"
            if bidi_class in ("R", "AN", "EN"):
                return "R"
            return None

        def previous_strong(index: int):
            # Direction of the closest strong character before index, or sos if none
            for position in range(index - 1, -1, -1):
                direction = strong_direction(chars[position].bidi_class)
                if direction:
                    return direction
            return self.previous_direction

        # N0-N2: Resolving neutral types
        # N0
        brackets = self.pair_brackets()
        if brackets:
            embedding_direction = chars[0].get_direction_from_level()
            for opening, closing in brackets:
                strong_same_direction = False
                strong_opposite_direction = False
                resulting_direction = None
                for index in range(opening, closing):
                    direction = strong_direction(chars[index].bidi_class)
                    if direction is None:
                        continue
                    if direction == embedding_direction:
                        strong_same_direction = True
                        break
                    strong_opposite_direction = True
                if strong_same_direction:
                    resulting_direction = embedding_direction
                elif strong_opposite_direction:
                    opposite_direction = "L" if embedding_direction == "R" else "R"
                    if previous_strong(opening) == opposite_direction:
                        resulting_direction = opposite_direction
                    else:
                        resulting_direction = embedding_direction
                if resulting_direction:
                    chars[opening].bidi_class = resulting_direction
                    chars[closing].bidi_class = resulting_direction
                    if len(chars) > closing + 1:
                        # A character that was originally an NSM right after the
                        # closing bracket follows the bracket's new type
                        next_char = chars[closing + 1]
                        if (
                            next_char.original_bidi_class == "NSM"
                            and next_char.bidi_class == "ON"
                        ):
                            next_char.bidi_class = resulting_direction

        # N1-N2
        # following[i] is the direction of the closest strong character after position i
        # (eos if none). One backward sweep replaces a recursive forward search that
        # overflowed the call stack on long neutral runs; it is valid because the loop
        # below never modifies a character located after the one it is visiting.
        count = len(chars)
        following = [self.next_direction] * count
        upcoming = self.next_direction
        for i in range(count - 1, -1, -1):
            following[i] = upcoming
            upcoming = strong_direction(chars[i].bidi_class) or upcoming
        preceding = self.previous_direction
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class in (
                "B",
                "S",
                "WS",
                "ON",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if preceding == following[i]:
                    bidi_char.bidi_class = preceding
                else:
                    bidi_char.bidi_class = bidi_char.get_direction_from_level()
            # Characters already resolved act as context for the ones that follow
            preceding = strong_direction(bidi_char.bidi_class) or preceding

    def resolve_implicit_levels(self) -> None:
        """Apply rules I1 and I2, raising the embedding level of the characters in place."""
        for bidi_char in self.characters:
            # I1. For all characters with an even (left-to-right) embedding level,
            #     those of type R go up one level and those of type AN or EN go up two levels.
            if bidi_char.embedding_level % 2 == 0:
                if bidi_char.bidi_class == "R":
                    bidi_char.embedding_level += 1
                if bidi_char.bidi_class in ("AN", "EN"):
                    bidi_char.embedding_level += 2
            # I2. For all characters with an odd (right-to-left) embedding level, those of type L, EN or AN go up one level.
            else:
                if bidi_char.bidi_class in ("L", "EN", "AN"):
                    bidi_char.embedding_level += 1
var next_direction
-
Expand source code Browse git
class IsolatingRun:
    """
    Isolating run sequence whose characters get their bidi class and embedding
    level resolved in place as soon as the object is built.

    characters: BidiCharacter objects of the run (mutated in place)
    sos, eos: directions assumed before the first and after the last character;
        kept as previous_direction and next_direction
    """

    __slots__ = ["characters", "previous_direction", "next_direction"]

    def __init__(self, characters: List[BidiCharacter], sos: str, eos: str):
        self.characters = characters
        self.previous_direction = sos
        self.next_direction = eos
        self.resolve_weak_types()
        self.resolve_neutral_types()
        self.resolve_implicit_levels()

    def resolve_weak_types(self) -> None:
        """Apply rules W1 to W7, rewriting the bidi class of the characters."""
        chars = self.characters
        count = len(chars)

        # W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and change the type of the NSM to Other Neutral
        #     if the previous character is an isolate initiator or PDI, and to the type of the previous character otherwise.
        #     If the NSM is at the start of the isolating run sequence, it will get the type of sos.
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class == "NSM":
                if i == 0:
                    bidi_char.set_class(self.previous_direction)
                else:
                    bidi_char.set_class(
                        "ON"
                        if chars[i - 1].bidi_class in ("LRI", "RLI", "FSI", "PDI")
                        else chars[i - 1].bidi_class
                    )

        # W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos) is found.
        #     If an AL is found, change the type of the European number to Arabic number.
        # W3. Change all ALs to R.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "AL":
                bidi_char.set_class("R")
            if bidi_char.bidi_class == "EN" and last_strong_type == "AL":
                bidi_char.set_class("AN")

        # W4. A single European separator between two European numbers changes to a European number.
        #     A single common separator between two numbers of the same type changes to that type.
        # The first and last characters have no neighbour on one side and are skipped.
        for i in range(1, count - 1):
            bidi_char = chars[i]
            before = chars[i - 1].bidi_class
            after = chars[i + 1].bidi_class
            if bidi_char.bidi_class == "ES" and before == "EN" and after == "EN":
                bidi_char.set_class("EN")
            if (
                bidi_char.bidi_class == "CS"
                and before in ("AN", "EN")
                and after == before
            ):
                bidi_char.set_class(before)

        # W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
        # W6. All remaining separators and terminators (after the application of W4 and W5) change to Other Neutral.
        # en_follows[i] tells whether the first non-ET character after position i is an EN.
        # It is computed in a single backward sweep (no recursion, linear time); this is valid
        # because the loop below only rewrites the character it is currently visiting.
        en_follows = [False] * count
        seen_en = False
        for i in range(count - 1, -1, -1):
            en_follows[i] = seen_en
            bidi_class = chars[i].bidi_class
            if bidi_class != "ET":
                seen_en = bidi_class == "EN"
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class == "ET":
                # Look backward over any terminators for a European number
                j = i - 1
                while j >= 0 and chars[j].bidi_class == "ET":
                    j -= 1
                en_precedes = j >= 0 and chars[j].bidi_class == "EN"
                if en_precedes or en_follows[i]:
                    bidi_char.set_class("EN")
            if bidi_char.bidi_class in ("ET", "ES", "CS"):
                bidi_char.set_class("ON")

        # W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
        #     If an L is found, then change the type of the European number to L.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "EN" and last_strong_type == "L":
                bidi_char.set_class("L")

    def pair_brackets(self) -> List[Tuple[int, int]]:
        """
        Calculate all the bracket pairs on an isolate run, to be used on rule N0
        How to calculate bracket pairs:
        - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14
        - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/

        Returns the (opening index, closing index) pairs sorted by opening index,
        or an empty list when more than 63 opening brackets are pending.
        """
        # U+232A and U+3009 are canonically equivalent closing angle brackets and must
        # match each other's opener. They are written as escapes because the two
        # characters look identical and get merged by Unicode normalization.
        equivalent_closers = ("\u232a", "\u3009")
        pending = []  # stack of (opening character, index) still waiting for a match
        bracket_pairs = []
        for index, char in enumerate(self.characters):
            # Only brackets whose current class is still ON take part in pairing
            if char.bidi_class != "ON" or char.character not in BIDI_BRACKETS:
                continue
            bracket_type = BIDI_BRACKETS[char.character]["type"]
            if bracket_type == "o":
                if len(pending) >= 63:
                    return []
                pending.append((char.character, index))
            elif bracket_type == "c":
                # Search the stack from the most recent opener down to the oldest
                for depth in range(len(pending) - 1, -1, -1):
                    open_char, open_index = pending[depth]
                    expected = BIDI_BRACKETS[open_char]["pair"]
                    if expected == char.character or (
                        expected in equivalent_closers
                        and char.character in equivalent_closers
                    ):
                        bracket_pairs.append((open_index, index))
                        # Drop the matched opener and every opener stacked above it
                        del pending[depth:]
                        break
        return sorted(bracket_pairs, key=itemgetter(0))

    def resolve_neutral_types(self) -> None:
        """Apply rule N0 (paired brackets) and rules N1-N2 (remaining neutrals)."""
        chars = self.characters

        def strong_direction(bidi_class):
            # Numbers (AN, EN) count as R when resolving neutrals
            if bidi_class == "L":
                return "L"
            if bidi_class in ("R", "AN", "EN"):
                return "R"
            return None

        def previous_strong(index: int):
            # Direction of the closest strong character before index, or sos if none
            for position in range(index - 1, -1, -1):
                direction = strong_direction(chars[position].bidi_class)
                if direction:
                    return direction
            return self.previous_direction

        # N0-N2: Resolving neutral types
        # N0
        brackets = self.pair_brackets()
        if brackets:
            embedding_direction = chars[0].get_direction_from_level()
            for opening, closing in brackets:
                strong_same_direction = False
                strong_opposite_direction = False
                resulting_direction = None
                for index in range(opening, closing):
                    direction = strong_direction(chars[index].bidi_class)
                    if direction is None:
                        continue
                    if direction == embedding_direction:
                        strong_same_direction = True
                        break
                    strong_opposite_direction = True
                if strong_same_direction:
                    resulting_direction = embedding_direction
                elif strong_opposite_direction:
                    opposite_direction = "L" if embedding_direction == "R" else "R"
                    if previous_strong(opening) == opposite_direction:
                        resulting_direction = opposite_direction
                    else:
                        resulting_direction = embedding_direction
                if resulting_direction:
                    chars[opening].bidi_class = resulting_direction
                    chars[closing].bidi_class = resulting_direction
                    if len(chars) > closing + 1:
                        # A character that was originally an NSM right after the
                        # closing bracket follows the bracket's new type
                        next_char = chars[closing + 1]
                        if (
                            next_char.original_bidi_class == "NSM"
                            and next_char.bidi_class == "ON"
                        ):
                            next_char.bidi_class = resulting_direction

        # N1-N2
        # following[i] is the direction of the closest strong character after position i
        # (eos if none). One backward sweep replaces a recursive forward search that
        # overflowed the call stack on long neutral runs; it is valid because the loop
        # below never modifies a character located after the one it is visiting.
        count = len(chars)
        following = [self.next_direction] * count
        upcoming = self.next_direction
        for i in range(count - 1, -1, -1):
            following[i] = upcoming
            upcoming = strong_direction(chars[i].bidi_class) or upcoming
        preceding = self.previous_direction
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class in (
                "B",
                "S",
                "WS",
                "ON",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if preceding == following[i]:
                    bidi_char.bidi_class = preceding
                else:
                    bidi_char.bidi_class = bidi_char.get_direction_from_level()
            # Characters already resolved act as context for the ones that follow
            preceding = strong_direction(bidi_char.bidi_class) or preceding

    def resolve_implicit_levels(self) -> None:
        """Apply rules I1 and I2, raising the embedding level of the characters in place."""
        for bidi_char in self.characters:
            # I1. For all characters with an even (left-to-right) embedding level,
            #     those of type R go up one level and those of type AN or EN go up two levels.
            if bidi_char.embedding_level % 2 == 0:
                if bidi_char.bidi_class == "R":
                    bidi_char.embedding_level += 1
                if bidi_char.bidi_class in ("AN", "EN"):
                    bidi_char.embedding_level += 2
            # I2. For all characters with an odd (right-to-left) embedding level, those of type L, EN or AN go up one level.
            else:
                if bidi_char.bidi_class in ("L", "EN", "AN"):
                    bidi_char.embedding_level += 1
var previous_direction
-
Expand source code Browse git
class IsolatingRun:
    """
    Isolating run sequence whose characters get their bidi class and embedding
    level resolved in place as soon as the object is built.

    characters: BidiCharacter objects of the run (mutated in place)
    sos, eos: directions assumed before the first and after the last character;
        kept as previous_direction and next_direction
    """

    __slots__ = ["characters", "previous_direction", "next_direction"]

    def __init__(self, characters: List[BidiCharacter], sos: str, eos: str):
        self.characters = characters
        self.previous_direction = sos
        self.next_direction = eos
        self.resolve_weak_types()
        self.resolve_neutral_types()
        self.resolve_implicit_levels()

    def resolve_weak_types(self) -> None:
        """Apply rules W1 to W7, rewriting the bidi class of the characters."""
        chars = self.characters
        count = len(chars)

        # W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and change the type of the NSM to Other Neutral
        #     if the previous character is an isolate initiator or PDI, and to the type of the previous character otherwise.
        #     If the NSM is at the start of the isolating run sequence, it will get the type of sos.
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class == "NSM":
                if i == 0:
                    bidi_char.set_class(self.previous_direction)
                else:
                    bidi_char.set_class(
                        "ON"
                        if chars[i - 1].bidi_class in ("LRI", "RLI", "FSI", "PDI")
                        else chars[i - 1].bidi_class
                    )

        # W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos) is found.
        #     If an AL is found, change the type of the European number to Arabic number.
        # W3. Change all ALs to R.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "AL":
                bidi_char.set_class("R")
            if bidi_char.bidi_class == "EN" and last_strong_type == "AL":
                bidi_char.set_class("AN")

        # W4. A single European separator between two European numbers changes to a European number.
        #     A single common separator between two numbers of the same type changes to that type.
        # The first and last characters have no neighbour on one side and are skipped.
        for i in range(1, count - 1):
            bidi_char = chars[i]
            before = chars[i - 1].bidi_class
            after = chars[i + 1].bidi_class
            if bidi_char.bidi_class == "ES" and before == "EN" and after == "EN":
                bidi_char.set_class("EN")
            if (
                bidi_char.bidi_class == "CS"
                and before in ("AN", "EN")
                and after == before
            ):
                bidi_char.set_class(before)

        # W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
        # W6. All remaining separators and terminators (after the application of W4 and W5) change to Other Neutral.
        # en_follows[i] tells whether the first non-ET character after position i is an EN.
        # It is computed in a single backward sweep (no recursion, linear time); this is valid
        # because the loop below only rewrites the character it is currently visiting.
        en_follows = [False] * count
        seen_en = False
        for i in range(count - 1, -1, -1):
            en_follows[i] = seen_en
            bidi_class = chars[i].bidi_class
            if bidi_class != "ET":
                seen_en = bidi_class == "EN"
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class == "ET":
                # Look backward over any terminators for a European number
                j = i - 1
                while j >= 0 and chars[j].bidi_class == "ET":
                    j -= 1
                en_precedes = j >= 0 and chars[j].bidi_class == "EN"
                if en_precedes or en_follows[i]:
                    bidi_char.set_class("EN")
            if bidi_char.bidi_class in ("ET", "ES", "CS"):
                bidi_char.set_class("ON")

        # W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
        #     If an L is found, then change the type of the European number to L.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "EN" and last_strong_type == "L":
                bidi_char.set_class("L")

    def pair_brackets(self) -> List[Tuple[int, int]]:
        """
        Calculate all the bracket pairs on an isolate run, to be used on rule N0
        How to calculate bracket pairs:
        - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14
        - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/

        Returns the (opening index, closing index) pairs sorted by opening index,
        or an empty list when more than 63 opening brackets are pending.
        """
        # U+232A and U+3009 are canonically equivalent closing angle brackets and must
        # match each other's opener. They are written as escapes because the two
        # characters look identical and get merged by Unicode normalization.
        equivalent_closers = ("\u232a", "\u3009")
        pending = []  # stack of (opening character, index) still waiting for a match
        bracket_pairs = []
        for index, char in enumerate(self.characters):
            # Only brackets whose current class is still ON take part in pairing
            if char.bidi_class != "ON" or char.character not in BIDI_BRACKETS:
                continue
            bracket_type = BIDI_BRACKETS[char.character]["type"]
            if bracket_type == "o":
                if len(pending) >= 63:
                    return []
                pending.append((char.character, index))
            elif bracket_type == "c":
                # Search the stack from the most recent opener down to the oldest
                for depth in range(len(pending) - 1, -1, -1):
                    open_char, open_index = pending[depth]
                    expected = BIDI_BRACKETS[open_char]["pair"]
                    if expected == char.character or (
                        expected in equivalent_closers
                        and char.character in equivalent_closers
                    ):
                        bracket_pairs.append((open_index, index))
                        # Drop the matched opener and every opener stacked above it
                        del pending[depth:]
                        break
        return sorted(bracket_pairs, key=itemgetter(0))

    def resolve_neutral_types(self) -> None:
        """Apply rule N0 (paired brackets) and rules N1-N2 (remaining neutrals)."""
        chars = self.characters

        def strong_direction(bidi_class):
            # Numbers (AN, EN) count as R when resolving neutrals
            if bidi_class == "L":
                return "L"
            if bidi_class in ("R", "AN", "EN"):
                return "R"
            return None

        def previous_strong(index: int):
            # Direction of the closest strong character before index, or sos if none
            for position in range(index - 1, -1, -1):
                direction = strong_direction(chars[position].bidi_class)
                if direction:
                    return direction
            return self.previous_direction

        # N0-N2: Resolving neutral types
        # N0
        brackets = self.pair_brackets()
        if brackets:
            embedding_direction = chars[0].get_direction_from_level()
            for opening, closing in brackets:
                strong_same_direction = False
                strong_opposite_direction = False
                resulting_direction = None
                for index in range(opening, closing):
                    direction = strong_direction(chars[index].bidi_class)
                    if direction is None:
                        continue
                    if direction == embedding_direction:
                        strong_same_direction = True
                        break
                    strong_opposite_direction = True
                if strong_same_direction:
                    resulting_direction = embedding_direction
                elif strong_opposite_direction:
                    opposite_direction = "L" if embedding_direction == "R" else "R"
                    if previous_strong(opening) == opposite_direction:
                        resulting_direction = opposite_direction
                    else:
                        resulting_direction = embedding_direction
                if resulting_direction:
                    chars[opening].bidi_class = resulting_direction
                    chars[closing].bidi_class = resulting_direction
                    if len(chars) > closing + 1:
                        # A character that was originally an NSM right after the
                        # closing bracket follows the bracket's new type
                        next_char = chars[closing + 1]
                        if (
                            next_char.original_bidi_class == "NSM"
                            and next_char.bidi_class == "ON"
                        ):
                            next_char.bidi_class = resulting_direction

        # N1-N2
        # following[i] is the direction of the closest strong character after position i
        # (eos if none). One backward sweep replaces a recursive forward search that
        # overflowed the call stack on long neutral runs; it is valid because the loop
        # below never modifies a character located after the one it is visiting.
        count = len(chars)
        following = [self.next_direction] * count
        upcoming = self.next_direction
        for i in range(count - 1, -1, -1):
            following[i] = upcoming
            upcoming = strong_direction(chars[i].bidi_class) or upcoming
        preceding = self.previous_direction
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class in (
                "B",
                "S",
                "WS",
                "ON",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if preceding == following[i]:
                    bidi_char.bidi_class = preceding
                else:
                    bidi_char.bidi_class = bidi_char.get_direction_from_level()
            # Characters already resolved act as context for the ones that follow
            preceding = strong_direction(bidi_char.bidi_class) or preceding

    def resolve_implicit_levels(self) -> None:
        """Apply rules I1 and I2, raising the embedding level of the characters in place."""
        for bidi_char in self.characters:
            # I1. For all characters with an even (left-to-right) embedding level,
            #     those of type R go up one level and those of type AN or EN go up two levels.
            if bidi_char.embedding_level % 2 == 0:
                if bidi_char.bidi_class == "R":
                    bidi_char.embedding_level += 1
                if bidi_char.bidi_class in ("AN", "EN"):
                    bidi_char.embedding_level += 2
            # I2. For all characters with an odd (right-to-left) embedding level, those of type L, EN or AN go up one level.
            else:
                if bidi_char.bidi_class in ("L", "EN", "AN"):
                    bidi_char.embedding_level += 1
Methods
def pair_brackets(self) ‑> List[Tuple[int, int]]
-
Expand source code Browse git
def pair_brackets(self) -> List[Tuple[int, int]]:
    """
    Calculate all the bracket pairs on an isolate run, to be used on rule N0
    How to calculate bracket pairs:
    - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14
    - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/

    Returns the (opening index, closing index) pairs sorted by opening index,
    or an empty list when more than 63 opening brackets are pending.
    """
    # U+232A and U+3009 are canonically equivalent closing angle brackets and must
    # match each other's opener. They are written as escapes because the two
    # characters look identical and get merged by Unicode normalization.
    equivalent_closers = ("\u232a", "\u3009")
    pending = []  # stack of (opening character, index) still waiting for a match
    bracket_pairs = []
    for index, char in enumerate(self.characters):
        # Only brackets whose current class is still ON take part in pairing
        if char.bidi_class != "ON" or char.character not in BIDI_BRACKETS:
            continue
        bracket_type = BIDI_BRACKETS[char.character]["type"]
        if bracket_type == "o":
            if len(pending) >= 63:
                return []
            pending.append((char.character, index))
        elif bracket_type == "c":
            # Search the stack from the most recent opener down to the oldest
            for depth in range(len(pending) - 1, -1, -1):
                open_char, open_index = pending[depth]
                expected = BIDI_BRACKETS[open_char]["pair"]
                if expected == char.character or (
                    expected in equivalent_closers
                    and char.character in equivalent_closers
                ):
                    bracket_pairs.append((open_index, index))
                    # Drop the matched opener and every opener stacked above it
                    del pending[depth:]
                    break
    return sorted(bracket_pairs, key=itemgetter(0))
Calculate all the bracket pairs of an isolating run sequence, for use by rule N0. How to calculate bracket pairs: - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14 - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/
def resolve_implicit_levels(self) ‑> None
-
Expand source code Browse git
def resolve_implicit_levels(self) -> None:
    """
    Raise the embedding level of each character according to its resolved
    bidi class (rules I1 and I2). Characters are modified in place.
    """
    # I1. For all characters with an even (left-to-right) embedding level,
    #     those of type R go up one level and those of type AN or EN go up two levels.
    even_level_increase = {"R": 1, "AN": 2, "EN": 2}
    # I2. For all characters with an odd (right-to-left) embedding level,
    #     those of type L, EN or AN go up one level.
    odd_level_increase = {"L": 1, "EN": 1, "AN": 1}

    for item in self.characters:
        if item.embedding_level % 2 == 0:
            increase = even_level_increase.get(item.bidi_class)
        else:
            increase = odd_level_increase.get(item.bidi_class)
        # Classes missing from the table keep their level untouched
        if increase:
            item.embedding_level += increase
def resolve_neutral_types(self) ‑> None
-
Expand source code Browse git
def resolve_neutral_types(self) -> None:
    """
    Apply rules N0-N2 (resolving neutral and isolate formatting types) to
    the characters of this run, updating their `bidi_class` in place.

    - N0: every bracket pair returned by `pair_brackets()` takes a direction
      derived from the strong types it encloses and, when those are opposite
      to the embedding direction, from the strong type preceding the pair.
      Characters whose original class was NSM and that directly follow a
      resolved bracket take the same direction.
    - N1: a neutral whose preceding and following strong types agree takes
      that direction. EN and AN count as R; the run boundaries are
      `previous_direction` and `next_direction`.
    - N2: every remaining neutral takes the direction of its embedding level.

    All lookups are iterative, so a long run of neutral characters neither
    exhausts the recursion limit nor costs quadratic time.
    """
    chars = self.characters

    def strong_direction(bidi_class):
        # Map a bidi class to "L" / "R" as seen by the N rules, or None when
        # the class is not strong (EN and AN are treated as R).
        if bidi_class == "L":
            return "L"
        if bidi_class in ("R", "AN", "EN"):
            return "R"
        return None

    def previous_strong(index: int):
        # Direction of the closest strong character before `index`,
        # falling back to the direction at the start of the run.
        for position in range(index - 1, -1, -1):
            direction = strong_direction(chars[position].bidi_class)
            if direction is not None:
                return direction
        return self.previous_direction

    # N0
    brackets = self.pair_brackets()
    if brackets:
        embedding_direction = chars[0].get_direction_from_level()
        opposite_direction = "L" if embedding_direction == "R" else "R"
        for open_index, close_index in brackets:
            strong_same_direction = False
            strong_opposite_direction = False
            for position in range(open_index + 1, close_index):
                direction = strong_direction(chars[position].bidi_class)
                if direction is None:
                    continue
                if direction == embedding_direction:
                    strong_same_direction = True
                    break
                strong_opposite_direction = True

            if strong_same_direction:
                resulting_direction = embedding_direction
            elif strong_opposite_direction:
                # The context before the opening bracket decides.
                if previous_strong(open_index) == opposite_direction:
                    resulting_direction = opposite_direction
                else:
                    resulting_direction = embedding_direction
            else:
                # No strong type inside the pair: left for N1 / N2.
                continue
            if not resulting_direction:
                continue

            for bracket_index in (open_index, close_index):
                chars[bracket_index].bidi_class = resulting_direction
                # Any number of original NSM characters right after a
                # resolved bracket follow the bracket's direction.
                position = bracket_index + 1
                while (
                    position < len(chars)
                    and chars[position].original_bidi_class == "NSM"
                    and chars[position].bidi_class == "ON"
                ):
                    chars[position].bidi_class = resulting_direction
                    position += 1

    # N1-N2
    neutral_classes = ("B", "S", "WS", "ON", "FSI", "LRI", "RLI", "PDI")

    # following_strong[i] is the direction of the first strong character
    # after position i (or the direction at the end of the run). It can be
    # computed up front because the forward pass below only ever modifies
    # the character it is currently visiting.
    following_strong = [None] * len(chars)
    upcoming = self.next_direction
    for position in range(len(chars) - 1, -1, -1):
        following_strong[position] = upcoming
        direction = strong_direction(chars[position].bidi_class)
        if direction is not None:
            upcoming = direction

    preceding = self.previous_direction
    for position, bidi_char in enumerate(chars):
        if bidi_char.bidi_class in neutral_classes:
            if preceding == following_strong[position]:
                bidi_char.bidi_class = preceding
            else:
                bidi_char.bidi_class = bidi_char.get_direction_from_level()
        # A neutral that was just resolved acts as strong context for the
        # characters that follow it.
        direction = strong_direction(bidi_char.bidi_class)
        if direction is not None:
            preceding = direction
def resolve_weak_types(self) ‑> None
-
Expand source code Browse git
def resolve_weak_types(self) -> None:
    """
    Apply rules W1-W7 (resolving weak types) to the characters of this run.

    Classes are changed through each character's `set_class`. The rules run
    in order over `self.characters`, with `self.previous_direction` standing
    in for the type at the start of the run.

    The scan over runs of European terminators (W5) is iterative, so a long
    run of terminators neither exhausts the recursion limit nor costs
    quadratic time.
    """
    chars = self.characters

    # W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and change the type of the NSM to Other Neutral
    # if the previous character is an isolate initiator or PDI, and to the type of the previous character otherwise.
    # If the NSM is at the start of the isolating run sequence, it will get the type of sos.
    for i, bidi_char in enumerate(chars):
        if bidi_char.bidi_class != "NSM":
            continue
        if i == 0:
            bidi_char.set_class(self.previous_direction)
            continue
        previous_class = chars[i - 1].bidi_class
        bidi_char.set_class(
            "ON" if previous_class in ("LRI", "RLI", "FSI", "PDI") else previous_class
        )

    # W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos) is found.
    # If an AL is found, change the type of the European number to Arabic number.
    # W3. Change all ALs to R.
    last_strong_type = self.previous_direction
    for bidi_char in chars:
        if bidi_char.bidi_class in ("R", "L", "AL"):
            # Remember AL before it is rewritten to R, so W2 still sees it.
            last_strong_type = bidi_char.bidi_class
        if bidi_char.bidi_class == "AL":
            bidi_char.set_class("R")
        if bidi_char.bidi_class == "EN" and last_strong_type == "AL":
            bidi_char.set_class("AN")

    # W4. A single European separator between two European numbers changes to a European number.
    # A single common separator between two numbers of the same type changes to that type.
    # The first and last characters have no neighbour on one side and are skipped.
    for i in range(1, len(chars) - 1):
        bidi_char = chars[i]
        before = chars[i - 1].bidi_class
        after = chars[i + 1].bidi_class
        if bidi_char.bidi_class == "ES" and before == "EN" and after == "EN":
            bidi_char.set_class("EN")
        if (
            bidi_char.bidi_class == "CS"
            and before in ("AN", "EN")
            and after == before
        ):
            bidi_char.set_class(before)

    # W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
    # W6. All remaining separators and terminators (after the application of W4 and W5) change to Other Neutral.

    # en_follows[i] tells whether the first non-ET character after position i
    # is an EN. Characters after i are untouched while i is processed, so
    # this can be computed in one backward pass.
    en_follows = [False] * len(chars)
    upcoming_is_en = False
    for i in range(len(chars) - 1, -1, -1):
        en_follows[i] = upcoming_is_en
        if chars[i].bidi_class != "ET":
            upcoming_is_en = chars[i].bidi_class == "EN"

    for i, bidi_char in enumerate(chars):
        if bidi_char.bidi_class == "ET":
            # Look back past any ET still pending to the first other class.
            j = i - 1
            while j >= 0 and chars[j].bidi_class == "ET":
                j -= 1
            en_precedes = j >= 0 and chars[j].bidi_class == "EN"
            if en_precedes or en_follows[i]:
                bidi_char.set_class("EN")
        if bidi_char.bidi_class in ("ET", "ES", "CS"):
            bidi_char.set_class("ON")

    # W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
    # If an L is found, then change the type of the European number to L.
    last_strong_type = self.previous_direction
    for bidi_char in chars:
        if bidi_char.bidi_class in ("R", "L", "AL"):
            last_strong_type = bidi_char.bidi_class
        if bidi_char.bidi_class == "EN" and last_strong_type == "L":
            bidi_char.set_class("L")