Module fpdf.bidi

Functions

def auto_detect_base_direction(string: str, stop_at_pdi: bool = False, debug: bool = False) ‑> TextDirection

This function applies rules P2 and P3 to detect the direction of a paragraph, returning the first strong direction and skipping over isolate sequences. P1 (breaking the text into paragraphs) must be applied before calling this function. stop_at_pdi can be set to True to get the direction of a single isolate sequence.

def calculate_isolate_runs(paragraph: List[BidiCharacter]) ‑> List[IsolatingRun]

Classes

class BidiCharacter (character_index: int, character: str, embedding_level: str, debug: bool)
Expand source code Browse git
class BidiCharacter:
    """One character of a paragraph together with its bidi resolution state.

    ``bidi_class`` is the working bidirectional class and may be rewritten
    through ``set_class`` (or direct assignment); ``original_bidi_class`` keeps
    the class that was assigned when the object was created.
    """

    __slots__ = [
        "character_index",
        "character",
        "bidi_class",
        "original_bidi_class",
        "embedding_level",
        "direction",
    ]

    def __init__(
        self, character_index: int, character: str, embedding_level: int, debug: bool
    ):
        """Classify ``character`` and record its starting embedding level.

        Args:
            character_index: stored unchanged in ``character_index``.
            character: the character to classify.
            embedding_level: starting embedding level. It is an integer:
                ``get_direction_from_level`` tests its parity.
            debug: when true, upper-case characters are given class "R"
                instead of the class reported by ``unicodedata``.
        """
        self.character_index = character_index
        self.character = character
        if debug and character.isupper():
            # Debug convention: upper-case letters stand in for right-to-left text.
            self.bidi_class = "R"
        else:
            self.bidi_class = unicodedata.bidirectional(character)
        self.original_bidi_class = self.bidi_class
        self.embedding_level = embedding_level
        self.direction = None

    def get_direction_from_level(self):
        """Return "R" for an odd embedding level and "L" for an even one."""
        return "R" if self.embedding_level % 2 else "L"

    def set_class(self, cls):
        """Overwrite the working bidi class; ``original_bidi_class`` is untouched."""
        self.bidi_class = cls

    def __repr__(self):
        return (
            f"character_index: {self.character_index} character: {self.character}"
            + f" bidi_class: {self.bidi_class} original_bidi_class: {self.original_bidi_class}"
            + f" embedding_level: {self.embedding_level} direction: {self.direction}"
        )

Instance variables

var bidi_class
var character
var character_index
var direction
var embedding_level
var original_bidi_class

Methods

def get_direction_from_level(self)
def set_class(self, cls)
class BidiParagraph (text: str, base_direction: TextDirection = None, debug: bool = False)
Expand source code Browse git
class BidiParagraph:
    """Resolve explicit embedding levels for one paragraph and reorder it.

    The constructor determines the base direction (auto-detected unless one is
    given), builds the list of ``BidiCharacter`` objects by applying the
    explicit-level rules (X1-X9) and hands them to ``calculate_isolate_runs``.
    The ``get_*`` methods expose the characters in logical order, in reordered
    (visual) order, or grouped into same-direction fragments.
    """

    __slots__ = (
        "text",
        "base_direction",
        "debug",
        "base_embedding_level",
        "characters",
    )

    def __init__(
        self, text: str, base_direction: "TextDirection" = None, debug: bool = False
    ):
        """
        Args:
            text: the paragraph text.
            base_direction: paragraph direction; auto-detected from ``text``
                when falsy.
            debug: forwarded to direction detection and to every
                ``BidiCharacter`` created for this paragraph.
        """
        self.text = text
        # `debug` has to be passed by keyword: the second positional parameter
        # of auto_detect_base_direction is `stop_at_pdi`, not `debug`.
        self.base_direction = (
            base_direction
            if base_direction
            else auto_detect_base_direction(self.text, debug=debug)
        )
        self.debug = debug
        self.base_embedding_level = (
            0 if self.base_direction == TextDirection.LTR else 1
        )  # base level
        self.characters: "List[BidiCharacter]" = []
        self.get_bidi_characters()

    def get_characters(self) -> "List[BidiCharacter]":
        """Return the characters in logical order."""
        return self.characters

    def get_characters_with_embedding_level(self) -> "List[BidiCharacter]":
        """Return the characters in logical order after the L1 level resets.

        Only used on conformance testing.
        """
        # Called for its side effect: rule L1 rewrites embedding levels in place.
        self.reorder_resolved_levels()
        return self.characters

    def get_reordered_characters(self) -> "Tuple[BidiCharacter, ...]":
        """Return the characters in reordered (visual) order."""
        return self.reorder_resolved_levels()

    def get_all(self):
        """Return ``(characters in logical order, characters in reordered order)``."""
        return self.characters, self.reorder_resolved_levels()

    def get_reordered_string(self):
        "Used for conformance validation"
        return "".join(c.character for c in self.reorder_resolved_levels())

    def get_bidi_fragments(self):
        """Return the paragraph split into ``(text, TextDirection)`` fragments."""
        return self.split_bidi_fragments()

    def get_bidi_characters(self) -> "List[BidiCharacter]":
        """Build ``self.characters`` by applying the explicit-level rules X1-X9.

        Characters removed by X9 (explicit embedding/override controls, PDF and
        BN) are not kept. When at least one character remains the list is
        passed to ``calculate_isolate_runs``.

        Returns:
            The list stored in ``self.characters``.
        """
        # Explicit levels and directions. Rule X1
        stack: List[DirectionalStatus] = deque()
        current_status = DirectionalStatus(
            embedding_level=self.base_embedding_level,
            directional_override_status="N",
            directional_isolate_status=False,
        )
        # The stack stores copies so that mutating current_status never
        # changes an entry that was already pushed.
        stack.append(replace(current_status))
        overflow_isolate_count = 0
        overflow_embedding_count = 0
        valid_isolate_count = 0
        results = []

        # Explicit embeddings. Process each character individually applying rules X2 through X8
        for index, char in enumerate(self.text):
            bidi_char = BidiCharacter(
                index, char, current_status.embedding_level, self.debug
            )
            new_bidi_class = None

            if bidi_char.bidi_class == "FSI":
                # X5c: a first-strong isolate behaves as LRI or RLI depending on
                # the direction detected in the text up to its matching PDI.
                bidi_char.bidi_class = (
                    "LRI"
                    if auto_detect_base_direction(
                        self.text[index + 1 :], stop_at_pdi=True, debug=self.debug
                    )
                    == TextDirection.LTR
                    else "RLI"
                )

            if bidi_char.bidi_class in ("RLE", "LRE", "RLO", "LRO", "RLI", "LRI"):
                # X2 - X5: calculate explicit embeddings and explicit overrides
                if bidi_char.bidi_class[0] == "R":
                    new_embedding_level = (
                        current_status.embedding_level + 1
                    ) | 1  # least greater odd
                else:
                    new_embedding_level = (
                        current_status.embedding_level + 2
                    ) & ~1  # least greater even
                if (
                    bidi_char.bidi_class[2] == "I"
                    and current_status.directional_override_status != "N"
                ):
                    new_bidi_class = current_status.directional_override_status
                if (
                    new_embedding_level <= MAX_DEPTH
                    and overflow_isolate_count == 0
                    and overflow_embedding_count == 0
                ):
                    current_status.embedding_level = new_embedding_level
                    current_status.directional_override_status = (
                        bidi_char.bidi_class[0]
                        if bidi_char.bidi_class[2] == "O"
                        else "N"
                    )
                    if bidi_char.bidi_class[2] == "I":
                        valid_isolate_count += 1
                        current_status.directional_isolate_status = True
                    else:
                        current_status.directional_isolate_status = False
                    stack.append(replace(current_status))
                elif bidi_char.bidi_class[2] == "I":
                    overflow_isolate_count += 1
                elif overflow_isolate_count == 0:
                    overflow_embedding_count += 1

            if bidi_char.bidi_class not in (
                "B",
                "BN",
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "FSI",
                "PDI",
            ):  # X6
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDI":  # X6a
                if overflow_isolate_count > 0:
                    overflow_isolate_count -= 1
                elif valid_isolate_count > 0:
                    overflow_embedding_count = 0
                    # Drop embeddings/overrides opened inside the isolate, then
                    # the isolate entry itself.
                    while not stack[-1].directional_isolate_status:
                        stack.pop()
                    stack.pop()
                    current_status = replace(stack[-1])
                    valid_isolate_count -= 1
                assert isinstance(current_status, DirectionalStatus)
                bidi_char.embedding_level = current_status.embedding_level
                if current_status.directional_override_status != "N":
                    new_bidi_class = current_status.directional_override_status

            if bidi_char.bidi_class == "PDF":  # X7
                if overflow_isolate_count == 0:
                    if overflow_embedding_count > 0:
                        overflow_embedding_count -= 1
                    elif (
                        not current_status.directional_isolate_status
                        and len(stack) > 1
                    ):
                        stack.pop()
                        current_status = replace(stack[-1])

            if new_bidi_class:
                bidi_char.bidi_class = new_bidi_class
            if bidi_char.bidi_class not in (
                "RLE",
                "LRE",
                "RLO",
                "LRO",
                "PDF",
                "BN",
            ):  # X9
                if bidi_char.bidi_class == "B":
                    bidi_char.embedding_level = self.base_embedding_level
                elif bidi_char.original_bidi_class not in ("LRI", "RLI", "FSI"):
                    # Isolate initiators keep the level that was current before
                    # they were processed (assigned at construction above).
                    bidi_char.embedding_level = current_status.embedding_level
                results.append(bidi_char)

        self.characters = results
        if results:
            calculate_isolate_runs(results)
        return self.characters

    def split_bidi_fragments(self):
        """Group consecutive characters that share a direction.

        The direction of a character is derived from the parity of its
        embedding level.

        Returns:
            A tuple of ``(text, TextDirection)`` pairs in logical order; an
            empty tuple when the paragraph has no characters.
        """

        def as_fragment(chars, direction):
            return (
                "".join(chars),
                TextDirection.RTL if direction == "R" else TextDirection.LTR,
            )

        bidi_fragments = []
        fragment_chars = []
        fragment_direction = None
        for c in self.characters:
            direction = c.get_direction_from_level()
            if direction != fragment_direction:
                if fragment_chars:
                    bidi_fragments.append(
                        as_fragment(fragment_chars, fragment_direction)
                    )
                fragment_chars = []
                fragment_direction = direction
            fragment_chars.append(c.character)
        if fragment_chars:
            bidi_fragments.append(as_fragment(fragment_chars, fragment_direction))
        return tuple(bidi_fragments)

    def reorder_resolved_levels(self):
        """Apply rules L1 and L2 and return the characters in reordered order.

        L1 rewrites ``embedding_level`` on the stored characters in place; the
        order of ``self.characters`` itself is left unchanged.

        Returns:
            A tuple with the characters in reordered order.
        """
        before_separator = True
        end_of_line = True
        max_level = 0
        min_odd_level = 999  # sentinel: leaves the L2 range empty when no odd level exists
        for bidi_char in reversed(self.characters):
            # Rule L1. Reset the embedding level of segment separators, paragraph separators,
            # and any adjacent whitespace.
            if bidi_char.original_bidi_class in ("S", "B"):
                bidi_char.embedding_level = self.base_embedding_level
                before_separator = True
            elif bidi_char.original_bidi_class in (
                "BN",
                "WS",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                if before_separator or end_of_line:
                    bidi_char.embedding_level = self.base_embedding_level
            else:
                before_separator = False
                end_of_line = False

            if bidi_char.embedding_level > max_level:
                max_level = bidi_char.embedding_level
            if (
                bidi_char.embedding_level % 2 != 0
                and bidi_char.embedding_level < min_odd_level
            ):
                min_odd_level = bidi_char.embedding_level

        # Rule L2. From the highest level found in the text to the lowest odd level on each line,
        # reverse any contiguous sequence of characters that are at that level or higher.
        reordered_paragraph = self.characters.copy()
        for level in range(max_level, min_odd_level - 1, -1):
            temp_results = []
            rev = []
            for bidi_char in reordered_paragraph:
                if bidi_char.embedding_level >= level:
                    rev.append(bidi_char)
                else:
                    if rev:
                        rev.reverse()
                        temp_results += rev
                        rev = []
                    temp_results.append(bidi_char)
            if rev:
                rev.reverse()
                temp_results += rev
            reordered_paragraph = temp_results
        return tuple(reordered_paragraph)

Instance variables

var base_direction
var base_embedding_level
var characters
var debug
var text

Methods

def get_all(self)
def get_bidi_characters(self) ‑> List[BidiCharacter]
def get_bidi_fragments(self)
def get_characters(self) ‑> List[BidiCharacter]
def get_characters_with_embedding_level(self) ‑> List[BidiCharacter]
def get_reordered_characters(self) ‑> List[BidiCharacter]
def get_reordered_string(self)

Used for conformance validation

def reorder_resolved_levels(self)
def split_bidi_fragments(self)
class DirectionalStatus (embedding_level: int, directional_override_status: str, directional_isolate_status: bool)

DirectionalStatus(embedding_level: int, directional_override_status: str, directional_isolate_status: bool)

Expand source code Browse git
@dataclass
class DirectionalStatus:
    """One entry of the directional status stack.

    Attributes:
        embedding_level: between 0 and MAX_DEPTH.
        directional_override_status: "N" (Neutral), "L" (Left) or "R" (Right).
        directional_isolate_status: boolean isolate flag stored with the entry.
    """

    __slots__ = (
        "embedding_level",
        "directional_override_status",
        "directional_isolate_status",
    )

    embedding_level: int
    directional_override_status: str
    directional_isolate_status: bool

Instance variables

var directional_isolate_status : bool
var directional_override_status : str
var embedding_level : int
class IsolatingRun (characters: List[BidiCharacter], sos: str, eos: str)
Expand source code Browse git
class IsolatingRun:
    """Resolve weak types, neutral types and implicit levels for one run.

    Construction applies, in order, rules W1-W7, N0-N2 and I1-I2 to the given
    characters, modifying their ``bidi_class`` and ``embedding_level`` in place.
    ``previous_direction`` (sos) and ``next_direction`` (eos) are the directions
    assumed before the first and after the last character of the run.
    """

    __slots__ = ["characters", "previous_direction", "next_direction"]

    def __init__(self, characters: "List[BidiCharacter]", sos: str, eos: str):
        self.characters = characters
        self.previous_direction = sos
        self.next_direction = eos
        self.resolve_weak_types()
        self.resolve_neutral_types()
        self.resolve_implicit_levels()

    def resolve_weak_types(self) -> None:
        """Apply rules W1-W7 to the characters of the run, in place."""
        chars = self.characters
        count = len(chars)

        # W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and change the type of the NSM to Other Neutral
        #     if the previous character is an isolate initiator or PDI, and to the type of the previous character otherwise.
        #     If the NSM is at the start of the isolating run sequence, it will get the type of sos.
        for i, bidi_char in enumerate(chars):
            if bidi_char.bidi_class == "NSM":
                if i == 0:
                    bidi_char.set_class(self.previous_direction)
                else:
                    previous_class = chars[i - 1].bidi_class
                    bidi_char.set_class(
                        "ON"
                        if previous_class in ("LRI", "RLI", "FSI", "PDI")
                        else previous_class
                    )

        # W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos) is found.
        #     If an AL is found, change the type of the European number to Arabic number.
        # W3. Change all ALs to R.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "AL":
                bidi_char.set_class("R")
            if bidi_char.bidi_class == "EN" and last_strong_type == "AL":
                bidi_char.set_class("AN")

        # W4. A single European separator between two European numbers changes to a European number.
        #     A single common separator between two numbers of the same type changes to that type.
        #     The first and last characters have no neighbour on one side and are skipped.
        for i in range(1, count - 1):
            bidi_char = chars[i]
            if (
                bidi_char.bidi_class == "ES"
                and chars[i - 1].bidi_class == "EN"
                and chars[i + 1].bidi_class == "EN"
            ):
                bidi_char.set_class("EN")

            if (
                bidi_char.bidi_class == "CS"
                and chars[i - 1].bidi_class in ("AN", "EN")
                and chars[i + 1].bidi_class == chars[i - 1].bidi_class
            ):
                bidi_char.set_class(chars[i - 1].bidi_class)

        # W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
        # W6. All remaining separators and terminators (after the application of W4 and W5) change to Other Neutral.
        # Each maximal run of ET is resolved as a whole with plain loops, so a
        # long run can neither exhaust the recursion limit nor cost quadratic time.
        i = 0
        while i < count:
            bidi_class = chars[i].bidi_class
            if bidi_class == "ET":
                run_end = i + 1
                while run_end < count and chars[run_end].bidi_class == "ET":
                    run_end += 1
                touches_en = (i > 0 and chars[i - 1].bidi_class == "EN") or (
                    run_end < count and chars[run_end].bidi_class == "EN"
                )
                resolved = "EN" if touches_en else "ON"
                for k in range(i, run_end):
                    chars[k].set_class(resolved)
                i = run_end
                continue
            if bidi_class in ("ES", "CS"):
                chars[i].set_class("ON")
            i += 1

        # W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
        #     If an L is found, then change the type of the European number to L.
        last_strong_type = self.previous_direction
        for bidi_char in chars:
            if bidi_char.bidi_class in ("R", "L", "AL"):
                last_strong_type = bidi_char.bidi_class
            if bidi_char.bidi_class == "EN" and last_strong_type == "L":
                bidi_char.set_class("L")

    def pair_brackets(self) -> List[Tuple[int, int]]:
        """
        Calculate all the bracket pairs on an isolate run, to be used on rule N0
        How to calculate bracket pairs:
        - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14
        - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/

        Returns a list of (opening index, closing index) tuples sorted by
        opening index, or an empty list when 63 opening brackets are already
        pending and another one is found.
        """
        # U+232A and U+3009 are canonically equivalent closing angle brackets
        # and match each other. They are written as escapes so that text
        # normalisation of this file cannot merge them into one character.
        equivalent_closers = ("\u232a", "\u3009")
        open_brackets = []
        bracket_pairs = []
        for index, char in enumerate(self.characters):
            if char.character not in BIDI_BRACKETS or char.bidi_class != "ON":
                continue
            bracket_type = BIDI_BRACKETS[char.character]["type"]
            if bracket_type == "o":
                if len(open_brackets) >= 63:
                    return []
                open_brackets.append((char.character, index))
            elif bracket_type == "c":
                # Look for the innermost pending opener that this closer matches.
                for position in range(len(open_brackets) - 1, -1, -1):
                    open_char, open_index = open_brackets[position]
                    expected = BIDI_BRACKETS[open_char]["pair"]
                    if expected == char.character or (
                        expected in equivalent_closers
                        and char.character in equivalent_closers
                    ):
                        bracket_pairs.append((open_index, index))
                        # Openers nested inside the matched pair are discarded.
                        del open_brackets[position:]
                        break
        return sorted(bracket_pairs, key=itemgetter(0))

    def resolve_neutral_types(self) -> None:
        """Apply rules N0-N2 to the characters of the run, in place."""
        chars = self.characters

        def previous_strong(index: int):
            # Nearest L (as "L") or R/AN/EN (as "R") before `index`, else sos.
            while index > 0:
                bidi_class = chars[index - 1].bidi_class
                if bidi_class == "L":
                    return "L"
                if bidi_class in ("R", "AN", "EN"):
                    return "R"
                index -= 1
            return self.previous_direction

        def next_strong(index: int):
            # Nearest L (as "L") or R/AN/EN (as "R") after `index`, else eos.
            while index < len(chars) - 1:
                bidi_class = chars[index + 1].bidi_class
                if bidi_class == "L":
                    return "L"
                if bidi_class in ("R", "AN", "EN"):
                    return "R"
                index += 1
            return self.next_direction

        # N0-N2: Resolving neutral types
        # N0
        brackets = self.pair_brackets()
        if brackets:
            embedding_direction = chars[0].get_direction_from_level()
            opposite_direction = "L" if embedding_direction == "R" else "R"
            for b in brackets:
                strong_same_direction = False
                strong_opposite_direction = False
                resulting_direction = None
                for index in range(b[0], b[1]):
                    bidi_class = chars[index].bidi_class
                    if bidi_class == "L":
                        strong = "L"
                    elif bidi_class in ("R", "AN", "EN"):
                        strong = "R"
                    else:
                        continue
                    if strong == embedding_direction:
                        strong_same_direction = True
                        break
                    strong_opposite_direction = True
                if strong_same_direction:
                    resulting_direction = embedding_direction
                elif strong_opposite_direction:
                    if previous_strong(b[0]) == opposite_direction:
                        resulting_direction = opposite_direction
                    else:
                        resulting_direction = embedding_direction
                if resulting_direction:
                    chars[b[0]].bidi_class = resulting_direction
                    chars[b[1]].bidi_class = resulting_direction
                    if len(chars) > b[1] + 1:
                        # A mark that W1 turned into ON right after the closing
                        # bracket follows the bracket's resolved direction.
                        next_char = chars[b[1] + 1]
                        if (
                            next_char.original_bidi_class == "NSM"
                            and next_char.bidi_class == "ON"
                        ):
                            next_char.bidi_class = resulting_direction

        for i, bidi_char in enumerate(chars):
            # N1-N2
            if bidi_char.bidi_class in (
                "B",
                "S",
                "WS",
                "ON",
                "FSI",
                "LRI",
                "RLI",
                "PDI",
            ):
                before = previous_strong(i)
                if before == next_strong(i):
                    bidi_char.bidi_class = before
                else:
                    bidi_char.bidi_class = bidi_char.get_direction_from_level()

    def resolve_implicit_levels(self) -> None:
        """Apply rules I1 and I2, raising embedding levels in place."""
        for bidi_char in self.characters:
            # I1. For all characters with an even (left-to-right) embedding level,
            #     those of type R go up one level and those of type AN or EN go up two levels.
            if bidi_char.embedding_level % 2 == 0:
                if bidi_char.bidi_class == "R":
                    bidi_char.embedding_level += 1
                elif bidi_char.bidi_class in ("AN", "EN"):
                    bidi_char.embedding_level += 2

            # I2. For all characters with an odd (right-to-left) embedding level, those of type L, EN or AN go up one level.
            elif bidi_char.bidi_class in ("L", "EN", "AN"):
                bidi_char.embedding_level += 1

Instance variables

var characters
var next_direction
var previous_direction

Methods

def pair_brackets(self) ‑> List[Tuple[int, int]]

Calculate all the bracket pairs on an isolate run, to be used on rule N0 How to calculate bracket pairs: - Basic definitions 14, 15 and 16: http://www.unicode.org/reports/tr9/#BD14 - BIDI brackets for dummies: https://www.unicode.org/notes/tn39/

def resolve_implicit_levels(self) ‑> None
def resolve_neutral_types(self) ‑> None
def resolve_weak_types(self) ‑> None