Module fpdf.image_parsing

Functions

def ccitt_payload_location_from_pil(img)
Expand source code Browse git
def ccitt_payload_location_from_pil(img):
    """
    Locate the CCITT payload inside the TIFF data that `img` was opened from.

    Args:
        img: an opened PIL TIFF image exposing its tags through `tag_v2`

    Returns: a tuple (offset, length), both expressed in bytes,
        describing the single strip holding the compressed data.

    Raises:
        NotImplementedError: if the TIFF data is split over several strips
    """
    # The strip tags tell where the compressed data lives in the TIFF file:
    tags = img.tag_v2
    offsets = tags[TiffImagePlugin.STRIPOFFSETS]
    byte_counts = tags[TiffImagePlugin.STRIPBYTECOUNTS]

    # Pillow seems to always write a single strip when saving, even for huge
    # images (a ~10 GPixel test image still ended up in one strip), so only
    # the single-strip layout is handled. Anything else is rejected explicitly.
    single_strip = len(offsets) == 1 and len(byte_counts) == 1
    if not single_strip:
        raise NotImplementedError(
            "Transcoding multiple strips not supported by the PDF format"
        )

    return offsets[0], byte_counts[0]

Returns the byte offset and length of the CCITT payload in the original TIFF data.

def clear_table()
Expand source code Browse git
def clear_table():
    """
    Build a fresh LZW encoding state.

    Returns: a tuple (table, next_code, bits_per_code, max_code_value) where
        `table` maps every single-byte sequence to its own byte value,
        `next_code` is the code following `LZW_EOD_MARKER`,
        `bits_per_code` is `LZW_INITIAL_BITS_PER_CODE`,
        and `max_code_value` is the largest code that fits in that many bits.
    """
    single_byte_codes = {bytes((value,)): value for value in range(256)}
    first_free_code = LZW_EOD_MARKER + 1
    code_width = LZW_INITIAL_BITS_PER_CODE
    return (
        single_byte_codes,
        first_free_code,
        code_width,
        (1 << code_width) - 1,
    )

Reset the encoding table and coding state to initial conditions.

def get_img_info(filename, img=None, image_filter='AUTO', dims=None)
Expand source code Browse git
def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
    """
    Collect the data and metadata needed to embed a raster image.

    Args:
        filename: in a format that can be passed to load_image
        img: optional `bytes`, `BytesIO` or `PIL.Image.Image` instance
        image_filter (str): one of the SUPPORTED_IMAGE_FILTERS.
            With "AUTO", the filter is picked from the image format & mode.
        dims: optional size passed to `Image.resize()` before the image is encoded

    Returns: a `RasterImageInfo` filled with the keys
        "data", "w", "h", "cs", "iccp", "dpn", "bpc", "f", "inverted" & "dp",
        plus "pal" and/or "smask" for palette images / images with transparency.

    Raises:
        EnvironmentError: if Pillow is not available
        ValueError: if the original bytes of a JPEG or group4 TIFF are re-used as is
            and the image has an unsupported mode, photometric interpretation or FillOrder
    """
    if Image is None:
        raise EnvironmentError("Pillow not available - fpdf2 cannot insert images")

    is_pil_img = True
    keep_bytes_io_open = False
    # Flag to check whether a cmyk image is jpeg or not, if set to True the decode array
    # is inverted in output.py
    jpeg_inverted = False
    img_raw_data = None
    if not img or isinstance(img, (Path, str)):
        img_raw_data = load_image(filename)
        img = Image.open(img_raw_data)
        is_pil_img = False
    elif not isinstance(img, Image.Image):
        keep_bytes_io_open = isinstance(img, BytesIO)
        img_raw_data = BytesIO(img) if isinstance(img, bytes) else img
        img = Image.open(img_raw_data)
        is_pil_img = False

    # Tracks whether the pixels still match the original encoded bytes:
    img_altered = False
    if dims:
        img = img.resize(dims, resample=RESAMPLE)
        img_altered = True

    if image_filter == "AUTO":
        # Very simple logic for now:
        if img.format == "JPEG":
            image_filter = "DCTDecode"
        elif img.mode == "1" and hasattr(Image.core, "libtiff_support_custom_tags"):
            # The 2nd condition prevents from running in a bug sometimes,
            # cf. test_transcode_monochrome_and_libtiff_support_custom_tags()
            image_filter = "CCITTFaxDecode"
        else:
            image_filter = "FlateDecode"

    if img.mode in ("P", "PA") and image_filter != "FlateDecode":
        img = img.convert("RGBA")

    if img.mode not in ("1", "L", "LA", "RGB", "RGBA", "P", "PA", "CMYK"):
        img = img.convert("RGBA")
        img_altered = True

    if img.mode in ("P", "RGBA") and image_filter == "LZWDecode":
        img = img.convert("RGB")
    elif img.mode == "LA" and image_filter == "LZWDecode":
        # An equality test is required here: `img.mode in ("LA")` is a substring
        # test against the string "LA", which also matches the "L" mode
        # and triggers a needless copy of the image.
        img = img.convert("L")

    w, h = img.size
    info = RasterImageInfo()

    iccp = None
    if "icc_profile" in img.info:
        if is_iccp_valid(img.info["icc_profile"], filename):
            iccp = img.info["icc_profile"]

    if img_raw_data is not None and not img_altered:
        # if we can use the original image bytes directly we do (JPEG and group4 TIFF only):
        if img.format == "JPEG" and image_filter == "DCTDecode":
            if img.mode in ("RGB", "RGBA"):
                dpn, bpc, colspace = 3, 8, "DeviceRGB"
            elif img.mode == "CMYK":
                dpn, bpc, colspace = 4, 8, "DeviceCMYK"
                jpeg_inverted = True
            elif img.mode == "L":
                dpn, bpc, colspace = 1, 8, "DeviceGray"
            else:
                raise ValueError(f"Unsupported image mode: {img.mode}")
            img_raw_data.seek(0)
            info.update(
                {
                    "data": img_raw_data.read(),
                    "w": w,
                    "h": h,
                    "cs": colspace,
                    "iccp": iccp,
                    "dpn": dpn,
                    "bpc": bpc,
                    "f": image_filter,
                    "inverted": jpeg_inverted,
                    "dp": f"/Predictor 15 /Colors {dpn} /Columns {w}",
                }
            )
            return info
        # We can directly copy the data out of a CCITT Group 4 encoded TIFF, if it
        # only contains a single strip
        if (
            img.format == "TIFF"
            and image_filter == "CCITTFaxDecode"
            and img.info["compression"] == "group4"
            and len(img.tag_v2[TiffImagePlugin.STRIPOFFSETS]) == 1
            and len(img.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]) == 1
        ):
            photo = img.tag_v2[TiffImagePlugin.PHOTOMETRIC_INTERPRETATION]
            inverted = False
            if photo == 0:
                inverted = True
            elif photo != 1:
                raise ValueError(
                    f"unsupported photometric interpretation for g4 tiff: {photo}"
                )
            offset, length = ccitt_payload_location_from_pil(img)
            img_raw_data.seek(offset)
            ccittrawdata = img_raw_data.read(length)
            fillorder = img.tag_v2.get(TiffImagePlugin.FILLORDER)
            if fillorder is None or fillorder == 1:
                # no FillOrder or msb-to-lsb: nothing to do
                pass
            elif fillorder == 2:
                # lsb-to-msb: reverse bits of each byte
                ccittrawdata = bytes(TIFFBitRevTable[n] for n in ccittrawdata)
            else:
                raise ValueError(f"unsupported FillOrder: {fillorder}")
            dpn, bpc, colspace = 1, 1, "DeviceGray"
            info.update(
                {
                    "data": ccittrawdata,
                    "w": w,
                    "h": h,
                    "iccp": None,
                    "dpn": dpn,
                    "cs": colspace,
                    "bpc": bpc,
                    "f": image_filter,
                    "inverted": jpeg_inverted,
                    "dp": f"/BlackIs1 {str(not inverted).lower()} /Columns {w} /K -1 /Rows {h}",
                }
            )
            return info

    # garbage collection
    img_raw_data = None

    # From here on, the pixel data is (re-)encoded with the selected filter:
    if img.mode == "1":
        dpn, bpc, colspace = 1, 1, "DeviceGray"
        info["data"] = _to_data(img, image_filter)
    elif img.mode == "L":
        dpn, bpc, colspace = 1, 8, "DeviceGray"
        info["data"] = _to_data(img, image_filter)
    elif img.mode == "LA":
        dpn, bpc, colspace = 1, 8, "DeviceGray"
        # every 2nd byte, starting from index 1, belongs to the alpha channel:
        alpha_channel = slice(1, None, 2)
        info["data"] = _to_data(img, image_filter, remove_slice=alpha_channel)
        if _has_alpha(img, alpha_channel) and image_filter not in (
            "DCTDecode",
            "JPXDecode",
        ):
            info["smask"] = _to_data(img, image_filter, select_slice=alpha_channel)
    elif img.mode == "P":
        dpn, bpc, colspace = 1, 8, "Indexed"
        info["data"] = _to_data(img, image_filter)
        info["pal"] = img.palette.palette

        # check if the P image has transparency
        if img.info.get("transparency", None) is not None and image_filter not in (
            "DCTDecode",
            "JPXDecode",
        ):
            # convert to RGBA to get the alpha channel for creating the smask
            info["smask"] = _to_data(
                img.convert("RGBA"), image_filter, select_slice=slice(3, None, 4)
            )
    elif img.mode == "PA":
        dpn, bpc, colspace = 1, 8, "Indexed"
        info["pal"] = img.palette.palette
        alpha_channel = slice(1, None, 2)
        info["data"] = _to_data(img, image_filter, remove_slice=alpha_channel)
        if _has_alpha(img, alpha_channel) and image_filter not in (
            "DCTDecode",
            "JPXDecode",
        ):
            info["smask"] = _to_data(img, image_filter, select_slice=alpha_channel)
    elif img.mode == "CMYK":
        dpn, bpc, colspace = 4, 8, "DeviceCMYK"
        info["data"] = _to_data(img, image_filter)
    elif img.mode == "RGB":
        dpn, bpc, colspace = 3, 8, "DeviceRGB"
        info["data"] = _to_data(img, image_filter)
    else:  # RGBA image
        dpn, bpc, colspace = 3, 8, "DeviceRGB"
        # every 4th byte, starting from index 3, belongs to the alpha channel:
        alpha_channel = slice(3, None, 4)
        info["data"] = _to_data(img, image_filter, remove_slice=alpha_channel)
        if _has_alpha(img, alpha_channel) and image_filter not in (
            "DCTDecode",
            "JPXDecode",
        ):
            info["smask"] = _to_data(img, image_filter, select_slice=alpha_channel)

    dp = f"/Predictor 15 /Colors {dpn} /Columns {w}"

    if img.mode == "1":
        dp = f"/BlackIs1 true /Columns {w} /K -1 /Rows {h}"

    # Images provided as PIL instances are left untouched: only the ones opened above are released.
    if not is_pil_img:
        if keep_bytes_io_open:
            img.fp = None  # cf. issue #881
        else:
            img.close()

    info.update(
        {
            "w": w,
            "h": h,
            "cs": colspace,
            "iccp": iccp,
            "bpc": bpc,
            "dpn": dpn,
            "f": image_filter,
            "inverted": jpeg_inverted,
            "dp": dp,
        }
    )
    return info

Args

filename
in a format that can be passed to load_image
img
optional bytes, BytesIO or PIL.Image.Image instance
image_filter : str
one of the SUPPORTED_IMAGE_FILTERS
def get_svg_info(filename, img, image_cache)
Expand source code Browse git
def get_svg_info(filename, img, image_cache):
    """
    Parse SVG data and compute the dimensions of the resulting vector image.

    Args:
        filename: returned unchanged as the first element of the result
        img: an object whose `getvalue()` method returns the SVG data
        image_cache: forwarded to `SVGObject`

    Returns: A tuple, consisting of 3 values: the filename, the `SVGObject`,
        and a `VectorImageInfo` instance.
    """
    svg = SVGObject(img.getvalue(), image_cache=image_cache)
    # The viewbox provides default dimensions...
    viewbox = svg.viewbox
    if viewbox:
        _, _, w, h = viewbox
    else:
        w = h = 0.0
    # ...that explicit width / height values take precedence over:
    w = svg.width or w
    h = svg.height or h
    return filename, svg, VectorImageInfo(data=svg, w=w, h=h)
def is_iccp_valid(iccp, filename)
Expand source code Browse git
def is_iccp_valid(iccp, filename):
    """
    Checks the validity of an ICC profile

    Args:
        iccp (bytes): the raw ICC profile
        filename: only used in log messages

    Returns: `True` if the profile can be opened and its color space is GRAY or RGB,
        `False` otherwise (the reason is then logged at INFO level).
    """
    try:
        opened = ImageCms.getOpenProfile(BytesIO(iccp))
    except ImageCms.PyCMSError:
        LOGGER.info("Invalid ICC Profile in file %s", filename)
        return False
    color_space = opened.profile.xcolor_space.strip()
    if color_space in ("GRAY", "RGB"):
        return True
    LOGGER.info(
        "Unsupported color space %s in ICC Profile of file %s - cf. issue #711",
        color_space,
        filename,
    )
    return False

Checks the validity of an ICC profile

def load_image(filename)
Expand source code Browse git
def load_image(filename):
    """
    Load an external resource, such as an image, into memory.
    This function is automatically called when a resource is added
    to a document by `fpdf.FPDF.image()`.

    Args:
        filename: a `BytesIO` (returned as is), a `Path`, or a string that is
            either a http(s) URL, a "data:" URI or a local file path

    Returns: a `BytesIO` buffer
    """
    # BytesIO instances are handed back untouched:
    if isinstance(filename, BytesIO):
        return filename
    location = str(filename) if isinstance(filename, Path) else filename
    # by default loading from network is allowed for all images
    if location.startswith(("http://", "https://")):
        # disabling bandit & semgrep rules as permitted schemes are whitelisted:
        # nosemgrep: python.lang.security.audit.dynamic-urllib-use-detected.dynamic-urllib-use-detected
        with urlopen(location) as response:  # nosec B310
            payload = response.read()
        return BytesIO(payload)
    if location.startswith("data:"):
        return _decode_base64_image(location)
    with open(location, "rb") as local_file:
        payload = local_file.read()
    return BytesIO(payload)

This method is used to load external resources, such as images. It is automatically called when a resource is added to a document by fpdf.FPDF.image(). It always returns a BytesIO buffer.

def pack_codes_into_bytes(codes)
Expand source code Browse git
def pack_codes_into_bytes(codes):
    """
    Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.
    The bit-width starts at the initial value provided by `clear_table()` and expands as needed,
    up to `LZW_MAX_BITS_PER_CODE`.

    Args:
        codes: an iterable of integer codes, that may include
            `LZW_CLEAR_TABLE_MARKER` and `LZW_EOD_MARKER`

    Returns: the packed codes, as `bytes`.
        If the last byte is incomplete, it is padded with zero bits.
    """

    (
        _,
        next_code,
        bits_per_code,
        max_code_value,
    ) = clear_table()
    buffer = 0
    bits_in_buffer = 0
    output = bytearray()

    for code in codes:
        buffer = (buffer << bits_per_code) | code
        bits_in_buffer += bits_per_code

        while bits_in_buffer >= 8:
            bits_in_buffer -= 8
            output.append((buffer >> bits_in_buffer) & 0xFF)
        # Drop the bits that have already been emitted: without this, the integer
        # grows with every code and each shift gets slower (quadratic behaviour).
        buffer &= (1 << bits_in_buffer) - 1

        if code == LZW_CLEAR_TABLE_MARKER:
            # The state is reset, so the following codes use the initial bit-width again:
            _, next_code, bits_per_code, max_code_value = clear_table()
        elif code != LZW_EOD_MARKER:
            next_code += 1
            if next_code > max_code_value and bits_per_code < LZW_MAX_BITS_PER_CODE:
                bits_per_code += 1
                max_code_value = (1 << bits_per_code) - 1

    if bits_in_buffer > 0:
        # Left-align the remaining bits in a final byte:
        output.append((buffer << (8 - bits_in_buffer)) & 0xFF)

    return bytes(output)

Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width. The bit-width starts at 9 bits and expands as needed.

def preload_image(image_cache: ImageCache,
name,
dims=None)
Expand source code Browse git
def preload_image(image_cache: ImageCache, name, dims=None):
    """
    Read an image and load it into memory.

    For raster images: following this call, the image is inserted in `image_cache.images`,
    and following calls to `FPDF.image()` will re-use the same cached values, without re-reading the image.

    For vector images: the data is loaded and the metadata extracted.

    Args:
        image_cache: an `ImageCache` instance, usually the `.image_cache` attribute of a `FPDF` instance.
        name: either a string representing a file path to an image, a URL to an image,
            an io.BytesIO, or an instance of `PIL.Image.Image`.
        dims (Tuple[float]): optional dimensions as a tuple (width, height) to resize the image
            (raster only) before storing it in the PDF.

    Returns: A tuple, consisting of 3 values: the name, the image data,
        and an instance of a subclass of `ImageInfo`.
    """

    def _md5_hexdigest(payload):
        "Hash used as cache key for in-memory images"
        digest = hashlib.new("md5", usedforsecurity=False)  # nosec B324
        digest.update(payload)
        return digest.hexdigest()

    # Vector images (SVG) are handled first:
    if str(name).endswith(".svg"):
        try:
            return get_svg_info(name, load_image(str(name)), image_cache=image_cache)
        except Exception as error:
            raise ValueError(f"Could not parse file: {name}") from error
    if isinstance(name, bytes):
        if _is_svg(name.strip()):
            return get_svg_info(name, io.BytesIO(name), image_cache=image_cache)
    elif isinstance(name, io.BytesIO):
        if _is_svg(name.getvalue().strip()):
            return get_svg_info("vector_image", name, image_cache=image_cache)

    # Raster images: figure out the cache key (name) and the image data (img).
    img = name
    if isinstance(name, str):
        img = None
    elif isinstance(name, Image.Image):
        name = _md5_hexdigest(name.tobytes())
    elif isinstance(name, (bytes, io.BytesIO)):
        raw_bytes = name.getvalue() if isinstance(name, io.BytesIO) else name
        name = _md5_hexdigest(raw_bytes.strip())
    else:
        name = str(name)

    cached_info = image_cache.images.get(name)
    if cached_info:
        # Already known image: only its usage counter is updated.
        cached_info["usages"] += 1
        return name, img, cached_info

    info = get_img_info(name, img, image_cache.image_filter, dims)
    info["i"] = len(image_cache.images) + 1
    info["usages"] = 1
    info["iccp_i"] = None
    iccp = info.get("iccp")
    if iccp:
        LOGGER.debug(
            "ICC profile found for image %s - It will be inserted in the PDF document",
            name,
        )
        # Each distinct ICC profile is stored once, and referenced by its index:
        known_profiles = image_cache.icc_profiles
        if iccp not in known_profiles:
            known_profiles[iccp] = len(known_profiles)
        info["iccp_i"] = known_profiles[iccp]
        info["iccp"] = None
    image_cache.images[name] = info
    return name, img, info

Read an image and load it into memory.

For raster images: following this call, the image is inserted in image_cache.images, and following calls to FPDF.image() will re-use the same cached values, without re-reading the image.

For vector images: the data is loaded and the metadata extracted.

Args

image_cache
an ImageCache instance, usually the .image_cache attribute of a FPDF instance.
name
either a string representing a file path to an image, a URL to an image, an io.BytesIO, or an instance of PIL.Image.Image.
dims : Tuple[float]
optional dimensions as a tuple (width, height) to resize the image (raster only) before storing it in the PDF.

Returns: A tuple, consisting of 3 values: the name, the image data, and an instance of a subclass of ImageInfo.

def transcode_monochrome(img)
Expand source code Browse git
def transcode_monochrome(img):
    """
    Convert the open PIL.Image imgdata to compressed CCITT Group4 data.

    The image is saved as a Group 4 compressed TIFF in memory, forcing Pillow
    to write a single strip, and the bytes of that strip are then extracted.

    Args:
        img: an open `PIL.Image.Image`
            (presumably in mode "1", given the strip size computation - verify against callers)

    Returns: the `bytes` of the single strip of the in-memory TIFF

    Raises:
        NotImplementedError: raised by `ccitt_payload_location_from_pil()`
            if the generated TIFF contains more than one strip
    """
    # Convert the image to Group 4 in memory. If libtiff is not installed and
    # Pillow is not compiled against it, .save() will raise an exception.
    newimgio = BytesIO()

    # we create a whole new PIL image or otherwise it might happen with some
    # input images, that libtiff fails an assert and the whole process is
    # killed by a SIGABRT:
    img2 = Image.frombytes(img.mode, img.size, img.tobytes())

    # Since version 8.3.0 Pillow limits strips to 64 KB. Since PDF only
    # supports single strip CCITT Group4 payloads, we have to coerce it back
    # into putting everything into a single strip. Thanks to Andrew Murray for
    # the hack.
    #
    # Since version 8.4.0 Pillow allows us to modify the strip size explicitly
    #
    # Strip size = bytes per row (width in pixels rounded up to a whole byte)
    # multiplied by the number of rows:
    tmp_strip_size = (img.size[0] + 7) // 8 * img.size[1]
    if hasattr(TiffImagePlugin, "STRIP_SIZE"):
        # we are using Pillow 8.4.0 or later
        with temp_attr(TiffImagePlugin, "STRIP_SIZE", tmp_strip_size):
            img2.save(newimgio, format="TIFF", compression="group4")
    else:
        # only needed for Pillow 8.3.x but works for versions before that as
        # well
        pillow__getitem__ = TiffImagePlugin.ImageFileDirectory_v2.__getitem__

        # Replacement for ImageFileDirectory_v2.__getitem__, only active while saving:
        # it reports strip-related tags describing a single strip covering the whole image.
        def __getitem__(self, tag):
            overrides = {
                TiffImagePlugin.ROWSPERSTRIP: img.size[1],
                TiffImagePlugin.STRIPBYTECOUNTS: [tmp_strip_size],
                TiffImagePlugin.STRIPOFFSETS: [0],
            }
            # NOTE: the original lookup is always performed, even for overridden tags
            return overrides.get(tag, pillow__getitem__(self, tag))

        with temp_attr(
            TiffImagePlugin.ImageFileDirectory_v2, "__getitem__", __getitem__
        ):
            img2.save(newimgio, format="TIFF", compression="group4")

    # Open new image in memory
    newimgio.seek(0)
    newimg = Image.open(newimgio)

    # Locate the single strip in the TIFF that was just written...
    offset, length = ccitt_payload_location_from_pil(newimg)

    # ...and return only its bytes:
    newimgio.seek(offset)
    return newimgio.read(length)

Convert the open PIL.Image imgdata to compressed CCITT Group4 data.

Classes

class ImageSettings (compression_level: int = -1)
Expand source code Browse git
@dataclass
class ImageSettings:
    """
    Settings regarding the compression of image data.
    """

    # Passed to zlib.compress() - In range 0-9 - Default is currently equivalent to 6:
    # (-1 is the value zlib uses to select its default compression level)
    compression_level: int = -1

ImageSettings(compression_level: int = -1)

Class variables

var compression_level : int

Passed to zlib.compress() - In range 0-9 - Default (-1) is currently equivalent to 6.

class temp_attr (obj, field, value)
Expand source code Browse git
class temp_attr:
    """
    Context manager that temporarily changes the attribute of an object.

    On exit, the previous value is restored,
    or the attribute is deleted if it did not exist beforehand.

    Args:
        obj: the object to alter
        field (str): name of the attribute
        value: value given to the attribute inside the `with` block
    """

    def __init__(self, obj, field, value):
        self.obj, self.field, self.value = obj, field, value

    def __enter__(self):
        # Remember the previous state, so that it can be restored on exit:
        try:
            self.old_value = getattr(self.obj, self.field)
        except AttributeError:
            self.exists = False
        else:
            self.exists = True
        setattr(self.obj, self.field, self.value)

    def __exit__(self, exctype, excinst, exctb):
        if not self.exists:
            # The attribute was created on entry: remove it again
            delattr(self.obj, self.field)
            return
        setattr(self.obj, self.field, self.old_value)

Temporarily change the attribute of an object using a context manager.