# Source code for pdfje.document

from __future__ import annotations

import os
from dataclasses import dataclass
from itertools import count, islice
from pathlib import Path
from typing import IO, Iterable, Iterator, final, overload

from . import atoms
from .atoms import OBJ_ID_PAGETREE, OBJ_ID_RESOURCES
from .common import add_slots, flatten, setattr_frozen
from .layout import Block, Paragraph
from .layout.pages import AutoPage
from .page import Page
from .resources import Resources
from .style import Style, StyleFull, StyleLike

# Object ID given to the first page: the ID right after the resources object.
_OBJ_ID_FIRST_PAGE: atoms.ObjectID = OBJ_ID_RESOURCES + 1
# Distance between the object IDs of two consecutive pages, i.e. the number
# of object IDs reserved for every page.
_OBJS_PER_PAGE = 2



@final
@add_slots
@dataclass(frozen=True, init=False)
class Document:
    """a PDF Document

    Parameters
    ----------

    content
        The content of the document. Accepted forms:

        - ``None`` (the default): a document with a single empty page.
        - a string: wrapped in a paragraph on automatically laid-out pages.
        - a block: placed on automatically laid-out pages.
        - any other value: used as-is as the iterable of pages. It is stored
          without being copied, so a one-shot iterator can only be
          written once.
    style
        Change the default style of the document.

    Examples
    --------

    Below are some examples of creating documents.

    >>> Document()  # the minimal PDF -- one empty page
    >>> Document("hello world")  # a document with pages of text
    >>> Document([  # document with explicit pages
    ...     Page(...),
    ...     AutoPage([LOREM_IPSUM, ZEN_OF_PYTHON]),
    ...     Page(),
    ... ])


    Note
    ----
    A document must contain at least one page to be valid. Generating the
    output of a document without any page fails with a
    :class:`RuntimeError`.
    """

    pages: Iterable[Page | AutoPage]
    style: Style

    def __init__(
        self,
        content: Iterable[Page | AutoPage] | str | Block | None = None,
        style: StyleLike = Style.EMPTY,
    ) -> None:
        # Normalize the shorthand forms of `content` into an iterable of pages.
        if content is None:
            content = [Page()]
        elif isinstance(content, str):
            content = [AutoPage([Paragraph(content)])]
        elif isinstance(content, Block):
            content = [AutoPage([content])]

        # The dataclass is frozen, so plain attribute assignment is not
        # available here; `setattr_frozen` is used instead.
        setattr_frozen(self, "pages", content)
        setattr_frozen(self, "style", Style.parse(style))

    @overload
    def write(self) -> Iterator[bytes]: ...

    @overload
    def write(self, target: os.PathLike[str] | str | IO[bytes]) -> None: ...

    def write(  # type: ignore[return]
        self, target: os.PathLike[str] | str | IO[bytes] | None = None
    ) -> Iterator[bytes] | None:
        """Write the document to a given target. If no target is given,
        outputs the binary PDF content iteratively. See examples below.

        Parameters
        ----------
        target: ~os.PathLike | str | ~typing.IO[bytes] | None
            The target to write to. If not given, the PDF content is returned
            as an iterator.

        Returns
        -------
        ~typing.Iterator[bytes] | None
            An iterator over the PDF content if no target is given,
            otherwise ``None``.

        Examples
        --------

        String, :class:`~pathlib.Path`, or :class:`~os.PathLike` target:

        >>> doc.write("myfolder/foo.pdf")
        >>> doc.write(Path.home() / "documents/foo.pdf")

        Files and file-like objects:

        >>> with open("my/file.pdf", 'wb') as f:
        ...     doc.write(f)
        >>> doc.write(b := BytesIO())

        Iterator output is useful for streaming PDF contents. Below is
        an example of an HTTP request using the ``httpx`` library.

        >>> httpx.post("https://mysite.foo/upload", content=doc.write(),
        ...            headers={"Content-Type": "application/pdf"})
        """
        # Only the first branch returns a value; the other two write to the
        # target and fall through, implicitly returning None.
        if target is None:
            return self._write_iter()
        elif isinstance(target, (str, os.PathLike)):
            self._write_to_path(Path(os.fspath(target)))
        else:  # i.e. IO[bytes]
            target.writelines(self._write_iter())

    def _write_iter(self) -> Iterator[bytes]:
        """Return the PDF content as an iterator of byte chunks."""
        # `setdefault()` turns the document style into the `StyleFull`
        # that `_doc_objects` expects.
        return atoms.write(_doc_objects(self.pages, self.style.setdefault()))

    def _write_to_path(self, p: Path) -> None:
        """Write the PDF content to the file at path ``p``, opened in
        binary write mode."""
        with p.open("wb") as wfile:
            wfile.writelines(self.write())



def _doc_objects(
    items: Iterable[Page | AutoPage], style: StyleFull
) -> Iterator[atoms.Object]:
    """Generate all objects of a document, in output order.

    First the objects of every rendered page are yielded, then the objects
    from ``Resources.to_objects``, and finally the catalog, page tree and
    resources dictionary produced by ``_write_headers``.

    Page object IDs start at ``_OBJ_ID_FIRST_PAGE`` and increase by
    ``_OBJS_PER_PAGE`` for every page.

    Parameters
    ----------
    items
        The pages (or page generators) to render. Iterated exactly once.
    style
        The style passed to every item's ``render`` call.

    Raises
    ------
    RuntimeError
        If rendering ``items`` produced no pages at all. As this function
        is a generator, the error is raised during iteration, not when
        the function is called.
    """
    # A single `Resources` instance is handed to every `render` call and is
    # converted to objects only after all pages have been processed.
    res = Resources()
    # Both start at 0 so they are defined (and falsy) if the loop never runs.
    obj_id = pagenum = 0
    # FUTURE: the scoping of `pagenum` is a bit tricky here. Find a better
    #         way to do this -- or add a specific test.
    #
    # Why it works: the generator expression below is lazy and reads
    # `pagenum` from this function's scope each time it renders the next
    # item. The loop targets are only rebound after zip() has obtained a
    # complete tuple, so at that moment `pagenum` still holds the number of
    # the last page yielded, and `pagenum + 1` is the number of the next one.
    # NOTE(review): this relies on `flatten` consuming its argument lazily,
    # and presumably `render` uses this argument as the starting page
    # number -- confirm both.
    for pagenum, obj_id, page in zip(
        count(1),
        count(_OBJ_ID_FIRST_PAGE, step=_OBJS_PER_PAGE),
        flatten(p.render(res, style, pagenum + 1) for p in items),
    ):
        yield from page.to_atoms(obj_id)

    # `pagenum` is still 0 only if the loop body never ran, i.e. no pages.
    if not pagenum:
        raise RuntimeError(
            "Cannot write PDF document without at least one page"
        )
    # First object ID after the block of IDs reserved for the last page.
    first_font_id = obj_id + _OBJS_PER_PAGE

    yield from res.to_objects(first_font_id)
    yield from _write_headers(
        # Convert the last page's object ID back into the number of pages.
        (obj_id - _OBJ_ID_FIRST_PAGE) // _OBJS_PER_PAGE + 1,
        res.to_atoms(first_font_id),
    )


# The catalog object as an (object ID, dictionary) pair. It is the same for
# every document: its only reference is to the page tree, which always has
# the fixed ID `OBJ_ID_PAGETREE`.
_CATALOG_OBJ = (
    atoms.OBJ_ID_CATALOG,
    atoms.Dictionary(
        (b"Type", atoms.Name(b"Catalog")),
        (b"Pages", atoms.Ref(OBJ_ID_PAGETREE)),
    ),
)


def _write_headers(
    num_pages: int, resources: atoms.Dictionary
) -> Iterable[atoms.Object]:
    """Yield the three objects that have a fixed object ID.

    In order: the catalog, the page tree, and the resources dictionary.

    Parameters
    ----------
    num_pages
        The number of pages in the document. Determines the ``Count`` entry
        and how many page references the ``Kids`` array contains.
    resources
        The dictionary to emit under ``OBJ_ID_RESOURCES``.
    """
    yield _CATALOG_OBJ

    # Page object IDs are not passed in: they are regenerated here using the
    # same start value and step used when numbering the pages.
    page_ids = islice(
        count(_OBJ_ID_FIRST_PAGE, step=_OBJS_PER_PAGE), num_pages
    )
    page_tree = atoms.Dictionary(
        (b"Type", atoms.Name(b"Pages")),
        (b"Kids", atoms.Array(map(atoms.Ref, page_ids))),
        (b"Count", atoms.Int(num_pages)),
    )
    yield (OBJ_ID_PAGETREE, page_tree)

    yield (OBJ_ID_RESOURCES, resources)