Source code for pdfje.document

from __future__ import annotations

import os
from dataclasses import dataclass
from itertools import count, islice
from pathlib import Path
from typing import IO, Iterable, Iterator, final, overload

from . import atoms
from .atoms import OBJ_ID_PAGETREE, OBJ_ID_RESOURCES
from .common import add_slots, flatten, setattr_frozen
from .layout import Block, Paragraph
from .layout.pages import AutoPage
from .page import Page
from .resources import Resources
from .style import Style, StyleFull, StyleLike

_OBJ_ID_FIRST_PAGE: atoms.ObjectID = OBJ_ID_RESOURCES + 1
_OBJS_PER_PAGE = 2


[docs] @final @add_slots @dataclass(frozen=True, init=False) class Document: """a PDF Document Parameters ---------- content The content of the document. style Change the default style of the document. Examples -------- Below are some examples of creating documents. >>> Document() # the minimal PDF -- one empty page >>> Document("hello world") # a document with pages of text >>> Document([ # document with explicit pages ... Page(...), ... AutoPage([LOREM_IPSUM, ZEN_OF_PYTHON]), ... Page(), ... ]) note ---- A document must contain at least one page to be valid """ pages: Iterable[Page | AutoPage] style: Style def __init__( self, content: Iterable[Page | AutoPage] | str | Block | None = None, style: StyleLike = Style.EMPTY, ) -> None: if content is None: content = [Page()] elif isinstance(content, str): content = [AutoPage([Paragraph(content)])] elif isinstance(content, Block): content = [AutoPage([content])] setattr_frozen(self, "pages", content) setattr_frozen(self, "style", Style.parse(style)) @overload def write(self) -> Iterator[bytes]: ... @overload def write(self, target: os.PathLike[str] | str | IO[bytes]) -> None: ...
[docs] def write( # type: ignore[return] self, target: os.PathLike[str] | str | IO[bytes] | None = None ) -> Iterator[bytes] | None: """Write the document to a given target. If no target is given, outputs the binary PDF content iteratively. See examples below. Parameters ---------- target: ~os.PathLike | str | ~typing.IO[bytes] | None The target to write to. If not given, the PDF content is returned as an iterator. Returns ------- ~typing.Iterator[bytes] | None Examples -------- String, :class:`~pathlib.Path`, or :class:`~os.PathLike` target: >>> doc.write("myfolder/foo.pdf") >>> doc.write(Path.home() / "documents/foo.pdf") Files and file-like objects: >>> with open("my/file.pdf", 'wb') as f: ... doc.write(f) >>> doc.write(b:= BytesIO()) Iterator output is useful for streaming PDF contents. Below is an example of an HTTP request using the ``httpx`` library. >>> httpx.post("https://mysite.foo/upload", content=doc.write(), ... headers={"Content-Type": "application/pdf"}) """ if target is None: return self._write_iter() elif isinstance(target, (str, os.PathLike)): self._write_to_path(Path(os.fspath(target))) else: # i.e. IO[bytes] target.writelines(self._write_iter())
def _write_iter(self) -> Iterator[bytes]: return atoms.write(_doc_objects(self.pages, self.style.setdefault())) def _write_to_path(self, p: Path) -> None: with p.open("wb") as wfile: wfile.writelines(self.write())
def _doc_objects( items: Iterable[Page | AutoPage], style: StyleFull ) -> Iterator[atoms.Object]: res = Resources() obj_id = pagenum = 0 # FUTURE: the scoping of `pagenum` is a bit tricky here. Find a better # way to do this -- or add a specific test. for pagenum, obj_id, page in zip( count(1), count(_OBJ_ID_FIRST_PAGE, step=_OBJS_PER_PAGE), flatten(p.render(res, style, pagenum + 1) for p in items), ): yield from page.to_atoms(obj_id) if not pagenum: raise RuntimeError( "Cannot write PDF document without at least one page" ) first_font_id = obj_id + _OBJS_PER_PAGE yield from res.to_objects(first_font_id) yield from _write_headers( (obj_id - _OBJ_ID_FIRST_PAGE) // _OBJS_PER_PAGE + 1, res.to_atoms(first_font_id), ) _CATALOG_OBJ = ( atoms.OBJ_ID_CATALOG, atoms.Dictionary( (b"Type", atoms.Name(b"Catalog")), (b"Pages", atoms.Ref(OBJ_ID_PAGETREE)), ), ) def _write_headers( num_pages: int, resources: atoms.Dictionary ) -> Iterable[atoms.Object]: yield _CATALOG_OBJ yield ( OBJ_ID_PAGETREE, atoms.Dictionary( (b"Type", atoms.Name(b"Pages")), ( b"Kids", atoms.Array( map( atoms.Ref, islice( count(_OBJ_ID_FIRST_PAGE, step=_OBJS_PER_PAGE), num_pages, ), ) ), ), (b"Count", atoms.Int(num_pages)), ), ) yield OBJ_ID_RESOURCES, resources