initial commit

This commit is contained in:
Michael Pilosov 2024-11-24 19:28:20 +00:00
parent 03856ddb9e
commit 223fd853c9
5 changed files with 164 additions and 0 deletions

3
.gitignore vendored
View File

@ -138,3 +138,6 @@ dmypy.json
# Cython debug symbols
cython_debug/
# Other
*.pdf

1
.python-version Normal file
View File

@ -0,0 +1 @@
3.11

116
pdf_split.py Normal file
View File

@ -0,0 +1,116 @@
from pypdf import PdfReader, PdfWriter
from pypdf._page import Transformation
def resize_pdf_page(page, scale_factor):
    """
    Resize or rescale a single PDF page, including its content and canvas.

    The page is modified in place and also returned for convenience. Only
    the media box is adjusted; any other page boxes are left untouched.

    Args:
        page (PageObject): The page to be resized.
        scale_factor (float): The factor by which to scale the page.
            e.g., 0.5 for 50% size, 2.0 for 200% size.

    Returns:
        PageObject: The resized page (the same object that was passed in).

    Raises:
        ValueError: If ``scale_factor`` is not strictly positive.
    """
    if scale_factor <= 0:
        raise ValueError(f"scale_factor must be positive, got {scale_factor!r}")

    # Capture the current corners before anything is modified.
    left, bottom = page.mediabox.lower_left
    right, top = page.mediabox.upper_right

    # Apply scaling transformation to the page content
    transformation = Transformation().scale(scale_factor, scale_factor)
    page.add_transformation(transformation)

    # The content is scaled about the origin, so a point (x, y) moves to
    # (x * s, y * s). Scale BOTH corners of the media box accordingly;
    # rescaling only the width/height would misplace the box on pages whose
    # media box does not start at (0, 0).
    page.mediabox.lower_left = (left * scale_factor, bottom * scale_factor)
    page.mediabox.upper_right = (right * scale_factor, top * scale_factor)
    return page
def _format_width_label(width_inches):
    """Return the width label used in output file names (e.g. 32 -> "032")."""
    if float(width_inches).is_integer():
        # The ``d`` format code rejects floats, so convert explicitly.
        return f"{int(width_inches):03d}"
    # Non-integral widths (e.g. 8.5) keep their fractional part.
    return f"{width_inches:g}"


def split_pdf_vertically(
    input_pdf,
    output_pdf_prefix,
    cut_points=None,
    new_width: float = 0,
    single_output=True,
):
    """
    Splits a PDF vertically and optionally resizes it.

    Only the first page of the input PDF is processed. Each segment is
    produced by cloning that page and shrinking its media box to the
    requested vertical range. Segments are emitted in the reverse order of
    ``cut_points``.

    Args:
        input_pdf (str): Path to the input PDF file.
        output_pdf_prefix (str): Prefix for output PDF files.
        cut_points (list of tuples): List of (start_y, end_y) ratios for
            vertical splitting. Each ratio is multiplied by the page height
            and truncated to an integer, then used directly as a y
            coordinate of the media box. Defaults to ``[(0, 1)]``
            (the whole page).
        new_width (float): Desired width of the output PDF in inches.
            Default (any falsy value) is no resizing.
        single_output (bool): If True, outputs a single PDF with multiple pages.
            If False, outputs a separate PDF for each segment.

    Output files:
        ``{prefix}W{width}_{start}-{end}.pdf`` for each segment when
        ``single_output`` is False, otherwise
        ``{prefix}W{width}_{boundaries}_combined.pdf`` where ``boundaries``
        are the distinct cut coordinates in ascending order joined by "-".
    """
    if cut_points is None:
        cut_points = [(0, 1)]

    reader = PdfReader(input_pdf)
    original_page = reader.pages[0]

    # Resize the page if a new width is specified (72 points per inch)
    if new_width:
        scale_factor = (new_width * 72) / original_page.mediabox.width
        original_page = resize_pdf_page(original_page, scale_factor)
        width_label = _format_width_label(new_width)
    else:
        # Whole inches of the unmodified page; ``//`` on the float width
        # yields a float, which the label helper converts for formatting.
        width_label = _format_width_label(original_page.mediabox.width // 72)

    media_box = original_page.mediabox
    h = media_box.height
    left = media_box.lower_left[0]
    right = media_box.upper_right[0]

    # Create a single writer for combined output, if needed
    combined_writer = PdfWriter() if single_output else None

    for start_ratio, end_ratio in reversed(cut_points):
        # Convert relative coordinates to absolute coordinates
        start_y = int(h * start_ratio)
        end_y = int(h * end_ratio)

        # Clone into a fresh writer each time so every segment gets its own
        # independent copy of the page, then crop the copy.
        writer = PdfWriter()
        new_page = original_page.clone(writer)
        # Keep the rectangle canonical: lower-left holds the smaller y and
        # upper-right the larger, whichever order the ratios were given in.
        new_page.mediabox.lower_left = (left, min(start_y, end_y))
        new_page.mediabox.upper_right = (right, max(start_y, end_y))

        if single_output:
            combined_writer.add_page(new_page)
        else:
            writer.add_page(new_page)
            with open(
                f"{output_pdf_prefix}W{width_label}_{start_y}-{end_y}.pdf", "wb"
            ) as f:
                writer.write(f)

    # Write the single output file, if applicable
    if single_output:
        # Sort the boundaries numerically (not as strings) so that, e.g.,
        # 576 is listed before 1152 in the file name.
        boundaries = sorted(
            {int(ratio * h) for segment in cut_points for ratio in segment}
        )
        ct = "-".join(map(str, boundaries))
        with open(
            f"{output_pdf_prefix}W{width_label}_{ct}_combined.pdf", "wb"
        ) as f:
            combined_writer.write(f)
if __name__ == "__main__":
    # Example usage against a local "sample.pdf".
    source_path = "sample.pdf"

    # (start_y, end_y) ratio pairs, one per segment
    segments = [(0, 0.0949358), (0.0949358, 0.2)]

    # Two segments at the original size, one output file per segment.
    split_pdf_vertically(
        source_path,
        "split",
        segments,
        new_width=None,
        single_output=False,
    )

    # Whole page resized to a width of 32 inches, written as one combined PDF.
    split_pdf_vertically(
        source_path,
        "resize",
        cut_points=None,
        new_width=32,
        single_output=True,
    )

    # Further example (disabled): four equal quarters, resized, separate files.
    # split_pdf_vertically(
    #     source_path,
    #     "split",
    #     cut_points=[(0, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1)],
    #     new_width=32,
    #     single_output=False,
    # )

10
pyproject.toml Normal file
View File

@ -0,0 +1,10 @@
[project]
name = "pdf-utils"
version = "0.0.1"
description = "Slim PDF Utilities"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"pypdf>=5.1.0",
]

34
uv.lock generated Normal file
View File

@ -0,0 +1,34 @@
version = 1
requires-python = ">=3.10"
[[package]]
name = "pdf-utils"
version = "0.0.1"
source = { virtual = "." }
dependencies = [
{ name = "pypdf" },
]
[package.metadata]
requires-dist = [{ name = "pypdf", specifier = ">=5.1.0" }]
[[package]]
name = "pypdf"
version = "5.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/6b/9a/72d74f05f64895ebf1c7f6646cf7fe6dd124398c5c49240093f92d6f0fdd/pypdf-5.1.0.tar.gz", hash = "sha256:425a129abb1614183fd1aca6982f650b47f8026867c0ce7c4b9f281c443d2740", size = 5011381 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/fc/6f52588ac1cb4400a7804ef88d0d4e00cfe57a7ac6793ec3b00de5a8758b/pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc", size = 297976 },
]
[[package]]
name = "typing-extensions"
version = "4.12.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 },
]