initial commit
This commit is contained in:
parent
03856ddb9e
commit
223fd853c9
3
.gitignore
vendored
3
.gitignore
vendored
@ -138,3 +138,6 @@ dmypy.json
|
|||||||
# Cython debug symbols
|
# Cython debug symbols
|
||||||
cython_debug/
|
cython_debug/
|
||||||
|
|
||||||
|
|
||||||
|
# Other
|
||||||
|
*.pdf
|
||||||
|
1
.python-version
Normal file
1
.python-version
Normal file
@ -0,0 +1 @@
|
|||||||
|
3.11
|
116
pdf_split.py
Normal file
116
pdf_split.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
from pypdf import PdfReader, PdfWriter
|
||||||
|
from pypdf._page import Transformation
|
||||||
|
|
||||||
|
|
||||||
|
def resize_pdf_page(page, scale_factor):
    """
    Resize or rescale a single PDF page, including its content and canvas.

    The page is modified in place and also returned for convenience.

    Args:
        page (PageObject): The page to be resized.
        scale_factor (float): The factor by which to scale the page.
                              e.g., 0.5 for 50% size, 2.0 for 200% size.
                              Must be greater than zero.

    Returns:
        PageObject: The resized page (the same object that was passed in).

    Raises:
        ValueError: If ``scale_factor`` is zero or negative.
    """
    if scale_factor <= 0:
        raise ValueError(f"scale_factor must be positive, got {scale_factor!r}")

    # Let pypdf scale the content stream and every page box together.
    # Scaling the content about the origin while only recomputing the media
    # box's upper-right corner (as width * factor, height * factor) leaves the
    # box misaligned with the content whenever the media box does not start at
    # (0, 0), and leaves any other page boxes at their old size.
    page.scale_by(scale_factor)

    return page
|
||||||
|
|
||||||
|
|
||||||
|
def split_pdf_vertically(
    input_pdf,
    output_pdf_prefix,
    cut_points=None,
    new_width: float = 0,
    single_output=True,
):
    """
    Splits the first page of a PDF into horizontal bands and optionally resizes it.

    Only ``reader.pages[0]`` is processed; any further pages of the input are
    ignored. Each band is produced by cloning the page and shrinking its media
    box, so the page content itself is not rewritten.

    Args:
        input_pdf (str): Path to the input PDF file.
        output_pdf_prefix (str): Prefix for output PDF files.
        cut_points (list of tuples): List of (start_y, end_y) ratios of the
                                     page height for each band. A ratio is
                                     turned into a PDF y coordinate (origin at
                                     the bottom of the media box), so 0 is the
                                     bottom edge and 1 the top edge. Bands are
                                     emitted in reverse list order. Defaults
                                     to the whole page.
        new_width (float): Desired width of the output PDF in inches
                           (72 points per inch). 0 or None means no resizing.
        single_output (bool): If True, outputs a single PDF with multiple pages.
                              If False, outputs a separate PDF for each segment.

    Output files:
        ``{prefix}W{width:03d}_{start}-{end}.pdf`` per band, or
        ``{prefix}W{width:03d}_{cuts}_combined.pdf`` for the combined file,
        where ``width`` is the page width in whole inches and the positions
        are integer point offsets (ratio * page height).
    """
    if cut_points is None:
        cut_points = [(0, 1)]

    reader = PdfReader(input_pdf)
    original_page = reader.pages[0]

    # Resize the page if a new width is specified
    if new_width:
        scale_factor = (new_width * 72) / original_page.mediabox.width
        original_page = resize_pdf_page(original_page, scale_factor)

    media_box = original_page.mediabox
    left = media_box.lower_left[0]
    bottom = media_box.lower_left[1]
    right = media_box.upper_right[0]
    h = media_box.height

    # Width label for the file names, in whole inches. It must be an int:
    # the "03d" format spec raises ValueError for floats, which is what
    # ``width // 72`` (and a float ``new_width``) would otherwise produce.
    width_label = int(new_width) if new_width else int(media_box.width // 72)

    # Create a single writer for combined output, if needed
    combined_writer = PdfWriter() if single_output else None

    for start_ratio, end_ratio in reversed(cut_points):
        # Convert relative coordinates to absolute offsets (in points)
        start_y = int(h * start_ratio)
        end_y = int(h * end_ratio)

        # Keep the rectangle well-formed (lower edge below upper edge)
        # regardless of the order the two ratios were given in, and measure
        # from the media box's own bottom edge rather than assuming y = 0.
        low, high = sorted((start_y, end_y))

        # Clone and crop the page. A fresh writer is used for every band,
        # presumably so that each clone is an independent copy whose media
        # box can be changed without affecting the other bands.
        writer = PdfWriter()
        new_page = original_page.clone(writer)
        new_page.mediabox.lower_left = (left, bottom + low)
        new_page.mediabox.upper_right = (right, bottom + high)

        if single_output:
            combined_writer.add_page(new_page)
        else:
            writer.add_page(new_page)
            with open(
                f"{output_pdf_prefix}W{width_label:03d}_{start_y}-{end_y}.pdf", "wb"
            ) as f:
                writer.write(f)

    # Write the single output file, if applicable
    if single_output:
        # Unique cut positions in ascending numeric order. Sorting the
        # stringified values would order them lexicographically
        # ("100" before "20").
        positions = sorted({int(ratio * h) for pair in cut_points for ratio in pair})
        ct = "-".join(str(pos) for pos in positions)

        with open(
            f"{output_pdf_prefix}W{width_label:03d}_{ct}_combined.pdf", "wb"
        ) as f:
            combined_writer.write(f)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Example run against a local "sample.pdf".
    input_pdf = "sample.pdf"
    output_pdf_prefix = "split"

    # Each tuple is (start_y, end_y) for one segment; the two segments
    # below share the boundary at 0.0949358.
    first_boundary = 0.0949358
    cut_points = [(0, first_boundary), (first_boundary, 0.2)]

    # 1) One file per segment, original page size.
    split_pdf_vertically(
        input_pdf=input_pdf,
        output_pdf_prefix=output_pdf_prefix,
        cut_points=cut_points,
        new_width=None,
        single_output=False,
    )

    # 2) Whole page rescaled to a width of 32, written as one combined file.
    split_pdf_vertically(
        input_pdf=input_pdf,
        output_pdf_prefix="resize",
        cut_points=None,
        new_width=32,
        single_output=True,
    )

    # Further example (not executed): four equal quarters at width 32,
    # one file per segment.
    # split_pdf_vertically(
    #     input_pdf,
    #     output_pdf_prefix,
    #     cut_points=[(0, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1)],
    #     new_width=32,
    #     single_output=False,
    # )
|
10
pyproject.toml
Normal file
10
pyproject.toml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
[project]
|
||||||
|
name = "pdf-utils"
|
||||||
|
version = "0.0.1"
|
||||||
|
description = "Slim PDF Utilities"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
dependencies = [
|
||||||
|
"pypdf>=5.1.0",
|
||||||
|
]
|
||||||
|
|
34
uv.lock
generated
Normal file
34
uv.lock
generated
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
version = 1
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pdf-utils"
|
||||||
|
version = "0.0.1"
|
||||||
|
source = { virtual = "." }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "pypdf" },
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.metadata]
|
||||||
|
requires-dist = [{ name = "pypdf", specifier = ">=5.1.0" }]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pypdf"
|
||||||
|
version = "5.1.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/6b/9a/72d74f05f64895ebf1c7f6646cf7fe6dd124398c5c49240093f92d6f0fdd/pypdf-5.1.0.tar.gz", hash = "sha256:425a129abb1614183fd1aca6982f650b47f8026867c0ce7c4b9f281c443d2740", size = 5011381 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/04/fc/6f52588ac1cb4400a7804ef88d0d4e00cfe57a7ac6793ec3b00de5a8758b/pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc", size = 297976 },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typing-extensions"
|
||||||
|
version = "4.12.2"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 },
|
||||||
|
]
|
Loading…
Reference in New Issue
Block a user