initial commit
This commit is contained in:
parent
03856ddb9e
commit
223fd853c9
3
.gitignore
vendored
3
.gitignore
vendored
@ -138,3 +138,6 @@ dmypy.json
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
|
||||
# Other
|
||||
*.pdf
|
||||
|
1
.python-version
Normal file
1
.python-version
Normal file
@ -0,0 +1 @@
|
||||
3.11
|
116
pdf_split.py
Normal file
116
pdf_split.py
Normal file
@ -0,0 +1,116 @@
|
||||
from pypdf import PdfReader, PdfWriter
|
||||
from pypdf._page import Transformation
|
||||
|
||||
|
||||
def resize_pdf_page(page, scale_factor):
    """
    Resize or rescale a single PDF page, including its content and canvas.

    The page is modified in place. Its content is scaled about the
    coordinate origin, and both corners of the media box are multiplied by
    the same factor so the box keeps framing the same content even when its
    lower-left corner is not at (0, 0).

    Args:
        page (PageObject): The page to be resized.
        scale_factor (float): The factor by which to scale the page.
            e.g., 0.5 for 50% size, 2.0 for 200% size.

    Returns:
        PageObject: The same page object that was passed in, resized.
    """
    # A pure scale expressed as the (a, b, c, d, e, f) matrix operands that
    # add_transformation accepts directly.
    page.add_transformation((scale_factor, 0, 0, scale_factor, 0, 0))

    # The content was scaled about the origin, so every corner of the media
    # box moves by the same factor; scaling only the size would shift the
    # visible window for boxes whose lower-left corner is not the origin.
    media_box = page.mediabox
    left, bottom = media_box.lower_left
    right, top = media_box.upper_right
    media_box.lower_left = (left * scale_factor, bottom * scale_factor)
    media_box.upper_right = (right * scale_factor, top * scale_factor)

    return page
|
||||
|
||||
|
||||
def split_pdf_vertically(
    input_pdf,
    output_pdf_prefix,
    cut_points=None,
    new_width: float = 0,
    single_output=True,
):
    """
    Splits a PDF vertically and optionally resizes it.

    Only the first page of the input is processed. Each segment is produced
    by cloning that page and narrowing its media box to the requested
    vertical band; the page content itself is not rewritten.

    Args:
        input_pdf (str): Path to the input PDF file.
        output_pdf_prefix (str): Prefix for output PDF files.
        cut_points (list of tuples): List of (start_y, end_y) ratios for
            vertical splitting, as fractions of the page height measured
            from the media box's lower edge (lower y value). Defaults to
            [(0, 1)], i.e. the whole page. Segments are emitted in reverse
            list order.
        new_width (float): Desired width of the output PDF in inches
            (72 points per inch). A falsy value (0 or None) means no
            resizing.
        single_output (bool): If True, outputs a single PDF with multiple pages
            named "<prefix>W<width>_<cuts>_combined.pdf".
            If False, outputs a separate PDF for each segment named
            "<prefix>W<width>_<start>-<end>.pdf".
    """
    if cut_points is None:
        cut_points = [(0, 1)]

    reader = PdfReader(input_pdf)
    original_page = reader.pages[0]

    # Resize the page if a new width is specified
    if new_width:
        scale_factor = (new_width * 72) / original_page.mediabox.width
        original_page = resize_pdf_page(original_page, scale_factor)

    media_box = original_page.mediabox
    h = media_box.height
    # Read the reference corners once, before any clone is cropped.
    left, bottom = media_box.lower_left
    right = media_box.upper_right[0]

    # Width in whole inches for the file name. It must be an int: the
    # "03d" format below raises ValueError for floats, which is what a
    # non-integer media box or a fractional new_width would otherwise give.
    if new_width:
        width_label = int(new_width)
    else:
        width_label = int(media_box.width // 72)

    # Create a single writer for combined output, if needed
    combined_writer = PdfWriter() if single_output else None

    for start_ratio, end_ratio in reversed(cut_points):
        # Convert relative coordinates to absolute offsets (in points)
        start_y = int(h * start_ratio)
        end_y = int(h * end_ratio)
        # Keep the rectangle well-formed whichever order the ratios came in.
        low, high = sorted((start_y, end_y))

        # Clone and crop the page; offsets are taken from the box's bottom
        # edge so pages whose media box does not start at y=0 crop correctly.
        writer = PdfWriter()
        new_page = original_page.clone(writer)
        new_page.mediabox.lower_left = (left, bottom + low)
        new_page.mediabox.upper_right = (right, bottom + high)

        if single_output:
            combined_writer.add_page(new_page)
        else:
            writer.add_page(new_page)
            with open(
                f"{output_pdf_prefix}W{width_label:03d}_{start_y}-{end_y}.pdf", "wb"
            ) as f:
                writer.write(f)

    # Write the single output file, if applicable
    if single_output:
        # Distinct cut positions, ordered numerically (sorting the strings
        # would put "1000" before "198").
        cut_positions = sorted(
            {int(h * ratio) for segment in cut_points for ratio in segment}
        )
        ct = "-".join(str(y) for y in cut_positions)

        with open(
            f"{output_pdf_prefix}W{width_label:03d}_{ct}_combined.pdf", "wb"
        ) as f:
            combined_writer.write(f)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Usage example: run the splitter twice on the same sample document.
    source_path = "sample.pdf"

    # (start_y, end_y) height ratios for each segment of the first job.
    segments = [(0, 0.0949358), (0.0949358, 0.2)]

    demo_jobs = (
        # One output file per segment, no resizing.
        {
            "output_pdf_prefix": "split",
            "cut_points": segments,
            "new_width": None,
            "single_output": False,
        },
        # Whole page (default cut points), resized to 32 inches wide,
        # written as a single combined file.
        {
            "output_pdf_prefix": "resize",
            "cut_points": None,
            "new_width": 32,
            "single_output": True,
        },
    )
    for job in demo_jobs:
        split_pdf_vertically(source_path, **job)

    # A further variation, not run by default: pass
    # cut_points=[(0, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1)] with
    # new_width=32 and single_output=False to get four separate quarter-height
    # files from the resized page.
|
10
pyproject.toml
Normal file
10
pyproject.toml
Normal file
@ -0,0 +1,10 @@
|
||||
[project]
|
||||
name = "pdf-utils"
|
||||
version = "0.0.1"
|
||||
description = "Slim PDF Utilities"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"pypdf>=5.1.0",
|
||||
]
|
||||
|
34
uv.lock
generated
Normal file
34
uv.lock
generated
Normal file
@ -0,0 +1,34 @@
|
||||
version = 1
|
||||
requires-python = ">=3.10"
|
||||
|
||||
[[package]]
|
||||
name = "pdf-utils"
|
||||
version = "0.0.1"
|
||||
source = { virtual = "." }
|
||||
dependencies = [
|
||||
{ name = "pypdf" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [{ name = "pypdf", specifier = ">=5.1.0" }]
|
||||
|
||||
[[package]]
|
||||
name = "pypdf"
|
||||
version = "5.1.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6b/9a/72d74f05f64895ebf1c7f6646cf7fe6dd124398c5c49240093f92d6f0fdd/pypdf-5.1.0.tar.gz", hash = "sha256:425a129abb1614183fd1aca6982f650b47f8026867c0ce7c4b9f281c443d2740", size = 5011381 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/04/fc/6f52588ac1cb4400a7804ef88d0d4e00cfe57a7ac6793ec3b00de5a8758b/pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc", size = 297976 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.12.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 },
|
||||
]
|
Loading…
Reference in New Issue
Block a user