initial commit
This commit is contained in:
		
							parent
							
								
									03856ddb9e
								
							
						
					
					
						commit
						223fd853c9
					
				
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@ -138,3 +138,6 @@ dmypy.json
 | 
			
		||||
# Cython debug symbols
 | 
			
		||||
cython_debug/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Other
 | 
			
		||||
*.pdf
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1
									
								
								.python-version
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								.python-version
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1 @@
 | 
			
		||||
3.11
 | 
			
		||||
							
								
								
									
										116
									
								
								pdf_split.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								pdf_split.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,116 @@
 | 
			
		||||
from pypdf import PdfReader, PdfWriter
 | 
			
		||||
from pypdf._page import Transformation
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def resize_pdf_page(page, scale_factor):
    """
    Resize or rescale a single PDF page, including its content and canvas.

    The page is modified in place: its content receives a uniform scaling
    transformation and both corners of its media box are multiplied by the
    same factor. Only the media box is adjusted here; any other page boxes
    are left untouched.

    Args:
        page (PageObject): The page to be resized.
        scale_factor (float): The factor by which to scale the page.
                              e.g., 0.5 for 50% size, 2.0 for 200% size.

    Returns:
        PageObject: The same page object, resized.

    Raises:
        ValueError: If scale_factor is not greater than zero.
    """
    if scale_factor <= 0:
        raise ValueError(f"scale_factor must be positive, got {scale_factor!r}")

    # Capture both corners before the box is rewritten below.
    left, bottom = page.mediabox.lower_left
    right, top = page.mediabox.upper_right

    # Apply scaling transformation to the page content
    page.add_transformation(Transformation().scale(scale_factor, scale_factor))

    # The content is scaled about the origin, so a media box that does not
    # start at (0, 0) must have *both* corners scaled; resizing only the
    # upper-right corner would leave the box misaligned with the content.
    page.mediabox.lower_left = (left * scale_factor, bottom * scale_factor)
    page.mediabox.upper_right = (right * scale_factor, top * scale_factor)

    return page
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def split_pdf_vertically(
    input_pdf,
    output_pdf_prefix,
    cut_points=None,
    new_width: float = 0,
    single_output=True,
):
    """
    Splits a PDF vertically and optionally resizes it.

    Only the first page of the input PDF is processed. Each segment is
    produced by cloning that page and shrinking the clone's media box to the
    requested band. Segments are emitted in the reverse of the order given in
    ``cut_points``.

    Args:
        input_pdf (str): Path to the input PDF file.
        output_pdf_prefix (str): Prefix for output PDF files.
        cut_points (list of tuples): List of (start_y, end_y) ratios for vertical splitting,
                                     measured from the bottom edge of the media box.
                                     None or an empty list means the whole page, [(0, 1)].
        new_width (float): Desired width of the output PDF in inches. Default is no resizing.
        single_output (bool): If True, outputs a single PDF with multiple pages.
                              If False, outputs a separate PDF for each segment.
    """
    if not cut_points:
        cut_points = [(0, 1)]

    reader = PdfReader(input_pdf)
    original_page = reader.pages[0]

    # Resize the page if a new width is specified (72 PDF units per inch)
    if new_width:
        scale_factor = (new_width * 72) / original_page.mediabox.width
        original_page = resize_pdf_page(original_page, scale_factor)

    media_box = original_page.mediabox
    left, bottom = media_box.lower_left
    right = media_box.upper_right[0]
    h = media_box.height

    # Width label used in output file names. The media box width is not an
    # int, so it must be converted before using the zero-padded "d" format;
    # a fractional requested width is kept as-is rather than truncated.
    width_in = new_width if new_width else media_box.width // 72
    if float(width_in).is_integer():
        width_label = f"{int(width_in):03d}"
    else:
        width_label = f"{width_in:g}"

    # Create a single writer for combined output, if needed
    combined_writer = PdfWriter() if single_output else None

    for start_ratio, end_ratio in reversed(cut_points):
        # Convert relative coordinates to absolute offsets within the page
        start_y = int(h * start_ratio)
        end_y = int(h * end_ratio)

        # Clone and crop the page. Offsets are applied relative to the media
        # box's bottom edge, and the smaller one always becomes the lower
        # edge so the resulting box is well-formed.
        writer = PdfWriter()
        new_page = original_page.clone(writer)
        low, high = sorted((start_y, end_y))
        new_page.mediabox.lower_left = (left, bottom + low)
        new_page.mediabox.upper_right = (right, bottom + high)

        if single_output:
            combined_writer.add_page(new_page)
        else:
            writer.add_page(new_page)
            with open(
                f"{output_pdf_prefix}W{width_label}_{start_y}-{end_y}.pdf", "wb"
            ) as f:
                writer.write(f)

    # Write the single output file, if applicable
    if single_output:
        # Distinct boundaries, sorted numerically (not as strings) so that
        # e.g. 75 comes before 100 in the file name.
        boundaries = sorted(
            {int(ratio * h) for segment in cut_points for ratio in segment}
        )
        ct = "-".join(map(str, boundaries))

        with open(f"{output_pdf_prefix}W{width_label}_{ct}_combined.pdf", "wb") as f:
            combined_writer.write(f)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
    # Demo run against a local sample file.
    source_path = "sample.pdf"
    prefix = "split"
    # Each pair is the (start_y, end_y) ratio range of one segment.
    segments = [(0, 0.0949358), (0.0949358, 0.2)]

    # One output file per segment, without resizing.
    split_pdf_vertically(
        source_path,
        prefix,
        segments,
        new_width=None,
        single_output=False,
    )

    # Whole page, resized to a width of 32, written as one combined file.
    split_pdf_vertically(
        source_path,
        "resize",
        cut_points=None,
        new_width=32,
        single_output=True,
    )

    # Disabled example: four equal quarters, resized, one file each.
    # split_pdf_vertically(
    #     input_pdf,
    #     output_pdf_prefix,
    #     cut_points=[(0, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1)],
    #     new_width=32,
    #     single_output=False,
    # )
 | 
			
		||||
							
								
								
									
										10
									
								
								pyproject.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								pyproject.toml
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,10 @@
 | 
			
		||||
[project]
 | 
			
		||||
name = "pdf-utils"
 | 
			
		||||
version = "0.0.1"
 | 
			
		||||
description = "Slim PDF Utilities"
 | 
			
		||||
readme = "README.md"
 | 
			
		||||
requires-python = ">=3.10"
 | 
			
		||||
dependencies = [
 | 
			
		||||
    "pypdf>=5.1.0",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										34
									
								
								uv.lock
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								uv.lock
									
									
									
										generated
									
									
									
										Normal file
									
								
							@ -0,0 +1,34 @@
 | 
			
		||||
version = 1
 | 
			
		||||
requires-python = ">=3.10"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "pdf-utils"
 | 
			
		||||
version = "0.0.1"
 | 
			
		||||
source = { virtual = "." }
 | 
			
		||||
dependencies = [
 | 
			
		||||
    { name = "pypdf" },
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[package.metadata]
 | 
			
		||||
requires-dist = [{ name = "pypdf", specifier = ">=5.1.0" }]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "pypdf"
 | 
			
		||||
version = "5.1.0"
 | 
			
		||||
source = { registry = "https://pypi.org/simple" }
 | 
			
		||||
dependencies = [
 | 
			
		||||
    { name = "typing-extensions", marker = "python_full_version < '3.11'" },
 | 
			
		||||
]
 | 
			
		||||
sdist = { url = "https://files.pythonhosted.org/packages/6b/9a/72d74f05f64895ebf1c7f6646cf7fe6dd124398c5c49240093f92d6f0fdd/pypdf-5.1.0.tar.gz", hash = "sha256:425a129abb1614183fd1aca6982f650b47f8026867c0ce7c4b9f281c443d2740", size = 5011381 }
 | 
			
		||||
wheels = [
 | 
			
		||||
    { url = "https://files.pythonhosted.org/packages/04/fc/6f52588ac1cb4400a7804ef88d0d4e00cfe57a7ac6793ec3b00de5a8758b/pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc", size = 297976 },
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "typing-extensions"
 | 
			
		||||
version = "4.12.2"
 | 
			
		||||
source = { registry = "https://pypi.org/simple" }
 | 
			
		||||
sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 }
 | 
			
		||||
wheels = [
 | 
			
		||||
    { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 },
 | 
			
		||||
]
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user