반응형
1. pdf를 ppt로 바꾸기
pdf 내용에 몇 가지 첨가하거나 수정하고 싶을 때
사실 텍스트까지 읽어주면 좋지만
아직 오류가 많이 나서
이미지만 복사하고
이미지 복사한 슬라이드 위에 텍스트 박스나 도형을 올리는 방식으로 수정하기로 결정
2. python-pptx 패키지 활용
pdf를 넣어서 pptx (이미지만 올린 간단한 ppt)를 만드는 코드
설치해야 할 패키지들
pip install pytesseract python-pptx pillow
pip install pdf2image tqdm PyPDF2
sudo apt update
sudo apt install poppler-utils
위의 패키지들을 스크립트를 실행할 환경에 설치한다
from pdf2image import convert_from_path
from pptx import Presentation
from pptx.util import Inches
from tqdm import tqdm
from PIL import Image
from PyPDF2 import PdfReader, PdfWriter
import os
import sys
# --- Configuration ---
# Path of the PDF to convert; edit this to point at your own file.
original_pdf = 'original.pdf'
# Directory that receives one JPEG per rendered PDF page.
image_dir = 'pdf_images'
# Rendering resolution passed to the PDF-to-image conversion.
dpi = 300
# Number of PDF pages rendered per conversion call.
CHUNK_SIZE = 10
# Path of the PowerPoint file that is written at the end.
output_file = 'output.pptx'

# Usage (example with an explicit conda environment interpreter):
#   /home/user/miniconda3/envs/pptpdf/bin/python main.py
# --- Utilities ---
def trim_pdf(input_path, output_path, start_page, end_page):
    """Copy a range of pages from one PDF into a new PDF file.

    Pages are addressed by zero-based index: ``start_page`` is inclusive and
    ``end_page`` is exclusive. ``end_page`` is clamped to the number of pages
    in the source document, so the last chunk may be shorter than requested.

    Args:
        input_path: Path of the PDF to read from.
        output_path: Path of the PDF to create (overwritten if it exists).
        start_page: Index of the first page to copy.
        end_page: Index one past the last page to copy.
    """
    source = PdfReader(input_path)
    chunk = PdfWriter()

    # Clamp once so the loop and the log message agree on the last page.
    last_page = min(end_page, len(source.pages))
    for page_index in range(start_page, last_page):
        chunk.add_page(source.pages[page_index])

    with open(output_path, 'wb') as out_file:
        chunk.write(out_file)

    # The message uses 1-based page numbers.
    print(f"Created temporary PDF with pages {start_page + 1} to {last_page}")
# --- Preflight ---
# Stop with a non-zero exit status when the input PDF is missing, before
# anything is created on disk.
if not os.path.exists(original_pdf):
    print(f"{original_pdf} not found in the current directory.")
    sys.exit(1)

# Make sure the image output directory exists (no error if it already does).
os.makedirs(image_dir, exist_ok=True)

# Count the pages up front; the chunking loop below is driven by this number.
pdf_reader = PdfReader(original_pdf)
total_pages = len(pdf_reader.pages)
print(f"Total pages in PDF: {total_pages}")
# --- Split and convert chunks ---
# The PDF is rendered CHUNK_SIZE pages at a time: each chunk is written to a
# temporary PDF, rasterised, saved as JPEGs, and the temporary PDF is removed.
# NOTE(review): chunking is presumably meant to limit how many page images are
# held in memory at once -- confirm.
print("Converting PDF to images in chunks...")
current_page = 0
while current_page < total_pages:
    end_page = current_page + CHUNK_SIZE
    temp_pdf = f'temp_{current_page}_{end_page}.pdf'
    try:
        # 1. Create temporary PDF chunk
        trim_pdf(original_pdf, temp_pdf, current_page, end_page)
        # 2. Convert chunk to images
        images = convert_from_path(temp_pdf, dpi=dpi)
        # 3. Save images under their page index in the whole document,
        #    not their index within the chunk.
        for page_num, img in enumerate(images, start=current_page):
            img_path = os.path.join(image_dir, f'page_{page_num}.jpg')
            img.save(img_path, 'JPEG')
    finally:
        # Always remove the temporary chunk, even when conversion or saving
        # fails, so aborted runs do not leave temp_*.pdf files behind.
        if os.path.exists(temp_pdf):
            os.remove(temp_pdf)
    current_page += CHUNK_SIZE
print("All images saved.")
# --- Create 16:9 PowerPoint ---
print("Generating PowerPoint...")
prs = Presentation()
prs.slide_width = Inches(13.33)
prs.slide_height = Inches(7.5)

# Use only the images that belong to the pages of this PDF, in page order.
# Listing the whole directory would also pick up page_N.jpg files left over
# from an earlier run on a longer PDF, and any unrelated .jpg file would break
# a sort that parses the page number out of the file name.
image_files = [
    name
    for name in (f'page_{page_num}.jpg' for page_num in range(total_pages))
    if os.path.exists(os.path.join(image_dir, name))
]

for image_file in tqdm(image_files, desc="Adding slides", unit="slide"):
    # Layout index 6 is the blank layout in python-pptx's default template.
    slide = prs.slides.add_slide(prs.slide_layouts[6])
    img_path = os.path.join(image_dir, image_file)
    # Scale the page to the full slide width; the height follows the image's
    # aspect ratio.
    picture = slide.shapes.add_picture(
        img_path, Inches(0), Inches(0), width=prs.slide_width
    )
    if picture.height > prs.slide_height:
        # The page is taller than 16:9 (e.g. A4, Letter, 4:3) and would run
        # off the bottom of the slide: fit it to the slide height instead and
        # centre it horizontally.
        scale = prs.slide_height / picture.height
        picture.width = int(picture.width * scale)
        picture.height = prs.slide_height
        picture.left = (prs.slide_width - picture.width) // 2

prs.save(output_file)
print(f"Presentation saved as {output_file}")
main.py라는 이름의 파일에 저장하고
python main.py를 실행
만약 conda env를 쓰는데 import 오류가 난다면,
python 대신 해당 환경의 파이썬 전체 경로를 써서 실행한다: /home/{user}/miniconda3/envs/{env name}/bin/python main.py
반응형