Files
anki_convert/functions.py
2023-06-30 11:23:12 +02:00

79 lines
2.4 KiB
Python
Executable File

import pdfplumber
import re
import codecs
def convert(file_path=False):
    """Convert a PDF into Anki cloze-deletion text and save it as a .txt file.

    Each page is cropped, its text is extracted, trailing spaces are removed,
    indented continuation lines are folded back into the previous line, and
    on every line that contains a colon followed by text, everything after
    the first colon is wrapped in a cloze marker (``{{cN::...}}``). The cloze
    counter restarts at 1 on every page. The converted pages are joined with a
    "neue Seite" separator and written as UTF-8 to a file that has the same
    name as the PDF but the extension ``.txt``.

    Args:
        file_path: Path of the PDF to convert. If falsy (the default), a
            tkinter file dialog asks the user to choose a file.

    Returns:
        The converted text of all pages as one string.

    Raises:
        FileNotFoundError: If no path was passed and the file dialog was
            closed without choosing a file.
    """
    import os

    if not file_path:
        import tkinter as tk
        from tkinter import filedialog

        root = tk.Tk()
        root.withdraw()
        try:
            file_path = filedialog.askopenfilename(
                filetypes=[("PDFs", ".pdf")],
                title="Datei zum konvertieren auswählen!",
            )
        finally:
            # The hidden root window only exists to host the dialog; destroy
            # it so it does not linger after the selection.
            root.destroy()
        if not file_path:
            # Dialog cancelled: fail with a clear message instead of handing
            # an empty path to pdfplumber.
            raise FileNotFoundError("no PDF file was selected")
    file_path = os.fspath(file_path)

    # Raw strings avoid the invalid "\ " escape sequence of the old patterns.
    trailing_spaces = re.compile(r" +\n")
    wrapped_line = re.compile(r"\n +([A-Za-z0-9])")
    answer_part = re.compile(r"(:)(.+)")

    pages = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            # pdfplumber bounding boxes are (x0, top, x1, bottom); this keeps
            # everything right of x=60 and below top=80 (presumably to cut
            # off a margin/header -- confirm against the source PDFs).
            cropped = page.crop((60, 80, page.width, page.height))
            # Guard against a page for which no text is returned
            # (assumption: may be None or "" depending on pdfplumber version).
            text = cropped.extract_text(layout=True) or ""
            text = trailing_spaces.sub("\n", text)
            # A newline followed by indentation and an alphanumeric character
            # is treated as a soft wrap and joined to the previous line.
            text = wrapped_line.sub(r" \1", text)

            cloze_no = 1  # cloze numbering restarts on every page
            converted = []
            for line in text.split("\n"):
                line, hits = answer_part.subn(
                    rf"\1 {{{{c{cloze_no}::\2}}}}", line
                )
                if hits:
                    cloze_no += 1
                converted.append(line)
            pages.append("\n".join(converted).strip())

    # NOTE(review): pages are stripped, so the separator is appended directly
    # to the last line of the preceding page -- kept as-is to preserve the
    # existing output format.
    result = "#################### neue Seite ####################\n".join(pages)

    # Swap only the real extension. The former str.replace(".pdf", ".txt")
    # touched every ".pdf" in the path and left e.g. "X.PDF" unchanged, which
    # made the text output overwrite the source PDF.
    out_path = os.path.splitext(file_path)[0] + ".txt"
    # newline="" writes "\n" untranslated, matching the previous codecs.open.
    with open(out_path, "w", encoding="utf-8", newline="") as text_file:
        text_file.write(result)
    print(f"Alles fertig, die Datei befindet sich unter {out_path}")
    return result
def convert_text(text):
    """Turn plain text into Anki cloze-deletion text.

    Windows line endings are normalised to ``\\n``, indented continuation
    lines are folded back into the previous line, and on every line that
    contains a colon followed by at least one character, everything after the
    first colon is wrapped in a cloze marker: ``key: value`` becomes
    ``key: {{cN:: value}}``. ``N`` starts at 1 and increases by one for each
    line that received a marker.

    Args:
        text: The text to convert; it is passed through ``str()`` first.

    Returns:
        The converted text with leading and trailing whitespace stripped.
    """
    normalised = str(text).replace("\r\n", "\n")
    # A newline followed by spaces and an alphanumeric character is treated
    # as a soft wrap and joined to the previous line. Raw string: the old
    # non-raw pattern contained the invalid escape sequence "\ ".
    unwrapped = re.sub(r"\n +([A-Za-z0-9])", r" \1", normalised)

    cloze_no = 1
    converted = []
    for line in unwrapped.split("\n"):
        line, hits = re.subn(r"(:)(.+)", rf"\1 {{{{c{cloze_no}::\2}}}}", line)
        if hits:
            # Only lines that actually got a marker consume a cloze number.
            cloze_no += 1
        converted.append(line)
    # Always return the result; previously None was returned when this module
    # itself was the running script.
    return "\n".join(converted).strip()
# Script entry point: when this file is executed directly, run the conversion
# without a path so that convert() asks for the PDF via its file dialog.
if __name__ == "__main__":
    convert()