diff --git a/src/functions.py b/src/functions.py index 673fd12..28cd638 100755 --- a/src/functions.py +++ b/src/functions.py @@ -2,11 +2,11 @@ import re import codecs +from typing import Literal import pdfplumber - -def convert(file_path:str="", format_indents:bool=True): +def convert(file_path: str = "", format_indents: bool = True): """Opens pdf and converts it into text""" if file_path == "": # if there is no server to provide a filepath, open filepath dialog @@ -42,32 +42,47 @@ def convert(file_path:str="", format_indents:bool=True): print(f"Alles fertig, die Datei befindet sich unter {file_path}") return converted_pages -def convert_text(text: str, format_indents:bool=True): + +def convert_text(text: str, format_indents: bool = True): """Searches for ':' and converts into anki annotation""" - changed_lines: list[str] = [] # array with new lines -> anki fields added + changed_lines: list[str] = [] # array with new lines -> anki fields added + as_indent: bool = False for line in seperate(text): # add anki field into line and count the number of changes - changed_lines.append(match_and_replace(line, format_indents)) + if as_indent and re.search(r"^\W+.+", line) is not None: + newLine, _ = re.subn(r"(^\W+)(.+)", r"\1 {{{{c1::\2}}}}", line) + else: + as_indent = False + newLine = match_and_replace(line, format_indents) + if newLine is True: + as_indent = True + changed_lines.append(line) + else: + changed_lines.append(newLine) return "\n".join(changed_lines).strip() -def match_and_replace(line:str, format_indents:bool=True) -> str: - if re.search(":$", line) != None: - return line + +def match_and_replace( + line: str, format_indents: bool = True, as_indent: bool = False +) -> str | Literal[True]: + if re.search(":$", line) is not None: + return True if format_indents: - line, num = re.subn("(\t+.)(..*)", rf"\1 {{{{c1::\2}}}}", line) + line, num = re.subn("(\t+.)(..*)", r"\1 {{{{c1::\2}}}}", line) if num > 0: return line - line, num = re.subn("(:)(..+)", rf"\1 {{{{c1::\2}}}}", line) + line, num = re.subn("(:)(..+)", r"\1 {{{{c1::\2}}}}", line) return line + def seperate(text: str) -> list[str]: """Seperates a text into an array of lines""" if "\r\n" in text: # unifies CRLF text = text.replace("\r\n", "\n") # clean linebreaks if they are not paragraph breaks - clean_nl = re.sub("\\n +([A-Za-z0-9])",r" \1", text) + clean_nl = re.sub("\\n +([A-Za-z0-9])", r" \1", text) return clean_nl.split("\n") diff --git a/src/templates/index.html b/src/templates/index.html index da234a4..f1ad198 100755 --- a/src/templates/index.html +++ b/src/templates/index.html @@ -27,10 +27,7 @@
- {{ resp_text }}
-
+ {{ resp_text }}