all working
All checks were successful
Publish Docker image / Push Docker image to Docker Hub (push) Successful in 2s
All checks were successful
Publish Docker image / Push Docker image to Docker Hub (push) Successful in 2s
This commit is contained in:
@@ -2,11 +2,11 @@
|
||||
|
||||
import re
|
||||
import codecs
|
||||
from typing import Literal
|
||||
import pdfplumber
|
||||
|
||||
|
||||
|
||||
def convert(file_path:str="", format_indents:bool=True):
|
||||
def convert(file_path: str = "", format_indents: bool = True):
|
||||
"""Opens pdf and converts it into text"""
|
||||
if file_path == "":
|
||||
# if there is no server to provide a filepath, open filepath dialog
|
||||
@@ -42,32 +42,47 @@ def convert(file_path:str="", format_indents:bool=True):
|
||||
print(f"Alles fertig, die Datei befindet sich unter {file_path}")
|
||||
return converted_pages
|
||||
|
||||
def convert_text(text: str, format_indents: bool = True) -> str:
    """Turn the answer part of each line of *text* into an Anki cloze field.

    The text is split with ``seperate`` and every line is passed through
    ``match_and_replace``. When that helper returns ``True`` (the line ends
    with a colon) the line is kept unchanged and the following lines that
    start with non-word characters (whitespace, bullets, ...) are wrapped
    as cloze fields themselves, until a line that does not start that way
    is reached.

    Args:
        text: The raw text to convert.
        format_indents: Forwarded to ``match_and_replace``.

    Returns:
        The converted lines joined with ``"\\n"``, with leading and
        trailing whitespace stripped.
    """
    changed_lines: list[str] = []  # converted lines, in input order
    as_indent = False  # True while inside the block that follows a "heading:" line

    for line in seperate(text):
        if as_indent and re.search(r"^\W+.+", line) is not None:
            # Line following a heading: keep the leading non-word prefix
            # and wrap the remainder. The template is a plain raw string,
            # so the braces must not be doubled as they would be in an
            # f-string.
            new_line, _ = re.subn(r"(^\W+)(.+)", r"\1 {{c1::\2}}", line)
        else:
            as_indent = False
            new_line = match_and_replace(line, format_indents)

        if new_line is True:
            # Heading line: emit it untouched and switch to indent mode.
            as_indent = True
            changed_lines.append(line)
        else:
            changed_lines.append(new_line)

    return "\n".join(changed_lines).strip()
|
||||
|
||||
def match_and_replace(line:str, format_indents:bool=True) -> str:
|
||||
if re.search(":$", line) != None:
|
||||
return line
|
||||
|
||||
def match_and_replace(
|
||||
line: str, format_indents: bool = True, as_indent: bool = False
|
||||
) -> str | Literal[True]:
|
||||
if re.search(":$", line) is not None:
|
||||
return True
|
||||
if format_indents:
|
||||
line, num = re.subn("(\t+.)(..*)", rf"\1 {{{{c1::\2}}}}", line)
|
||||
line, num = re.subn("(\t+.)(..*)", r"\1 {{{{c1::\2}}}}", line)
|
||||
if num > 0:
|
||||
return line
|
||||
line, num = re.subn("(:)(..+)", rf"\1 {{{{c1::\2}}}}", line)
|
||||
line, num = re.subn("(:)(..+)", r"\1 {{{{c1::\2}}}}", line)
|
||||
return line
|
||||
|
||||
|
||||
def seperate(text: str) -> list[str]:
    """Split *text* into a list of lines.

    Windows line endings (CRLF) are normalised to LF first. A line break
    that is followed by one or more spaces and then an ASCII letter or
    digit is treated as a wrapped line rather than a new entry: the break
    and the spaces collapse into a single space.

    Returns:
        The lines of the cleaned text, split on ``"\\n"``.
    """
    # str.replace is a no-op when nothing matches, so no pre-check is needed.
    unified = text.replace("\r\n", "\n")
    # Re-join wrapped continuation lines.
    rejoined = re.sub(r"\n +([A-Za-z0-9])", r" \1", unified)
    return rejoined.split("\n")
|
||||
|
||||
|
||||
|
||||
@@ -27,10 +27,7 @@
|
||||
<div id="right">
|
||||
<button id="copy-btn">Copy Text</button>
|
||||
<br />
|
||||
<pre id="myInput">
|
||||
{{ resp_text }}
|
||||
</pre
|
||||
>
|
||||
<pre id="myInput">{{ resp_text }}</pre>
|
||||
</div>
|
||||
{% endif %}
|
||||
</main>
|
||||
|
||||
Reference in New Issue
Block a user