From b03faed2fb6fffb0dddd447495a02222c40a1eaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=20Tr=C3=B6ger?= Date: Wed, 24 Sep 2025 11:23:54 +0200 Subject: [PATCH] added format indent without colon --- src/app.py | 11 +- src/functions.py | 59 +++---- src/static/style.css | 360 +++++++++++++++++++++++++++------------ src/templates/index.html | 24 ++- 4 files changed, 291 insertions(+), 163 deletions(-) diff --git a/src/app.py b/src/app.py index c8a09c9..4eefe92 100755 --- a/src/app.py +++ b/src/app.py @@ -23,18 +23,21 @@ def upload_file(): # Gets uploaded file or text from html input uploaded_file = request.files["file"] text = request.form["text"] + format_indents = request.form.get("format_indents") == "on" + + print(format_indents) # Check if file has been uploaded - if uploaded_file.filename != "": + if uploaded_file.filename: # Converts uploaded file and returns new text with anki fields filepath = path.join("uploads", uploaded_file.filename) uploaded_file.save(filepath) - response_text = functions.convert(filepath) + response_text = functions.convert(filepath, format_indents) text = response_text else: # Converts text and returns new text with anki fields - response_text = functions.convert_text(text) - return render_template("index.html", resp_text=response_text, base_text=text) + response_text = functions.convert_text(text, format_indents) + return render_template("index.html", resp_text=response_text, base_text=text, format_indents=format_indents) if __name__ == "__main__": diff --git a/src/functions.py b/src/functions.py index 367ec09..fdb2929 100755 --- a/src/functions.py +++ b/src/functions.py @@ -6,7 +6,7 @@ import pdfplumber -def convert(file_path:str=""): +def convert(file_path:str="", format_indents:bool=True): """Opens pdf and converts it into text""" if file_path == "": # if there is no server to provide a filepath, open filepath dialog @@ -19,72 +19,55 @@ def convert(file_path:str=""): filetypes=[("PDFs", ".pdf")], title="Datei zum konvertieren auswählen!" ) - conv_string = [] + converted_pages: list[str] = [] # open PDF with pdfplumber.open(file_path) as pdf: for page in pdf.pages: crop = page.crop((60, 80, page.width, page.height)) text = crop.extract_text(layout=True) - no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces - conv_string.append(convert_text(no_trail)) + no_trail = re.sub(" +\\n", "\n", text) # cleared trailing spaces + converted_pages.append(convert_text(no_trail, format_indents)) - conv_string = "#################### neue Seite ####################\n".join( - conv_string + converted_text = "#################### neue Seite ####################\n".join( + converted_pages ) # write converted pdf to file file_path = file_path.replace(".pdf", ".txt") text_file = codecs.open(file_path, "w", "utf-8") - text_file.write(conv_string) + text_file.write(converted_text) text_file.close() print(f"Alles fertig, die Datei befindet sich unter {file_path}") - return conv_string + return converted_pages - -def convert_text(text: str): +def convert_text(text: str, format_indents:bool=True): """Searches for ':' and converts into anki annotation""" - field_nr = 1 # number of anki field - changed_lines = [] # array with new lines -> anki fields added + changed_lines: list[str] = [] # array with new lines -> anki fields added - indented_points = False - indent_level = 0 for line in seperate(text): # add anki field into line and count the number of changes - if re.search(":$", line) != None: - indented_points = True - changed_lines.append(line) - continue - - tabStops = re.findall("\t", line) - if indented_points and indent_level == 0: - indent_level = len(tabStops) - - if indented_points and indent_level == len(tabStops): - line, num = re.subn("(\t+.)(..*)", rf"\1 {{{{c{field_nr}::\2}}}}", line) - if num > 0: - changed_lines.append(line) - continue - indented_points = False - indent_level = 0 - else: - indented_points = False - indent_level = 0 - line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line) - changed_lines.append(line) - + changed_lines.append(match_and_replace(line, format_indents)) return "\n".join(changed_lines).strip() +def match_and_replace(line:str, format_indents:bool=True) -> str: + if format_indents: + line, num = re.subn("(\t+.)(..*)", rf"\1 {{{{c1::\2}}}}", line) + if num > 0: + return line + line, num = re.subn("(:)(..+)", rf"\1 {{{{c1::\2}}}}", line) + return line + def seperate(text: str) -> list[str]: """Seperates a text into an array of lines""" if "\r\n" in text: # unifies CRLF text = text.replace("\r\n", "\n") # clean linebreaks if they are not paragraph breaks - clean_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text) + clean_nl = re.sub("\\n +([A-Za-z0-9])",r" \1", text) return clean_nl.split("\n") if __name__ == "__main__": - convert() + _ = convert() diff --git a/src/static/style.css b/src/static/style.css index 13fe61e..087dd99 100755 --- a/src/static/style.css +++ b/src/static/style.css @@ -1,112 +1,248 @@ -:root{ - --green:#4CAF50; - --primary:#3b3b3b; -} - -*{ - font-family: sans-serif; - font-size: 18px; -} - -body{ - margin: 0; - padding: 0; - color: var(--primary); -} - -main{ - display: grid; - width: 90vw; - margin: 5vw; - flex-wrap: wrap; - grid-template-areas: "head-input head-output" - "input output"; - grid-template-columns: 1fr 1fr; - gap: 1rem; -} - -h1.input{ - grid-area: head-input; -} - -h1.output{ - grid-area: head-output; -} - -h1{ - font-size: 2rem; - margin: 0; - margin-bottom: .5rem; -} - -textarea{ - grid-area: input; -} - -pre{ - grid-area: output; -} - - -main > div{ - width: 100%; - box-sizing: border-box; -} - -textarea{ - width: 100%; - height: 20vh; - margin: 20px 0; - -webkit-box-sizing: border-box; - -moz-box-sizing: border-box; - box-sizing: border-box; -} - -input[type="file"] { - display: none; -} -.custom-file-upload, button { - background-color: white; - color: var(--primary); - border: 2px solid var(--green); /* Green */ - /* background-color: lightgray; */ - display: inline-block; - padding: 6px 12px; - cursor: pointer; - font-size: 1rem; - border-radius: .2rem; - transition-duration: 0.4s; -} - -.custom-file-upload:hover, button:hover { - background-color: var(--green); /* Green */ - color: white; - } - -.custom-file-upload span, button span { - vertical-align: bottom; -} - -.material-symbols-outlined { - padding-right: 0.3rem; -} - -textarea, pre{ - background-color: white; - color: var(--primary); - border: 2px solid var(--green); /* Green */ - border-radius: .2rem; - margin: .5rem 0; - min-height: 10rem; - padding: .2rem; -} - -@media (max-width: 600px) { - main{ - width: 100wv; - } - - main > div{ - width: 100%; - } -} +:root { + --green: #4caf50; + --primary: #3b3b3b; +} + +* { + font-family: sans-serif; + font-size: 18px; +} + +body { + margin: 0; + padding: 0; + color: var(--primary); +} + +main { + display: grid; + width: 90vw; + margin: 5vw; + flex-wrap: wrap; + grid-template-areas: + "head-input head-output" + "input output"; + grid-template-columns: 1fr 1fr; + gap: 1rem; +} + +h1.input { + grid-area: head-input; +} + +h1.output { + grid-area: head-output; +} + +h1 { + font-size: 2rem; + margin: 0; + margin-bottom: 0.5rem; +} + +textarea { + grid-area: input; +} + +pre { + grid-area: output; +} + +main > div { + width: 100%; + box-sizing: border-box; +} + +textarea { + width: 100%; + height: 20vh; + margin: 20px 0; + -webkit-box-sizing: border-box; + -moz-box-sizing: border-box; + box-sizing: border-box; +} + +input[type="file"] { + display: none; +} +.custom-file-upload, +button { + background-color: white; + color: var(--primary); + border: 2px solid var(--green); /* Green */ + /* background-color: lightgray; */ + display: inline-block; + padding: 6px 12px; + cursor: pointer; + font-size: 1rem; + border-radius: 0.2rem; + transition-duration: 0.4s; +} + +.custom-file-upload:hover, +button:hover { + background-color: var(--green); /* Green */ + color: white; +} + +.custom-file-upload span, +button span { + vertical-align: bottom; +} + +.material-symbols-outlined { + padding-right: 0.3rem; +} + +.form_button { + display: flex; + flex-direction: row; + gap: 0.5rem; + align-items: center; +} + +@supports (-webkit-appearance: none) or (-moz-appearance: none) { + .checkbox-wrapper-14 input[type="checkbox"] { + --active: var(--green); + --active-inner: #fff; + --border: var(--primary); + --border-hover: var(--green); + --background: #fff; + -webkit-appearance: none; + -moz-appearance: none; + height: 21px; + outline: none; + display: inline-block; + vertical-align: top; + position: relative; + margin: 0; + cursor: pointer; + border: 1px solid var(--bc, var(--border)); + background: var(--b, var(--background)); + transition: + background 0.3s, + border-color 0.3s, + box-shadow 0.2s; + } + .checkbox-wrapper-14 input[type="checkbox"]:after { + content: ""; + display: block; + left: 0; + top: 0; + position: absolute; + transition: + transform var(--d-t, 0.3s) var(--d-t-e, ease), + opacity var(--d-o, 0.2s); + } + .checkbox-wrapper-14 input[type="checkbox"]:checked { + --b: var(--active); + --bc: var(--active); + --d-o: 0.3s; + --d-t: 0.6s; + --d-t-e: cubic-bezier(0.2, 0.85, 0.32, 1.2); + } + .checkbox-wrapper-14 input[type="checkbox"]:disabled { + --b: var(--disabled); + cursor: not-allowed; + opacity: 0.9; + } + .checkbox-wrapper-14 input[type="checkbox"]:disabled:checked { + --b: var(--disabled-inner); + --bc: var(--border); + } + .checkbox-wrapper-14 input[type="checkbox"]:disabled + label { + cursor: not-allowed; + } + .checkbox-wrapper-14 input[type="checkbox"]:hover:not(:checked):not(:disabled) { + --bc: var(--border-hover); + } + .checkbox-wrapper-14 input[type="checkbox"]:focus { + box-shadow: 0 0 0 var(--focus); + } + .checkbox-wrapper-14 input[type="checkbox"]:not(.switch) { + width: 21px; + } + .checkbox-wrapper-14 input[type="checkbox"]:not(.switch):after { + opacity: var(--o, 0); + } + .checkbox-wrapper-14 input[type="checkbox"]:not(.switch):checked { + --o: 1; + } + .checkbox-wrapper-14 input[type="checkbox"] + label { + display: inline-block; + vertical-align: middle; + cursor: pointer; + margin-left: 4px; + } + + .checkbox-wrapper-14 input[type="checkbox"]:not(.switch) { + border-radius: 7px; + } + .checkbox-wrapper-14 input[type="checkbox"]:not(.switch):after { + width: 5px; + height: 9px; + border: 2px solid var(--active-inner); + border-top: 0; + border-left: 0; + left: 7px; + top: 4px; + transform: rotate(var(--r, 20deg)); + } + .checkbox-wrapper-14 input[type="checkbox"]:not(.switch):checked { + --r: 43deg; + } + .checkbox-wrapper-14 input[type="checkbox"].switch { + width: 38px; + border-radius: 11px; + } + .checkbox-wrapper-14 input[type="checkbox"].switch:after { + left: 2px; + top: 2px; + border-radius: 50%; + width: 17px; + height: 17px; + background: var(--ab, var(--border)); + transform: translateX(var(--x, 0)); + } + .checkbox-wrapper-14 input[type="checkbox"].switch:checked { + --ab: var(--active-inner); + --x: 17px; + } + .checkbox-wrapper-14 input[type="checkbox"].switch:disabled:not(:checked):after { + opacity: 0.6; + } +} + +.checkbox-wrapper-14 * { + box-sizing: inherit; +} +.checkbox-wrapper-14 *:before, +.checkbox-wrapper-14 *:after { + box-sizing: inherit; +} + +.checkbox-wrapper-14 { + display: flex; + align-items: center; +} + +textarea, +pre { + background-color: white; + color: var(--primary); + border: 2px solid var(--green); /* Green */ + border-radius: 0.2rem; + margin: 0.5rem 0; + min-height: 10rem; + padding: 0.2rem; +} + +@media (max-width: 600px) { + main { + width: 100wv; + } + + main > div { + width: 100%; + } +} diff --git a/src/templates/index.html b/src/templates/index.html index be00e7f..acfc7ce 100755 --- a/src/templates/index.html +++ b/src/templates/index.html @@ -6,24 +6,30 @@
+

Upload File/Enter Text

-

Upload File/Enter Text

-
- - + +
+ + +
+ + +
+

{% if resp_text %} +

Converted Text