CHANGE: Code Refactoring + Docker image + small fixes

2024-10-08 14:49:11 +02:00
parent ca8df8600b
commit e74e2aa49e
6 changed files with 68 additions and 38 deletions
--- a/src/main.py
+++ b/src/main.py
@@ -17,27 +17,26 @@ def index():
@app.route("/upload", methods=["POST"])
 def upload_file():
    """Handles Upload -> Files will be extracted and converted, text just converted"""
-    uploaded_file = request.files["file"]
-    text = request.form["text"]
+    # Create uploads directory if it doesn't exist
    makedirs("uploads", exist_ok=True)

+    # Get the uploaded file and the text from the HTML form
+    uploaded_file = request.files["file"]
+    text = request.form["text"]
+
+    # Check if file has been uploaded
    if uploaded_file.filename != "":
+        # Converts uploaded file and returns new text with anki fields
        filepath = path.join("uploads", uploaded_file.filename)
        uploaded_file.save(filepath)
        response_text = functions.convert(filepath)
        text = response_text
-        # response = make_response(response_text, 200)
-        # response.mimetype = "text/plain"
    else:
+        # Converts text and returns new text with anki fields
        response_text = functions.convert_text(text)
-        # response = make_response(response_text, 200)
-        # response.mimetype = "text/plain"
-    # return response #redirect(url_for('index'))
    return render_template("index.html", resp_text=response_text, base_text=text)


 if __name__ == "__main__":
-    # app.jinja_env.auto_reload = True
-    # app.config["TEMPLATES_AUTO_RELOAD"] = True
    port = int(environ.get('PORT', 5000))
    app.run(debug=True, host='0.0.0.0', port=port)
--- a/src/functions.py
+++ b/src/functions.py
@@ -9,6 +9,7 @@ import pdfplumber
 def convert(file_path=False):
    """Opens pdf and converts it into text"""
    if not file_path:
+        # if there is no server to provide a filepath, open filepath dialog
        import tkinter as tk
        from tkinter import filedialog

@@ -26,42 +27,46 @@ def convert(file_path=False):
            crop = page.crop((60, 80, page.width, page.height))
            text = crop.extract_text(layout=True)
            no_trail = re.sub("\ +\\n", "\n", text)  # cleared trailing spaces
-            new_str = convert_text(no_trail)
-            conv_string.append(new_str)
+            conv_string.append(convert_text(no_trail))

    conv_string = "#################### neue Seite ####################\n".join(
        conv_string
    )

+    # write converted pdf to file
    file_path = file_path.replace(".pdf", ".txt")
    text_file = codecs.open(file_path, "w", "utf-8")
    text_file.write(conv_string)
    text_file.close()

    print(f"Alles fertig, die Datei befindet sich unter {file_path}")
-    if __name__ != "__main__":
-        return conv_string
+    return conv_string


-def convert_text(text):
-    """Seraches for ':' and converts into anki annotation"""
-    text = str(text)
-    if "\r\n" in text:
-        text = text.replace("\r\n", "\n")
+def convert_text(text: str):
+    """Searches for ':' and converts into anki annotation"""
+    field_nr = 1 # number of anki field
+    changed_lines = [] # array with new lines -> anki fields added

-    no_wrong_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text)  # clear wrong newlins
-    lines = re.split("\n", no_wrong_nl)  # split into lines
-
-    test = 1
-    changed_lines = []
-    for line in lines:
-        line, num = re.subn("(:)(..+)", rf"\1 {{{{c{test}::\2}}}}", line)
+    for line in seperate(text):
+        # add anki field into line and count the number of changes
+        line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
+        # if anki field added increase field number
        if num > 0:
-            test += 1
+            field_nr += 1
+        # add changed line to array
        changed_lines.append(line)
-    new_str = "\n".join(changed_lines).strip()
-    if __name__ != "__main__":
-        return new_str
+
+    return "\n".join(changed_lines).strip()
+
+def seperate(text: str) -> list[str]:
+    """Separates a text into a list of lines"""
+    if "\r\n" in text:
+        # unifies CRLF
+        text = text.replace("\r\n", "\n")
+    # join wrapped lines: a newline followed by spaces and a letter/digit continues the previous line
+    clean_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text)
+    return clean_nl.split("\n")


 if __name__ == "__main__":