CHANGE: Code Refactoring + Docker image + small fixes

2024-10-08 14:49:11 +02:00
parent ca8df8600b
commit e74e2aa49e
6 changed files with 68 additions and 38 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -0,0 +1,27 @@
+{
+	// Use IntelliSense to learn about possible attributes.
+	// Hover to view descriptions of existing attributes.
+	// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+	"version": "0.2.0",
+	"configurations": [
+		{
+			"name": "Python Debugger: Flask",
+			"type": "debugpy",
+			"request": "launch",
+			"module": "flask",
+			"env": { "FLASK_APP": "app.py", "FLASK_DEBUG": "1" },
+			"args": ["run", "--no-debugger", "--no-reload"],
+			"jinja": true,
+			"autoStartBrowser": false,
+			"cwd": "${workspaceFolder}/src"
+		},
+		{
+			"name": "Python Debugger: Current File",
+			"type": "debugpy",
+			"request": "launch",
+			"program": "${file}",
+			"console": "integratedTerminal",
+			"cwd": "${workspaceFolder}/src"
+		}
+	]
+}
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,9 @@
-FROM python:3.9
+FROM python:3.12.2-alpine
 COPY requirements.txt /home
 RUN pip install -r /home/requirements.txt
 COPY src webserver
 WORKDIR /webserver

 ENV FLASK_DEGUB=false
-ENTRYPOINT [ "python3", "-u", "main.py" ]
-EXPOSE 5000
+ENTRYPOINT [ "python3", "-u", "app.py" ]
+EXPOSE 5000
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,9 +1,8 @@
-version: '3.0'
 services:
  web:
    build: .
-    image: anki_convert
+    image: git.letsstein.de/tom/anki_convert
    container_name: anki_convert
    restart: unless-stopped
-    ports: 
-      - 8080:5000
+    ports:
+      - 8080:5000
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-flask
-pdfplumber
+Flask==3.0.3
+pdfplumber==0.11.4
--- a/src/main.py
+++ b/src/main.py
@@ -17,27 +17,26 @@ def index():
@app.route("/upload", methods=["POST"])
 def upload_file():
    """Handles Upload -> Files will be extracted and converted, text just converted"""
-    uploaded_file = request.files["file"]
-    text = request.form["text"]
+    # Create uploads directory if it doesn't exist
    makedirs("uploads", exist_ok=True)

+    # Get the uploaded file and the text field from the HTML form
+    uploaded_file = request.files["file"]
+    text = request.form["text"]
+
+    # Check if file has been uploaded
    if uploaded_file.filename != "":
+        # Save the uploaded file, then convert it into text with anki fields
        filepath = path.join("uploads", uploaded_file.filename)
        uploaded_file.save(filepath)
        response_text = functions.convert(filepath)
        text = response_text
-        # response = make_response(response_text, 200)
-        # response.mimetype = "text/plain"
    else:
+        # Converts text and returns new text with anki fields
        response_text = functions.convert_text(text)
-        # response = make_response(response_text, 200)
-        # response.mimetype = "text/plain"
-    # return response #redirect(url_for('index'))
    return render_template("index.html", resp_text=response_text, base_text=text)


 if __name__ == "__main__":
-    # app.jinja_env.auto_reload = True
-    # app.config["TEMPLATES_AUTO_RELOAD"] = True
    port = int(environ.get('PORT', 5000))
    app.run(debug=True, host='0.0.0.0', port=port)
--- a/src/functions.py
+++ b/src/functions.py
@@ -9,6 +9,7 @@ import pdfplumber
 def convert(file_path=False):
    """Opens pdf and converts it into text"""
    if not file_path:
+        # no file path was passed in (e.g. not called by the server), so open a file dialog
        import tkinter as tk
        from tkinter import filedialog

@@ -26,42 +27,46 @@ def convert(file_path=False):
            crop = page.crop((60, 80, page.width, page.height))
            text = crop.extract_text(layout=True)
            no_trail = re.sub("\ +\\n", "\n", text)  # cleared trailing spaces
-            new_str = convert_text(no_trail)
-            conv_string.append(new_str)
+            conv_string.append(convert_text(no_trail))

    conv_string = "#################### neue Seite ####################\n".join(
        conv_string
    )

+    # write the converted text to a .txt file named after the pdf
    file_path = file_path.replace(".pdf", ".txt")
    text_file = codecs.open(file_path, "w", "utf-8")
    text_file.write(conv_string)
    text_file.close()

    print(f"Alles fertig, die Datei befindet sich unter {file_path}")
-    if __name__ != "__main__":
-        return conv_string
+    return conv_string


-def convert_text(text):
-    """Seraches for ':' and converts into anki annotation"""
-    text = str(text)
-    if "\r\n" in text:
-        text = text.replace("\r\n", "\n")
+def convert_text(text: str):
+    """Searches for ':' and converts into anki annotation"""
+    field_nr = 1 # number of anki field
+    changed_lines = [] # array with new lines -> anki fields added

-    no_wrong_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text)  # clear wrong newlins
-    lines = re.split("\n", no_wrong_nl)  # split into lines
-
-    test = 1
-    changed_lines = []
-    for line in lines:
-        line, num = re.subn("(:)(..+)", rf"\1 {{{{c{test}::\2}}}}", line)
+    for line in seperate(text):
+        # add anki field into line and count the number of changes
+        line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
+        # if anki field added increase field number
        if num > 0:
-            test += 1
+            field_nr += 1
+        # add changed line to array
        changed_lines.append(line)
-    new_str = "\n".join(changed_lines).strip()
-    if __name__ != "__main__":
-        return new_str
+
+    return "\n".join(changed_lines).strip()
+
+def seperate(text: str) -> list[str]:
+    """Separates a text into a list of lines"""
+    if "\r\n" in text:
+        # normalize CRLF line endings to LF
+        text = text.replace("\r\n", "\n")
+    # join wrapped lines: a newline followed by spaces and an alphanumeric
+    # character is replaced by a single space (not a real paragraph break)
+    clean_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text)
+    return clean_nl.split("\n")


 if __name__ == "__main__":