From e74e2aa49e249295ad384213db5dcce65431f215 Mon Sep 17 00:00:00 2001
From: tom <t.troeger.02@gmail.com>
Date: Tue, 8 Oct 2024 14:49:11 +0200
Subject: [PATCH] CHANGE: Code Refactoring + Docker image + small fixes

---
 .vscode/launch.json     | 27 +++++++++++++++++++++++++
 Dockerfile              |  6 +++---
 docker-compose.yml      |  7 +++----
 requirements.txt        |  4 ++--
 src/{main.py => app.py} | 17 ++++++++--------
 src/functions.py        | 45 +++++++++++++++++++++++------------------
 6 files changed, 68 insertions(+), 38 deletions(-)
 create mode 100644 .vscode/launch.json
 mode change 100755 => 100644 requirements.txt
 rename src/{main.py => app.py} (75%)

diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..4fd0029
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,27 @@
+{
+	// Use IntelliSense to learn about possible attributes.
+	// Hover to view descriptions of existing attributes.
+	// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+	"version": "0.2.0",
+	"configurations": [
+		{
+			"name": "Python Debugger: Flask",
+			"type": "debugpy",
+			"request": "launch",
+			"module": "flask",
+			"env": { "FLASK_APP": "app.py", "FLASK_DEBUG": "1" },
+			"args": ["run", "--no-debugger", "--no-reload"],
+			"jinja": true,
+			"autoStartBrowser": false,
+			"cwd": "${workspaceFolder}/src"
+		},
+		{
+			"name": "Python Debugger: Current File",
+			"type": "debugpy",
+			"request": "launch",
+			"program": "${file}",
+			"console": "integratedTerminal",
+			"cwd": "${workspaceFolder}/src"
+		}
+	]
+}
diff --git a/Dockerfile b/Dockerfile
index 9f545a8..6433e59 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,9 @@
-FROM python:3.9
+FROM python:3.12.2-alpine
 COPY requirements.txt /home
 RUN pip install -r /home/requirements.txt
 COPY src webserver
 WORKDIR /webserver
 
 ENV FLASK_DEGUB=false
-ENTRYPOINT [ "python3", "-u", "main.py" ]
-EXPOSE 5000
\ No newline at end of file
+ENTRYPOINT [ "python3", "-u", "app.py" ]
+EXPOSE 5000
diff --git a/docker-compose.yml b/docker-compose.yml
index b97bed9..7929201 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,9 +1,8 @@
-version: '3.0'
 services:
   web:
     build: .
-    image: anki_convert
+    image: git.letsstein.de/tom/anki_convert
     container_name: anki_convert
     restart: unless-stopped
-    ports: 
-      - 8080:5000
\ No newline at end of file
+    ports:
+      - 8080:5000
diff --git a/requirements.txt b/requirements.txt
old mode 100755
new mode 100644
index 5aefcd6..14f8d75
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-flask
-pdfplumber
+Flask==3.0.3
+pdfplumber==0.11.4
diff --git a/src/main.py b/src/app.py
similarity index 75%
rename from src/main.py
rename to src/app.py
index 960cff9..c8a09c9 100755
--- a/src/main.py
+++ b/src/app.py
@@ -17,27 +17,26 @@ def index():
 @app.route("/upload", methods=["POST"])
 def upload_file():
     """Handles Upload -> Files will be extracted and converted, text just converted"""
-    uploaded_file = request.files["file"]
-    text = request.form["text"]
+    # Create uploads directory if it doesn't exist
     makedirs("uploads", exist_ok=True)
 
+    # Gets uploaded file or text from html input
+    uploaded_file = request.files["file"]
+    text = request.form["text"]
+
+    # Check if file has been uploaded
     if uploaded_file.filename != "":
+        # Converts uploaded file and returns new text with anki fields
         filepath = path.join("uploads", uploaded_file.filename)
         uploaded_file.save(filepath)
         response_text = functions.convert(filepath)
         text = response_text
-        # response = make_response(response_text, 200)
-        # response.mimetype = "text/plain"
     else:
+        # Converts text and returns new text with anki fields
         response_text = functions.convert_text(text)
-        # response = make_response(response_text, 200)
-        # response.mimetype = "text/plain"
-    # return response #redirect(url_for('index'))
     return render_template("index.html", resp_text=response_text, base_text=text)
 
 
 if __name__ == "__main__":
-    # app.jinja_env.auto_reload = True
-    # app.config["TEMPLATES_AUTO_RELOAD"] = True
     port = int(environ.get('PORT', 5000))
     app.run(debug=True, host='0.0.0.0', port=port)
diff --git a/src/functions.py b/src/functions.py
index 9a8b734..59d47c2 100755
--- a/src/functions.py
+++ b/src/functions.py
@@ -9,6 +9,7 @@ import pdfplumber
 def convert(file_path=False):
     """Opens pdf and converts it into text"""
     if not file_path:
+        # if there is no server to provide a filepath, open filepath dialog
         import tkinter as tk
         from tkinter import filedialog
 
@@ -26,42 +27,46 @@ def convert(file_path=False):
             crop = page.crop((60, 80, page.width, page.height))
             text = crop.extract_text(layout=True)
             no_trail = re.sub("\ +\\n", "\n", text)  # cleared trailing spaces
-            new_str = convert_text(no_trail)
-            conv_string.append(new_str)
+            conv_string.append(convert_text(no_trail))
 
     conv_string = "#################### neue Seite ####################\n".join(
         conv_string
     )
 
+    # write converted pdf to file
     file_path = file_path.replace(".pdf", ".txt")
     text_file = codecs.open(file_path, "w", "utf-8")
     text_file.write(conv_string)
     text_file.close()
 
     print(f"Alles fertig, die Datei befindet sich unter {file_path}")
-    if __name__ != "__main__":
-        return conv_string
+    return conv_string
 
 
-def convert_text(text):
-    """Seraches for ':' and converts into anki annotation"""
-    text = str(text)
-    if "\r\n" in text:
-        text = text.replace("\r\n", "\n")
+def convert_text(text: str):
+    """Searches for ':' and converts into anki annotation"""
+    field_nr = 1 # number of anki field
+    changed_lines = [] # array with new lines -> anki fields added
 
-    no_wrong_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text)  # clear wrong newlins
-    lines = re.split("\n", no_wrong_nl)  # split into lines
-
-    test = 1
-    changed_lines = []
-    for line in lines:
-        line, num = re.subn("(:)(..+)", rf"\1 {{{{c{test}::\2}}}}", line)
+    for line in seperate(text):
+        # add anki field into line and count the number of changes
+        line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
+        # if anki field added increase field number
         if num > 0:
-            test += 1
+            field_nr += 1
+        # add changed line to array
         changed_lines.append(line)
-    new_str = "\n".join(changed_lines).strip()
-    if __name__ != "__main__":
-        return new_str
+
+    return "\n".join(changed_lines).strip()
+
+def seperate(text: str) -> list[str]:
+    """Separates a text into a list of lines"""
+    if "\r\n" in text:
+        # normalize Windows line endings (CRLF) to LF
+        text = text.replace("\r\n", "\n")
+    # join wrapped lines: a newline followed by spaces and a letter/digit becomes a single space
+    clean_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text)
+    return clean_nl.split("\n")
 
 
 if __name__ == "__main__":