CHANGE: Code Refactoring + Docker image + small fixes

2024-10-08 14:49:11 +02:00
parent ca8df8600b
commit e74e2aa49e
6 changed files with 68 additions and 38 deletions
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -0,0 +1,27 @@
 {
 	// Use IntelliSense to learn about possible attributes.
 	// Hover to view descriptions of existing attributes.
 	// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
 	"version": "0.2.0",
 	"configurations": [
 		{
 			"name": "Python Debugger: Flask",
 			"type": "debugpy",
 			"request": "launch",
 			"module": "flask",
 			"env": { "FLASK_APP": "app.py", "FLASK_DEBUG": "1" },
 			"args": ["run", "--no-debugger", "--no-reload"],
 			"jinja": true,
 			"autoStartBrowser": false,
 			"cwd": "${workspaceFolder}/src"
 		},
 		{
 			"name": "Python Debugger: Current File",
 			"type": "debugpy",
 			"request": "launch",
 			"program": "${file}",
 			"console": "integratedTerminal",
 			"cwd": "${workspaceFolder}/src"
 		}
 	]
 }
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,9 @@
-FROM python:3.9
+FROM python:3.12.2-alpine
 COPY requirements.txt /home
 RUN pip install -r /home/requirements.txt
 COPY src webserver
 WORKDIR /webserver
 # Flask's debug switch is spelled FLASK_DEBUG; keep it off inside the image
 ENV FLASK_DEBUG=false
-ENTRYPOINT [ "python3", "-u", "main.py" ]
+ENTRYPOINT [ "python3", "-u", "app.py" ]
-EXPOSE 5000
+EXPOSE 5000
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,9 +1,8 @@
 version: '3.0'
 services:
  web:
    build: .
-    image: anki_convert
+    image: git.letsstein.de/tom/anki_convert
    container_name: anki_convert
    restart: unless-stopped
-    ports: 
+    ports:
-      - 8080:5000
+      - 8080:5000
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-flask
+Flask==3.0.3
-pdfplumber
+pdfplumber==0.11.4
--- a/src/main.py
+++ b/src/main.py
@@ -17,27 +17,26 @@ def index():
@app.route("/upload", methods=["POST"])
 def upload_file():
    """Handle a POST to /upload and render the converted result.

    If a file was uploaded it is saved under ``uploads/`` and passed to
    ``functions.convert``; otherwise the submitted ``text`` form field is
    passed to ``functions.convert_text``. In both cases ``index.html`` is
    rendered with the converted text as ``resp_text``. ``base_text`` is the
    submitted text, or the converted text again when a file was uploaded.
    """
-    uploaded_file = request.files["file"]
+    # Create uploads directory if it doesn't exist
    # NOTE(review): "text" is read from the form here and again a few lines
    # below; one of the two reads looks redundant - confirm.
    text = request.form["text"]
    makedirs("uploads", exist_ok=True)
    # Gets uploaded file or text from html input
    uploaded_file = request.files["file"]
    text = request.form["text"]
    # Check if file has been uploaded
    if uploaded_file.filename != "":
        # Converts uploaded file and returns new text with anki fields
        # NOTE(review): the filename is supplied by the client and joined
        # without sanitising; consider stripping directory components
        # before saving - confirm.
        filepath = path.join("uploads", uploaded_file.filename)
        uploaded_file.save(filepath)
        response_text = functions.convert(filepath)
        # the converted text is also shown as the base text for file uploads
        text = response_text
        # response = make_response(response_text, 200)
        # response.mimetype = "text/plain"
    else:
        # Converts text and returns new text with anki fields
        response_text = functions.convert_text(text)
        # response = make_response(response_text, 200)
        # response.mimetype = "text/plain"
    # return response #redirect(url_for('index'))
    return render_template("index.html", resp_text=response_text, base_text=text)
 if __name__ == "__main__":
    # Entry point when this module is executed directly with python
    # (instead of through ``flask run``).
    # app.jinja_env.auto_reload = True
    # app.config["TEMPLATES_AUTO_RELOAD"] = True
    port = int(environ.get('PORT', 5000))
    # The server listens on all interfaces, so the interactive debugger must
    # not be enabled unconditionally; only turn debug mode on when the
    # FLASK_DEBUG environment variable asks for it.
    debug_flag = environ.get('FLASK_DEBUG', '').strip().lower()
    debug = debug_flag not in ('', '0', 'false', 'no')
    app.run(debug=debug, host='0.0.0.0', port=port)
--- a/src/functions.py
+++ b/src/functions.py
@@ -9,6 +9,7 @@ import pdfplumber
 def convert(file_path=False):
    """Opens pdf and converts it into text"""
    if not file_path:
        # if there is no server to provide a filepath, open filepath dialog
        import tkinter as tk
        from tkinter import filedialog
@@ -26,42 +27,46 @@ def convert(file_path=False):
            crop = page.crop((60, 80, page.width, page.height))
            text = crop.extract_text(layout=True)
            no_trail = re.sub("\ +\\n", "\n", text)  # cleared trailing spaces
-            new_str = convert_text(no_trail)
+            conv_string.append(convert_text(no_trail))
            conv_string.append(new_str)
    conv_string = "#################### neue Seite ####################\n".join(
        conv_string
    )
    # write converted pdf to file
    file_path = file_path.replace(".pdf", ".txt")
    text_file = codecs.open(file_path, "w", "utf-8")
    text_file.write(conv_string)
    text_file.close()
    print(f"Alles fertig, die Datei befindet sich unter {file_path}")
-    if __name__ != "__main__":
+    return conv_string
        return conv_string
-def convert_text(text):
+def convert_text(text: str):
-    """Seraches for ':' and converts into anki annotation"""
+    """Searches for ':' and converts into anki annotation"""
    # NOTE(review): removed (-) and added (+) lines are interleaved in this
    # hunk; the unmarked loop further down still uses `lines` and `test`,
    # which are only assigned on removed lines - confirm against the
    # committed file before relying on this listing.
-    text = str(text)
+    field_nr = 1 # number inserted into the next {{cN::...}} anki field
-    if "\r\n" in text:
+    changed_lines = [] # lines after the anki fields have been added
        text = text.replace("\r\n", "\n")
-    no_wrong_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text)  # clear wrong newlins
+    for line in seperate(text):
-    lines = re.split("\n", no_wrong_nl)  # split into lines
+        # add anki field into line and count the number of changes
-
+        line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
-    test = 1
+        # if anki field added increase field number
    changed_lines = []
    for line in lines:
        # wraps the text after a ':' (at least two characters) in an anki field
        line, num = re.subn("(:)(..+)", rf"\1 {{{{c{test}::\2}}}}", line)
        if num > 0:
-            test += 1
+            field_nr += 1
        # add changed line to array
        changed_lines.append(line)
-    new_str = "\n".join(changed_lines).strip()
+
-    if __name__ != "__main__":
+    return "\n".join(changed_lines).strip()
-        return new_str
+
 def seperate(text: str) -> list[str]:
    """Separate a text into a list of lines.

    CRLF line endings are unified to LF first. A newline that is followed
    by one or more spaces and then a letter or digit is treated as a wrapped
    line rather than a paragraph break and is replaced by a single space.

    Returns the remaining lines, split on LF.
    """
    # str.replace leaves the text untouched when it contains no CRLF,
    # so no membership pre-check is needed
    text = text.replace("\r\n", "\n")
    # raw string: "\ " inside a normal literal is an invalid escape sequence
    # and triggers a SyntaxWarning on current Python versions
    clean_nl = re.sub(r"\n +([A-Za-z0-9])", r" \1", text)
    return clean_nl.split("\n")
 if __name__ == "__main__":
`@@ -1,2 +1,2 @@`
	`flask`	`Flask==3.0.3`
	`pdfplumber`	`pdfplumber==0.11.4`