From e74e2aa49e249295ad384213db5dcce65431f215 Mon Sep 17 00:00:00 2001 From: tom Date: Tue, 8 Oct 2024 14:49:11 +0200 Subject: [PATCH] CHANGE: Code Refactoring + Docker image + small fixes --- .vscode/launch.json | 27 +++++++++++++++++++++++++ Dockerfile | 6 +++--- docker-compose.yml | 7 +++---- requirements.txt | 4 ++-- src/{main.py => app.py} | 17 ++++++++-------- src/functions.py | 45 +++++++++++++++++++++++------------------ 6 files changed, 68 insertions(+), 38 deletions(-) create mode 100644 .vscode/launch.json mode change 100755 => 100644 requirements.txt rename src/{main.py => app.py} (75%) diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..4fd0029 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,27 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Flask", + "type": "debugpy", + "request": "launch", + "module": "flask", + "env": { "FLASK_APP": "app.py", "FLASK_DEBUG": "1" }, + "args": ["run", "--no-debugger", "--no-reload"], + "jinja": true, + "autoStartBrowser": false, + "cwd": "${workspaceFolder}/src" + }, + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "cwd": "${workspaceFolder}/src" + } + ] +} diff --git a/Dockerfile b/Dockerfile index 9f545a8..6433e59 100755 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,9 @@ -FROM python:3.9 +FROM python:3.12.2-alpine COPY requirements.txt /home RUN pip install -r /home/requirements.txt COPY src webserver WORKDIR /webserver ENV FLASK_DEGUB=false -ENTRYPOINT [ "python3", "-u", "main.py" ] -EXPOSE 5000 \ No newline at end of file +ENTRYPOINT [ "python3", "-u", "app.py" ] +EXPOSE 5000 diff --git a/docker-compose.yml b/docker-compose.yml index 
b97bed9..7929201 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,9 +1,8 @@ -version: '3.0' services: web: build: . - image: anki_convert + image: git.letsstein.de/tom/anki_convert container_name: anki_convert restart: unless-stopped - ports: - - 8080:5000 \ No newline at end of file + ports: + - 8080:5000 diff --git a/requirements.txt b/requirements.txt old mode 100755 new mode 100644 index 5aefcd6..14f8d75 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -flask -pdfplumber +Flask==3.0.3 +pdfplumber==0.11.4 diff --git a/src/main.py b/src/app.py similarity index 75% rename from src/main.py rename to src/app.py index 960cff9..c8a09c9 100755 --- a/src/main.py +++ b/src/app.py @@ -17,27 +17,26 @@ def index(): @app.route("/upload", methods=["POST"]) def upload_file(): """Handles Upload -> Files will be extracted and converted, text just converted""" - uploaded_file = request.files["file"] - text = request.form["text"] + # Create uploads directory if it doesn't exist makedirs("uploads", exist_ok=True) + # Gets uploaded file or text from html input + uploaded_file = request.files["file"] + text = request.form["text"] + + # Check if file has been uploaded if uploaded_file.filename != "": + # Converts uploaded file and returns new text with anki fields filepath = path.join("uploads", uploaded_file.filename) uploaded_file.save(filepath) response_text = functions.convert(filepath) text = response_text - # response = make_response(response_text, 200) - # response.mimetype = "text/plain" else: + # Converts text and returns new text with anki fields response_text = functions.convert_text(text) - # response = make_response(response_text, 200) - # response.mimetype = "text/plain" - # return response #redirect(url_for('index')) return render_template("index.html", resp_text=response_text, base_text=text) if __name__ == "__main__": - # app.jinja_env.auto_reload = True - # app.config["TEMPLATES_AUTO_RELOAD"] = True port = int(environ.get('PORT', 5000)) 
app.run(debug=True, host='0.0.0.0', port=port) diff --git a/src/functions.py b/src/functions.py index 9a8b734..59d47c2 100755 --- a/src/functions.py +++ b/src/functions.py @@ -9,6 +9,7 @@ import pdfplumber def convert(file_path=False): """Opens pdf and converts it into text""" if not file_path: + # if there is no server to provide a filepath, open filepath dialog import tkinter as tk from tkinter import filedialog @@ -26,42 +27,46 @@ def convert(file_path=False): crop = page.crop((60, 80, page.width, page.height)) text = crop.extract_text(layout=True) no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces - new_str = convert_text(no_trail) - conv_string.append(new_str) + conv_string.append(convert_text(no_trail)) conv_string = "#################### neue Seite ####################\n".join( conv_string ) + # write converted pdf to file file_path = file_path.replace(".pdf", ".txt") text_file = codecs.open(file_path, "w", "utf-8") text_file.write(conv_string) text_file.close() print(f"Alles fertig, die Datei befindet sich unter {file_path}") - if __name__ != "__main__": - return conv_string + return conv_string -def convert_text(text): - """Seraches for ':' and converts into anki annotation""" - text = str(text) - if "\r\n" in text: - text = text.replace("\r\n", "\n") +def convert_text(text: str): + """Searches for ':' and converts into anki annotation""" + field_nr = 1 # number of anki field + changed_lines = [] # array with new lines -> anki fields added - no_wrong_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text) # clear wrong newlins - lines = re.split("\n", no_wrong_nl) # split into lines - - test = 1 - changed_lines = [] - for line in lines: - line, num = re.subn("(:)(..+)", rf"\1 {{{{c{test}::\2}}}}", line) + for line in seperate(text): + # add anki field into line and count the number of changes + line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line) + # if anki field added increase field number if num > 0: - test += 1 + field_nr += 1 + 
# add changed line to array changed_lines.append(line) - new_str = "\n".join(changed_lines).strip() - if __name__ != "__main__": - return new_str + + return "\n".join(changed_lines).strip() + +def seperate(text: str) -> list[str]: + """Separates a text into an array of lines""" + if "\r\n" in text: + # unifies CRLF + text = text.replace("\r\n", "\n") + # clean linebreaks if they are not paragraph breaks + clean_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text) + return clean_nl.split("\n") if __name__ == "__main__":