CHANGE: Code Refactoring + Docker image + small fixes

This commit is contained in:
2024-10-08 14:49:11 +02:00
parent ca8df8600b
commit e74e2aa49e
6 changed files with 68 additions and 38 deletions

27
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,27 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Flask",
"type": "debugpy",
"request": "launch",
"module": "flask",
"env": { "FLASK_APP": "app.py", "FLASK_DEBUG": "1" },
"args": ["run", "--no-debugger", "--no-reload"],
"jinja": true,
"autoStartBrowser": false,
"cwd": "${workspaceFolder}/src"
},
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"cwd": "${workspaceFolder}/src"
}
]
}

View File

@@ -1,9 +1,9 @@
FROM python:3.9 FROM python:3.12.2-alpine
COPY requirements.txt /home COPY requirements.txt /home
RUN pip install -r /home/requirements.txt RUN pip install -r /home/requirements.txt
COPY src webserver COPY src webserver
WORKDIR /webserver WORKDIR /webserver
ENV FLASK_DEBUG=false ENV FLASK_DEBUG=false
ENTRYPOINT [ "python3", "-u", "main.py" ] ENTRYPOINT [ "python3", "-u", "app.py" ]
EXPOSE 5000 EXPOSE 5000

View File

@@ -1,9 +1,8 @@
version: '3.0'
services: services:
web: web:
build: . build: .
image: anki_convert image: git.letsstein.de/tom/anki_convert
container_name: anki_convert container_name: anki_convert
restart: unless-stopped restart: unless-stopped
ports: ports:
- 8080:5000 - 8080:5000

4
requirements.txt Executable file → Normal file
View File

@@ -1,2 +1,2 @@
flask Flask==3.0.3
pdfplumber pdfplumber==0.11.4

View File

@@ -17,27 +17,26 @@ def index():
@app.route("/upload", methods=["POST"])
def upload_file():
    """Handle an upload: convert the uploaded file, or else the posted text.

    Reads the ``file`` part and the ``text`` form field from the request.
    If a usable file name was sent, the file is stored in the ``uploads``
    directory and passed to ``functions.convert``; otherwise the text is
    passed to ``functions.convert_text``.

    Returns:
        The rendered ``index.html`` template with ``resp_text`` (converted
        text) and ``base_text`` (the text shown as input).
    """
    # Create uploads directory if it doesn't exist
    makedirs("uploads", exist_ok=True)

    # Gets uploaded file or text from html input
    uploaded_file = request.files["file"]
    text = request.form["text"]

    # The file name is client-controlled: keep only its last path component
    # so that names such as "../../x" cannot escape the uploads directory.
    # ``or ""`` also covers a missing (None) file name.
    filename = path.basename(uploaded_file.filename or "")

    if filename not in ("", ".", ".."):
        # Converts uploaded file and returns new text with anki fields
        filepath = path.join("uploads", filename)
        uploaded_file.save(filepath)
        response_text = functions.convert(filepath)
        text = response_text
    else:
        # Converts text and returns new text with anki fields
        response_text = functions.convert_text(text)

    return render_template("index.html", resp_text=response_text, base_text=text)
if __name__ == "__main__":
    port = int(environ.get('PORT', 5000))
    # Debug mode enables the Werkzeug interactive debugger, which permits
    # arbitrary code execution. Because the server listens on all interfaces,
    # debug must be opt-in (FLASK_DEBUG=1/true/yes/on) instead of hard-coded.
    debug = environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug, host='0.0.0.0', port=port)

View File

@@ -9,6 +9,7 @@ import pdfplumber
def convert(file_path=False): def convert(file_path=False):
"""Opens pdf and converts it into text""" """Opens pdf and converts it into text"""
if not file_path: if not file_path:
# if there is no server to provide a filepath, open filepath dialog
import tkinter as tk import tkinter as tk
from tkinter import filedialog from tkinter import filedialog
@@ -26,42 +27,46 @@ def convert(file_path=False):
crop = page.crop((60, 80, page.width, page.height)) crop = page.crop((60, 80, page.width, page.height))
text = crop.extract_text(layout=True) text = crop.extract_text(layout=True)
no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces
new_str = convert_text(no_trail) conv_string.append(convert_text(no_trail))
conv_string.append(new_str)
conv_string = "#################### neue Seite ####################\n".join( conv_string = "#################### neue Seite ####################\n".join(
conv_string conv_string
) )
# write converted pdf to file
file_path = file_path.replace(".pdf", ".txt") file_path = file_path.replace(".pdf", ".txt")
text_file = codecs.open(file_path, "w", "utf-8") text_file = codecs.open(file_path, "w", "utf-8")
text_file.write(conv_string) text_file.write(conv_string)
text_file.close() text_file.close()
print(f"Alles fertig, die Datei befindet sich unter {file_path}") print(f"Alles fertig, die Datei befindet sich unter {file_path}")
if __name__ != "__main__": return conv_string
return conv_string
def convert_text(text: str):
    """Wrap the text following each ':' in numbered anki cloze fields.

    The text is split into lines with ``seperate``. On every line, the part
    after a colon (at least two characters) is wrapped as ``{{cN::...}}``.
    N starts at 1 and grows by one after each line that received a field.

    Returns:
        The converted lines joined with newlines, with surrounding
        whitespace stripped.
    """
    field_nr = 1  # number of the next anki field
    converted = []  # lines after the anki fields were inserted

    for raw_line in seperate(text):
        new_line, hits = re.subn(
            "(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", raw_line
        )
        converted.append(new_line)
        # only advance the field number when this line actually got a field
        if hits > 0:
            field_nr += 1

    joined = "\n".join(converted)
    return joined.strip()
def seperate(text: str) -> list[str]:
    """Separate a text into a list of lines, re-joining wrapped lines.

    CRLF line endings are unified to LF first. A line break followed by one
    or more spaces and then an ASCII letter or digit is treated as a wrap
    inside a paragraph: the break and the indentation are collapsed into a
    single space. All other line breaks remain line boundaries.

    Args:
        text: The raw text to split.

    Returns:
        The lines of the cleaned text, without newline characters.
    """
    # unifies CRLF; str.replace is a no-op when no CRLF is present
    text = text.replace("\r\n", "\n")
    # clean linebreaks if they are not paragraph breaks.
    # Raw string: the former non-raw literal contained the invalid escape
    # "\ ", which raises a SyntaxWarning on Python 3.12+.
    clean_nl = re.sub(r"\n +([A-Za-z0-9])", r" \1", text)
    return clean_nl.split("\n")
if __name__ == "__main__": if __name__ == "__main__":