CHANGE: Code Refactoring + Docker image + small fixes

This commit is contained in:
2024-10-08 14:49:11 +02:00
parent ca8df8600b
commit e74e2aa49e
6 changed files with 68 additions and 38 deletions

27
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,27 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Flask",
"type": "debugpy",
"request": "launch",
"module": "flask",
"env": { "FLASK_APP": "app.py", "FLASK_DEBUG": "1" },
"args": ["run", "--no-debugger", "--no-reload"],
"jinja": true,
"autoStartBrowser": false,
"cwd": "${workspaceFolder}/src"
},
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"cwd": "${workspaceFolder}/src"
}
]
}

View File

@@ -1,9 +1,9 @@
FROM python:3.9
FROM python:3.12.2-alpine
COPY requirements.txt /home
RUN pip install -r /home/requirements.txt
COPY src webserver
WORKDIR /webserver
# Correct spelling of Flask's debug variable (was FLASK_DEGUB, a name nothing reads)
ENV FLASK_DEBUG=false
ENTRYPOINT [ "python3", "-u", "main.py" ]
EXPOSE 5000
ENTRYPOINT [ "python3", "-u", "app.py" ]
EXPOSE 5000

View File

@@ -1,9 +1,8 @@
version: '3.0'
services:
web:
build: .
image: anki_convert
image: git.letsstein.de/tom/anki_convert
container_name: anki_convert
restart: unless-stopped
ports:
- 8080:5000
ports:
- 8080:5000

4
requirements.txt Executable file → Normal file
View File

@@ -1,2 +1,2 @@
flask
pdfplumber
Flask==3.0.3
pdfplumber==0.11.4

View File

@@ -17,27 +17,26 @@ def index():
@app.route("/upload", methods=["POST"])
def upload_file():
"""Handles Upload -> Files will be extracted and converted, text just converted"""
uploaded_file = request.files["file"]
text = request.form["text"]
# Create uploads directory if it doesn't exist
makedirs("uploads", exist_ok=True)
# Gets uploaded file or text from html input
uploaded_file = request.files["file"]
text = request.form["text"]
# Check if file has been uploaded
if uploaded_file.filename != "":
# Converts uploaded file and returns new text with anki fields
filepath = path.join("uploads", uploaded_file.filename)
uploaded_file.save(filepath)
response_text = functions.convert(filepath)
text = response_text
# response = make_response(response_text, 200)
# response.mimetype = "text/plain"
else:
# Converts text and returns new text with anki fields
response_text = functions.convert_text(text)
# response = make_response(response_text, 200)
# response.mimetype = "text/plain"
# return response #redirect(url_for('index'))
return render_template("index.html", resp_text=response_text, base_text=text)
if __name__ == "__main__":
# app.jinja_env.auto_reload = True
# app.config["TEMPLATES_AUTO_RELOAD"] = True
port = int(environ.get('PORT', 5000))
app.run(debug=True, host='0.0.0.0', port=port)

View File

@@ -9,6 +9,7 @@ import pdfplumber
def convert(file_path=False):
"""Opens pdf and converts it into text"""
if not file_path:
# if there is no server to provide a filepath, open filepath dialog
import tkinter as tk
from tkinter import filedialog
@@ -26,42 +27,46 @@ def convert(file_path=False):
crop = page.crop((60, 80, page.width, page.height))
text = crop.extract_text(layout=True)
no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces
new_str = convert_text(no_trail)
conv_string.append(new_str)
conv_string.append(convert_text(no_trail))
conv_string = "#################### neue Seite ####################\n".join(
conv_string
)
# write converted pdf to file
file_path = file_path.replace(".pdf", ".txt")
text_file = codecs.open(file_path, "w", "utf-8")
text_file.write(conv_string)
text_file.close()
print(f"Alles fertig, die Datei befindet sich unter {file_path}")
if __name__ != "__main__":
return conv_string
return conv_string
def convert_text(text):
"""Seraches for ':' and converts into anki annotation"""
text = str(text)
if "\r\n" in text:
text = text.replace("\r\n", "\n")
def convert_text(text: str):
"""Searches for ':' and converts into anki annotation"""
field_nr = 1 # number of anki field
changed_lines = [] # array with new lines -> anki fields added
no_wrong_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text) # clear wrong newlins
lines = re.split("\n", no_wrong_nl) # split into lines
test = 1
changed_lines = []
for line in lines:
line, num = re.subn("(:)(..+)", rf"\1 {{{{c{test}::\2}}}}", line)
for line in seperate(text):
# add anki field into line and count the number of changes
line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
# if anki field added increase field number
if num > 0:
test += 1
field_nr += 1
# add changed line to array
changed_lines.append(line)
new_str = "\n".join(changed_lines).strip()
if __name__ != "__main__":
return new_str
return "\n".join(changed_lines).strip()
def seperate(text: str) -> list[str]:
    """Split a text into lines, re-joining wrapped continuation lines.

    CRLF line endings are normalised to LF first. A newline that is followed
    by one or more spaces and then an ASCII letter or digit is treated as a
    soft wrap instead of a real line break: the newline and the indentation
    are replaced by a single space, so the continuation stays on the
    previous line.

    Args:
        text: The raw text to split.

    Returns:
        The lines of the cleaned text, without line terminators. An empty
        input yields a list containing one empty string.
    """
    # unify CRLF; str.replace is already a no-op when nothing matches,
    # so no membership pre-check is needed
    text = text.replace("\r\n", "\n")
    # Raw string on purpose: the earlier "\\n\ +" literal depended on the
    # invalid escape "\ ", which triggers a SyntaxWarning on Python 3.12.
    clean_nl = re.sub(r"\n +([A-Za-z0-9])", r" \1", text)
    return clean_nl.split("\n")
if __name__ == "__main__":