CHANGE: Code Refactoring + Docker image + small fixes
This commit is contained in:
27
.vscode/launch.json
vendored
Normal file
27
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python Debugger: Flask",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"module": "flask",
|
||||||
|
"env": { "FLASK_APP": "app.py", "FLASK_DEBUG": "1" },
|
||||||
|
"args": ["run", "--no-debugger", "--no-reload"],
|
||||||
|
"jinja": true,
|
||||||
|
"autoStartBrowser": false,
|
||||||
|
"cwd": "${workspaceFolder}/src"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Python Debugger: Current File",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${file}",
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"cwd": "${workspaceFolder}/src"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
FROM python:3.9
|
FROM python:3.12.2-alpine
|
||||||
COPY requirements.txt /home
|
COPY requirements.txt /home
|
||||||
RUN pip install -r /home/requirements.txt
|
RUN pip install -r /home/requirements.txt
|
||||||
COPY src webserver
|
COPY src webserver
|
||||||
WORKDIR /webserver
|
WORKDIR /webserver
|
||||||
|
|
||||||
ENV FLASK_DEGUB=false
|
ENV FLASK_DEGUB=false
|
||||||
ENTRYPOINT [ "python3", "-u", "main.py" ]
|
ENTRYPOINT [ "python3", "-u", "app.py" ]
|
||||||
EXPOSE 5000
|
EXPOSE 5000
|
||||||
@@ -1,8 +1,7 @@
|
|||||||
version: '3.0'
|
|
||||||
services:
|
services:
|
||||||
web:
|
web:
|
||||||
build: .
|
build: .
|
||||||
image: anki_convert
|
image: git.letsstein.de/tom/anki_convert
|
||||||
container_name: anki_convert
|
container_name: anki_convert
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
4
requirements.txt
Executable file → Normal file
4
requirements.txt
Executable file → Normal file
@@ -1,2 +1,2 @@
|
|||||||
flask
|
Flask==3.0.3
|
||||||
pdfplumber
|
pdfplumber==0.11.4
|
||||||
|
|||||||
@@ -17,27 +17,26 @@ def index():
|
|||||||
@app.route("/upload", methods=["POST"])
|
@app.route("/upload", methods=["POST"])
|
||||||
def upload_file():
|
def upload_file():
|
||||||
"""Handles Upload -> Files will be extracted and converted, text just converted"""
|
"""Handles Upload -> Files will be extracted and converted, text just converted"""
|
||||||
uploaded_file = request.files["file"]
|
# Create uploads directory if it doesn't exist
|
||||||
text = request.form["text"]
|
|
||||||
makedirs("uploads", exist_ok=True)
|
makedirs("uploads", exist_ok=True)
|
||||||
|
|
||||||
|
# Gets uploaded file or text from html input
|
||||||
|
uploaded_file = request.files["file"]
|
||||||
|
text = request.form["text"]
|
||||||
|
|
||||||
|
# Check if file has been uploaded
|
||||||
if uploaded_file.filename != "":
|
if uploaded_file.filename != "":
|
||||||
|
# Converts uploaded file and returns new text with anki fields
|
||||||
filepath = path.join("uploads", uploaded_file.filename)
|
filepath = path.join("uploads", uploaded_file.filename)
|
||||||
uploaded_file.save(filepath)
|
uploaded_file.save(filepath)
|
||||||
response_text = functions.convert(filepath)
|
response_text = functions.convert(filepath)
|
||||||
text = response_text
|
text = response_text
|
||||||
# response = make_response(response_text, 200)
|
|
||||||
# response.mimetype = "text/plain"
|
|
||||||
else:
|
else:
|
||||||
|
# Converts text and returns new text with anki fields
|
||||||
response_text = functions.convert_text(text)
|
response_text = functions.convert_text(text)
|
||||||
# response = make_response(response_text, 200)
|
|
||||||
# response.mimetype = "text/plain"
|
|
||||||
# return response #redirect(url_for('index'))
|
|
||||||
return render_template("index.html", resp_text=response_text, base_text=text)
|
return render_template("index.html", resp_text=response_text, base_text=text)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# app.jinja_env.auto_reload = True
|
|
||||||
# app.config["TEMPLATES_AUTO_RELOAD"] = True
|
|
||||||
port = int(environ.get('PORT', 5000))
|
port = int(environ.get('PORT', 5000))
|
||||||
app.run(debug=True, host='0.0.0.0', port=port)
|
app.run(debug=True, host='0.0.0.0', port=port)
|
||||||
@@ -9,6 +9,7 @@ import pdfplumber
|
|||||||
def convert(file_path=False):
|
def convert(file_path=False):
|
||||||
"""Opens pdf and converts it into text"""
|
"""Opens pdf and converts it into text"""
|
||||||
if not file_path:
|
if not file_path:
|
||||||
|
# if no file path was provided (e.g. when run without the server), open a file dialog
|
||||||
import tkinter as tk
|
import tkinter as tk
|
||||||
from tkinter import filedialog
|
from tkinter import filedialog
|
||||||
|
|
||||||
@@ -26,42 +27,46 @@ def convert(file_path=False):
|
|||||||
crop = page.crop((60, 80, page.width, page.height))
|
crop = page.crop((60, 80, page.width, page.height))
|
||||||
text = crop.extract_text(layout=True)
|
text = crop.extract_text(layout=True)
|
||||||
no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces
|
no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces
|
||||||
new_str = convert_text(no_trail)
|
conv_string.append(convert_text(no_trail))
|
||||||
conv_string.append(new_str)
|
|
||||||
|
|
||||||
conv_string = "#################### neue Seite ####################\n".join(
|
conv_string = "#################### neue Seite ####################\n".join(
|
||||||
conv_string
|
conv_string
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# write converted pdf to file
|
||||||
file_path = file_path.replace(".pdf", ".txt")
|
file_path = file_path.replace(".pdf", ".txt")
|
||||||
text_file = codecs.open(file_path, "w", "utf-8")
|
text_file = codecs.open(file_path, "w", "utf-8")
|
||||||
text_file.write(conv_string)
|
text_file.write(conv_string)
|
||||||
text_file.close()
|
text_file.close()
|
||||||
|
|
||||||
print(f"Alles fertig, die Datei befindet sich unter {file_path}")
|
print(f"Alles fertig, die Datei befindet sich unter {file_path}")
|
||||||
if __name__ != "__main__":
|
|
||||||
return conv_string
|
return conv_string
|
||||||
|
|
||||||
|
|
||||||
def convert_text(text):
|
def convert_text(text: str):
|
||||||
"""Seraches for ':' and converts into anki annotation"""
|
"""Searches for ':' and converts into anki annotation"""
|
||||||
text = str(text)
|
field_nr = 1 # number of anki field
|
||||||
if "\r\n" in text:
|
changed_lines = [] # array with new lines -> anki fields added
|
||||||
text = text.replace("\r\n", "\n")
|
|
||||||
|
|
||||||
no_wrong_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text) # clear wrong newlins
|
for line in seperate(text):
|
||||||
lines = re.split("\n", no_wrong_nl) # split into lines
|
# add anki field into line and count the number of changes
|
||||||
|
line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
|
||||||
test = 1
|
# if anki field added increase field number
|
||||||
changed_lines = []
|
|
||||||
for line in lines:
|
|
||||||
line, num = re.subn("(:)(..+)", rf"\1 {{{{c{test}::\2}}}}", line)
|
|
||||||
if num > 0:
|
if num > 0:
|
||||||
test += 1
|
field_nr += 1
|
||||||
|
# add changed line to array
|
||||||
changed_lines.append(line)
|
changed_lines.append(line)
|
||||||
new_str = "\n".join(changed_lines).strip()
|
|
||||||
if __name__ != "__main__":
|
return "\n".join(changed_lines).strip()
|
||||||
return new_str
|
|
||||||
|
def seperate(text: str) -> list[str]:
|
||||||
|
"""Separates a text into a list of lines"""
|
||||||
|
if "\r\n" in text:
|
||||||
|
# normalize CRLF line endings to LF
|
||||||
|
text = text.replace("\r\n", "\n")
|
||||||
|
# join wrapped lines: a line break followed by spaces and an alphanumeric character is replaced by a single space
|
||||||
|
clean_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text)
|
||||||
|
return clean_nl.split("\n")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user