CHANGE: Code Refactoring + Docker image + small fixes
This commit is contained in:
27
.vscode/launch.json
vendored
Normal file
27
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python Debugger: Flask",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"module": "flask",
|
||||
"env": { "FLASK_APP": "app.py", "FLASK_DEBUG": "1" },
|
||||
"args": ["run", "--no-debugger", "--no-reload"],
|
||||
"jinja": true,
|
||||
"autoStartBrowser": false,
|
||||
"cwd": "${workspaceFolder}/src"
|
||||
},
|
||||
{
|
||||
"name": "Python Debugger: Current File",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal",
|
||||
"cwd": "${workspaceFolder}/src"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,9 +1,9 @@
|
||||
FROM python:3.9
|
||||
FROM python:3.12.2-alpine
|
||||
COPY requirements.txt /home
|
||||
RUN pip install -r /home/requirements.txt
|
||||
COPY src webserver
|
||||
WORKDIR /webserver
|
||||
|
||||
ENV FLASK_DEBUG=false
|
||||
ENTRYPOINT [ "python3", "-u", "main.py" ]
|
||||
EXPOSE 5000
|
||||
ENTRYPOINT [ "python3", "-u", "app.py" ]
|
||||
EXPOSE 5000
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
version: '3.0'
|
||||
services:
|
||||
web:
|
||||
build: .
|
||||
image: anki_convert
|
||||
image: git.letsstein.de/tom/anki_convert
|
||||
container_name: anki_convert
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 8080:5000
|
||||
ports:
|
||||
- 8080:5000
|
||||
|
||||
4
requirements.txt
Executable file → Normal file
4
requirements.txt
Executable file → Normal file
@@ -1,2 +1,2 @@
|
||||
flask
|
||||
pdfplumber
|
||||
Flask==3.0.3
|
||||
pdfplumber==0.11.4
|
||||
|
||||
@@ -17,27 +17,26 @@ def index():
|
||||
@app.route("/upload", methods=["POST"])
def upload_file():
    """Handle the upload form: convert an uploaded file, or else the pasted text.

    Reads the "file" upload and the "text" field from the submitted form.
    When a file was chosen it is saved below "uploads" and handed to
    functions.convert(); otherwise the text field is handed to
    functions.convert_text(). The converted text is rendered into index.html
    as resp_text, next to base_text.
    """
    # Create uploads directory if it doesn't exist
    makedirs("uploads", exist_ok=True)

    # Gets uploaded file or text from html input
    uploaded_file = request.files["file"]
    text = request.form["text"]

    # An empty filename means no file has been uploaded
    if uploaded_file.filename != "":
        # The filename is chosen by the client: keep only its last path
        # component so names like "../../app.py" or "/etc/x" cannot be
        # written outside of "uploads".
        safe_name = path.basename(uploaded_file.filename)
        filepath = path.join("uploads", safe_name)
        uploaded_file.save(filepath)
        # Converts uploaded file and returns new text with anki fields
        response_text = functions.convert(filepath)
        # For file uploads the converted text is also shown as the base text
        text = response_text
    else:
        # Converts text and returns new text with anki fields
        response_text = functions.convert_text(text)

    return render_template("index.html", resp_text=response_text, base_text=text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Port can be overridden through the PORT environment variable
    port = int(environ.get("PORT", 5000))
    # The server listens on all interfaces, so the interactive debugger must
    # not be enabled unconditionally; it is opt-in through FLASK_DEBUG.
    debug = environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug, host="0.0.0.0", port=port)
|
||||
@@ -9,6 +9,7 @@ import pdfplumber
|
||||
def convert(file_path=False):
|
||||
"""Opens pdf and converts it into text"""
|
||||
if not file_path:
|
||||
# if there is no server to provide a filepath, open filepath dialog
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
|
||||
@@ -26,42 +27,46 @@ def convert(file_path=False):
|
||||
crop = page.crop((60, 80, page.width, page.height))
|
||||
text = crop.extract_text(layout=True)
|
||||
no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces
|
||||
new_str = convert_text(no_trail)
|
||||
conv_string.append(new_str)
|
||||
conv_string.append(convert_text(no_trail))
|
||||
|
||||
conv_string = "#################### neue Seite ####################\n".join(
|
||||
conv_string
|
||||
)
|
||||
|
||||
# write converted pdf to file
|
||||
file_path = file_path.replace(".pdf", ".txt")
|
||||
text_file = codecs.open(file_path, "w", "utf-8")
|
||||
text_file.write(conv_string)
|
||||
text_file.close()
|
||||
|
||||
print(f"Alles fertig, die Datei befindet sich unter {file_path}")
|
||||
if __name__ != "__main__":
|
||||
return conv_string
|
||||
return conv_string
|
||||
|
||||
|
||||
def convert_text(text):
|
||||
"""Seraches for ':' and converts into anki annotation"""
|
||||
text = str(text)
|
||||
if "\r\n" in text:
|
||||
text = text.replace("\r\n", "\n")
|
||||
def convert_text(text: str):
|
||||
"""Searches for ':' and converts into anki annotation"""
|
||||
field_nr = 1 # number of anki field
|
||||
changed_lines = [] # array with new lines -> anki fields added
|
||||
|
||||
no_wrong_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text) # clear wrong newlins
|
||||
lines = re.split("\n", no_wrong_nl) # split into lines
|
||||
|
||||
test = 1
|
||||
changed_lines = []
|
||||
for line in lines:
|
||||
line, num = re.subn("(:)(..+)", rf"\1 {{{{c{test}::\2}}}}", line)
|
||||
for line in seperate(text):
|
||||
# add anki field into line and count the number of changes
|
||||
line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
|
||||
# if anki field added increase field number
|
||||
if num > 0:
|
||||
test += 1
|
||||
field_nr += 1
|
||||
# add changed line to array
|
||||
changed_lines.append(line)
|
||||
new_str = "\n".join(changed_lines).strip()
|
||||
if __name__ != "__main__":
|
||||
return new_str
|
||||
|
||||
return "\n".join(changed_lines).strip()
|
||||
|
||||
def seperate(text: str) -> list[str]:
    """Separate a text into a list of lines, joining wrapped lines.

    CRLF line endings are unified to LF first. A line break that is followed
    by one or more spaces and then an ASCII letter or digit is treated as a
    wrapped line, not a paragraph break, and is replaced by a single space.

    Returns the cleaned text split at every remaining line break.
    """
    # unifies CRLF (replace leaves the text unchanged when there is none)
    text = text.replace("\r\n", "\n")
    # clean linebreaks if they are not paragraph breaks; a raw string is used
    # so the pattern contains no invalid string escape sequences
    clean_nl = re.sub(r"\n +([A-Za-z0-9])", r" \1", text)
    return clean_nl.split("\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user