CHANGE: Code Refactoring + Docker image + small fixes
This commit is contained in:
@@ -17,27 +17,26 @@ def index():
|
||||
@app.route("/upload", methods=["POST"])
def upload_file():
    """Handle an upload: convert the uploaded file, or else the submitted text.

    Reads the ``file`` part and the ``text`` form field of the POST request.
    If a file was selected it is saved inside ``uploads/`` and passed to
    ``functions.convert``; otherwise the text is passed to
    ``functions.convert_text``.

    Returns:
        The rendered ``index.html`` template with ``resp_text`` set to the
        converted text and ``base_text`` set to the submitted text (or to the
        converted text when a file was uploaded).
    """
    # Create uploads directory if it doesn't exist
    makedirs("uploads", exist_ok=True)

    # Gets uploaded file or text from html input
    uploaded_file = request.files["file"]
    text = request.form["text"]

    # The filename is chosen by the client. Keep only its last path component
    # so a name such as "../../app.py" cannot be written outside uploads/.
    # Backslashes are normalised first so Windows-style paths are cut as well.
    filename = path.basename((uploaded_file.filename or "").replace("\\", "/"))
    if filename in {".", ".."}:
        filename = ""

    # Check if file has been uploaded
    if filename:
        # Converts uploaded file and returns new text with anki fields
        filepath = path.join("uploads", filename)
        uploaded_file.save(filepath)
        response_text = functions.convert(filepath)
        text = response_text
    else:
        # Converts text and returns new text with anki fields
        response_text = functions.convert_text(text)

    return render_template("index.html", resp_text=response_text, base_text=text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Listening port; can be overridden through the PORT environment variable.
    port = int(environ.get("PORT", 5000))
    # The server binds to every interface, and the Werkzeug debugger allows
    # anyone who can reach it to execute code. Debug mode is therefore opt-in
    # (set FLASK_DEBUG=1) instead of always enabled.
    debug = environ.get("FLASK_DEBUG", "0") == "1"
    app.run(debug=debug, host="0.0.0.0", port=port)
|
||||
@@ -9,6 +9,7 @@ import pdfplumber
|
||||
def convert(file_path=False):
|
||||
"""Opens pdf and converts it into text"""
|
||||
if not file_path:
|
||||
# if there is no server to provide a filepath, open filepath dialog
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
|
||||
@@ -26,42 +27,46 @@ def convert(file_path=False):
|
||||
crop = page.crop((60, 80, page.width, page.height))
|
||||
text = crop.extract_text(layout=True)
|
||||
no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces
|
||||
new_str = convert_text(no_trail)
|
||||
conv_string.append(new_str)
|
||||
conv_string.append(convert_text(no_trail))
|
||||
|
||||
conv_string = "#################### neue Seite ####################\n".join(
|
||||
conv_string
|
||||
)
|
||||
|
||||
# write converted pdf to file
|
||||
file_path = file_path.replace(".pdf", ".txt")
|
||||
text_file = codecs.open(file_path, "w", "utf-8")
|
||||
text_file.write(conv_string)
|
||||
text_file.close()
|
||||
|
||||
print(f"Alles fertig, die Datei befindet sich unter {file_path}")
|
||||
if __name__ != "__main__":
|
||||
return conv_string
|
||||
return conv_string
|
||||
|
||||
|
||||
def convert_text(text: str):
    """Search each line for ':' and wrap what follows in an Anki cloze field.

    The text is split into lines by ``seperate``. In every line, the first
    ':' that is followed by at least two characters is kept, and the rest of
    the line after it is rewritten as `` {{cN::<rest>}}``. ``N`` starts at 1
    and increases by one for every line that was changed.

    Args:
        text: The raw text to annotate.

    Returns:
        The annotated lines joined with newlines, with leading and trailing
        whitespace stripped.
    """
    field_nr = 1  # number of the next anki field
    changed_lines = []  # lines after the anki fields were added

    for line in seperate(text):
        # add anki field into line and count the number of changes
        line, num = re.subn(r"(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
        # if an anki field was added, the next one gets the next number
        if num > 0:
            field_nr += 1
        changed_lines.append(line)

    return "\n".join(changed_lines).strip()
|
||||
|
||||
def seperate(text: str) -> list[str]:
    """Separate a text into a list of lines.

    CRLF line endings are first unified to LF. A line break that is followed
    by one or more spaces and then an ASCII letter or digit is treated as a
    wrapped line rather than a paragraph break: the break and the spaces are
    replaced by a single space, joining the continuation to the previous line.

    Args:
        text: The text to split.

    Returns:
        The lines of the cleaned text, split on ``"\\n"``.
    """
    # unifies CRLF (no-op when the text contains none)
    text = text.replace("\r\n", "\n")
    # clean linebreaks if they are not paragraph breaks; a raw string is used
    # so the pattern does not depend on the invalid escape sequence "\ "
    clean_nl = re.sub(r"\n +([A-Za-z0-9])", r" \1", text)
    return clean_nl.split("\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user