fixed project and dockerized everything
This commit is contained in:
68
src/functions.py
Executable file
68
src/functions.py
Executable file
@@ -0,0 +1,68 @@
|
||||
"""Function to extract text from pdf and convert it to anki anntation"""
|
||||
|
||||
import re
|
||||
import codecs
|
||||
import pdfplumber
|
||||
|
||||
|
||||
|
||||
def convert(file_path=False):
    """Extract the text of a PDF, convert it to Anki cloze notation and save it.

    Every page is cropped to the box ``(60, 80, page.width, page.height)``
    before its text is extracted, trailing spaces are removed from each line
    and the result is passed through ``convert_text``. The converted pages are
    joined with a "neue Seite" separator line and written as UTF-8 to a
    ``.txt`` file next to the PDF.

    Args:
        file_path: Path of the PDF to convert. When falsy, a Tk file dialog
            asks the user to pick a file.

    Returns:
        The converted text of all pages as a single string.
    """
    from os.path import splitext

    if not file_path:
        import tkinter as tk
        from tkinter import filedialog

        root = tk.Tk()
        root.withdraw()  # only the dialog should be visible, not the root window
        try:
            file_path = filedialog.askopenfilename(
                filetypes=[("PDFs", ".pdf")], title="Datei zum konvertieren auswählen!"
            )
        finally:
            # Release the hidden root window once the dialog is closed.
            root.destroy()

    pages = []
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            crop = page.crop((60, 80, page.width, page.height))
            # Guard against a page that yields no text at all.
            text = crop.extract_text(layout=True) or ""
            no_trail = re.sub(r" +\n", "\n", text)  # drop trailing spaces
            pages.append(convert_text(no_trail))

    conv_string = "#################### neue Seite ####################\n".join(pages)

    # Swap only the real extension. A plain ``replace(".pdf", ".txt")`` leaves
    # names such as "x.PDF" untouched, which would overwrite the source PDF.
    file_path = splitext(file_path)[0] + ".txt"
    with codecs.open(file_path, "w", "utf-8") as text_file:
        text_file.write(conv_string)

    print(f"Alles fertig, die Datei befindet sich unter {file_path}")
    return conv_string
|
||||
|
||||
|
||||
def convert_text(text):
    """Convert "key: value" lines into numbered Anki cloze deletions.

    On every line, the text after the first colon that is followed by at
    least two characters is wrapped as ``{{cN::...}}``. ``N`` starts at 1 and
    increases by one for each line that was changed. Lines without such a
    colon are kept as they are.

    Args:
        text: The text to convert; it is coerced with ``str()``.

    Returns:
        The converted text with leading and trailing whitespace stripped.
    """
    text = str(text).replace("\r\n", "\n")

    # A line break followed by spaces and an alphanumeric character is treated
    # as a wrapped continuation and merged back into the previous line.
    unwrapped = re.sub(r"\n +([A-Za-z0-9])", r" \1", text)

    cloze_index = 1
    changed_lines = []
    for line in unwrapped.split("\n"):
        line, num = re.subn("(:)(..+)", rf"\1 {{{{c{cloze_index}::\2}}}}", line)
        if num > 0:
            cloze_index += 1
        changed_lines.append(line)

    return "\n".join(changed_lines).strip()
|
||||
|
||||
|
||||
# Script entry point: with no path given, convert() opens a file dialog.
if __name__ == "__main__":
    convert()
|
||||
43
src/main.py
Executable file
43
src/main.py
Executable file
@@ -0,0 +1,43 @@
|
||||
"""Main App Module"""
|
||||
|
||||
from os import makedirs, path, environ
|
||||
|
||||
from flask import Flask, render_template, request
|
||||
import functions
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route("/")
def index():
    """Render the index.html template without any conversion result."""
    page = render_template("index.html")
    return page
|
||||
|
||||
|
||||
@app.route("/upload", methods=["POST"])
def upload_file():
    """Handle the upload form: convert an uploaded PDF or the submitted text.

    When a file was uploaded it is saved below ``uploads/`` and converted with
    ``functions.convert``; the converted text is then used for both template
    values. Otherwise the ``text`` form field is converted with
    ``functions.convert_text``.

    Returns:
        The rendered ``index.html`` with ``resp_text`` (conversion result) and
        ``base_text`` (content for the input textarea).
    """
    # .get() keeps a request without one of the fields from aborting with 400.
    uploaded_file = request.files.get("file")
    text = request.form.get("text", "")
    makedirs("uploads", exist_ok=True)

    # The filename is client-controlled: keep only its last path component so
    # that values such as "../../etc/x.pdf" cannot escape the uploads folder.
    raw_name = uploaded_file.filename if uploaded_file is not None else ""
    safe_name = path.basename((raw_name or "").replace("\\", "/"))
    if safe_name in (".", ".."):
        safe_name = ""

    if safe_name:
        filepath = path.join("uploads", safe_name)
        uploaded_file.save(filepath)
        response_text = functions.convert(filepath)
        text = response_text
    else:
        response_text = functions.convert_text(text)

    return render_template("index.html", resp_text=response_text, base_text=text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    port = int(environ.get("PORT", 5000))
    # The server listens on all interfaces, so the interactive debugger (which
    # lets a client execute code) must not be on by default. Set FLASK_DEBUG=1
    # to enable it for local development.
    debug = environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug, host="0.0.0.0", port=port)
|
||||
24
src/static/script.js
Executable file
24
src/static/script.js
Executable file
@@ -0,0 +1,24 @@
|
||||
// Show the name of the chosen file in the label next to the hidden file input.
const fileInput = document.getElementById("myfile");
if (fileInput) {
    // Label text before any file was chosen; captured on the first change so
    // it can be restored when the selection is cleared.
    let defaultLabel = null;

    fileInput.addEventListener("change", (event) => {
        const input = event.target;
        const label = input.parentNode.children[1];
        if (defaultLabel === null) {
            defaultLabel = label.textContent;
        }
        // files[0] is undefined when the dialog was cancelled.
        const selected = input.files[0];
        // textContent, not innerHTML: a file name must never be parsed as markup.
        label.textContent = selected ? selected.name : defaultLabel;
    });
}

// The copy button is only rendered when there is converted text to copy.
const copyButton = document.getElementById("copy-btn");
if (copyButton) {
    copyButton.addEventListener("click", copy_text);
}
|
||||
|
||||
/**
 * Copy the trimmed text of the #myInput element to the clipboard.
 *
 * Uses the asynchronous Clipboard API when it is available and falls back to
 * a temporary textarea with document.execCommand("copy") otherwise.
 */
function copy_text() {
    const copyText = document.getElementById("myInput").textContent.trim();

    // Fallback: select the text inside a temporary textarea and copy it.
    function legacyCopy(text) {
        const textArea = document.createElement("textarea");
        textArea.value = text;
        document.body.append(textArea);
        textArea.select();
        try {
            document.execCommand("copy");
        } finally {
            // Remove the helper so repeated clicks do not pile up elements.
            textArea.remove();
        }
    }

    if (navigator.clipboard && navigator.clipboard.writeText) {
        navigator.clipboard.writeText(copyText).catch(() => legacyCopy(copyText));
    } else {
        legacyCopy(copyText);
    }
}
|
||||
112
src/static/style.css
Executable file
112
src/static/style.css
Executable file
@@ -0,0 +1,112 @@
|
||||
:root{
|
||||
--green:#4CAF50;
|
||||
--primary:#3b3b3b;
|
||||
}
|
||||
|
||||
*{
|
||||
font-family: sans-serif;
|
||||
font-size: 18px;
|
||||
}
|
||||
|
||||
body{
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
color: var(--primary);
|
||||
}
|
||||
|
||||
main{
|
||||
display: grid;
|
||||
width: 90vw;
|
||||
margin: 5vw;
|
||||
flex-wrap: wrap;
|
||||
grid-template-areas: "head-input head-output"
|
||||
"input output";
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
h1.input{
|
||||
grid-area: head-input;
|
||||
}
|
||||
|
||||
h1.output{
|
||||
grid-area: head-output;
|
||||
}
|
||||
|
||||
h1{
|
||||
font-size: 2rem;
|
||||
margin: 0;
|
||||
margin-bottom: .5rem;
|
||||
}
|
||||
|
||||
textarea{
|
||||
grid-area: input;
|
||||
}
|
||||
|
||||
pre{
|
||||
grid-area: output;
|
||||
}
|
||||
|
||||
|
||||
main > div{
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
textarea{
|
||||
width: 100%;
|
||||
height: 20vh;
|
||||
margin: 20px 0;
|
||||
-webkit-box-sizing: border-box;
|
||||
-moz-box-sizing: border-box;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
input[type="file"] {
|
||||
display: none;
|
||||
}
|
||||
.custom-file-upload, button {
|
||||
background-color: white;
|
||||
color: var(--primary);
|
||||
border: 2px solid var(--green); /* Green */
|
||||
/* background-color: lightgray; */
|
||||
display: inline-block;
|
||||
padding: 6px 12px;
|
||||
cursor: pointer;
|
||||
font-size: 1rem;
|
||||
border-radius: .2rem;
|
||||
transition-duration: 0.4s;
|
||||
}
|
||||
|
||||
.custom-file-upload:hover, button:hover {
|
||||
background-color: var(--green); /* Green */
|
||||
color: white;
|
||||
}
|
||||
|
||||
.custom-file-upload span, button span {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
.material-symbols-outlined {
|
||||
padding-right: 0.3rem;
|
||||
}
|
||||
|
||||
textarea, pre{
|
||||
background-color: white;
|
||||
color: var(--primary);
|
||||
border: 2px solid var(--green); /* Green */
|
||||
border-radius: .2rem;
|
||||
margin: .5rem 0;
|
||||
min-height: 10rem;
|
||||
padding: .2rem;
|
||||
}
|
||||
|
||||
@media (max-width: 600px) {
    main{
        /* "wv" is not a CSS unit, so "100wv" was dropped by the browser; the
           viewport-width unit is "vw". The horizontal margin is reset so the
           full-width element does not overflow the viewport. */
        width: 100vw;
        margin: 5vw 0;
    }

    main > div{
        width: 100%;
    }
}
|
||||
32
src/templates/index.html
Executable file
32
src/templates/index.html
Executable file
@@ -0,0 +1,32 @@
|
||||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    {# Without a viewport meta tag phones render at desktop width and the
       max-width media query in the stylesheet never applies. #}
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Anki Converter</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}" />
</head>
<body>
    <main>
        <div id="left">
            <h1>Upload File/Enter Text</h1>
            <form action="/upload" method="post" enctype="multipart/form-data">
                <label class="custom-file-upload">
                    <input id="myfile" type="file" name="file" accept=".pdf"/>
                    <span>Custom Upload</span>
                </label>
                <button type="submit">Upload</button>
                <br>
                {# The hint is a placeholder so it is never submitted as input;
                   the submitted text is kept even when the result is empty. #}
                <textarea name="text" placeholder="Text in here">{{ base_text }}</textarea>
            </form>
        </div>
        {% if resp_text %}
        <div id="right">
            <h1>Converted Text</h1>
            <button id="copy-btn" type="button">Copy Text</button>
            <pre id="myInput">{{ resp_text }}</pre>
        </div>
        {% endif %}
    </main>
    <script src="{{ url_for('static', filename='script.js') }}"></script>
</body>
</html>
|
||||
Reference in New Issue
Block a user