added format indent without colon
All checks were successful
Publish Docker image / Push Docker image to Docker Hub (push) Successful in -3s

This commit is contained in:
2025-09-24 11:23:54 +02:00
parent 7f6665d3e1
commit b03faed2fb
4 changed files with 291 additions and 163 deletions

View File

@@ -23,18 +23,21 @@ def upload_file():
# Gets uploaded file or text from html input # Gets uploaded file or text from html input
uploaded_file = request.files["file"] uploaded_file = request.files["file"]
text = request.form["text"] text = request.form["text"]
format_indents = request.form.get("format_indents") == "on"
print(format_indents)
# Check if file has been uploaded # Check if file has been uploaded
if uploaded_file.filename != "": if uploaded_file.filename:
# Converts uploaded file and returns new text with anki fields # Converts uploaded file and returns new text with anki fields
filepath = path.join("uploads", uploaded_file.filename) filepath = path.join("uploads", uploaded_file.filename)
uploaded_file.save(filepath) uploaded_file.save(filepath)
response_text = functions.convert(filepath) response_text = functions.convert(filepath, format_indents)
text = response_text text = response_text
else: else:
# Converts text and returns new text with anki fields # Converts text and returns new text with anki fields
response_text = functions.convert_text(text) response_text = functions.convert_text(text, format_indents)
return render_template("index.html", resp_text=response_text, base_text=text) return render_template("index.html", resp_text=response_text, base_text=text, format_indents=format_indents)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -6,7 +6,7 @@ import pdfplumber
def convert(file_path:str=""): def convert(file_path:str="", format_indents:bool=True):
"""Opens pdf and converts it into text""" """Opens pdf and converts it into text"""
if file_path == "": if file_path == "":
# if there is no server to provide a filepath, open filepath dialog # if there is no server to provide a filepath, open filepath dialog
@@ -19,72 +19,55 @@ def convert(file_path:str=""):
filetypes=[("PDFs", ".pdf")], title="Datei zum konvertieren auswählen!" filetypes=[("PDFs", ".pdf")], title="Datei zum konvertieren auswählen!"
) )
conv_string = [] converted_pages: list[str] = []
# open PDF # open PDF
with pdfplumber.open(file_path) as pdf: with pdfplumber.open(file_path) as pdf:
for page in pdf.pages: for page in pdf.pages:
crop = page.crop((60, 80, page.width, page.height)) crop = page.crop((60, 80, page.width, page.height))
text = crop.extract_text(layout=True) text = crop.extract_text(layout=True)
no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces no_trail = re.sub(" +\\n", "\n", text) # cleared trailing spaces
conv_string.append(convert_text(no_trail)) converted_pages.append(convert_text(no_trail, format_indents))
conv_string = "#################### neue Seite ####################\n".join( converted_text = "#################### neue Seite ####################\n".join(
conv_string converted_pages
) )
# write converted pdf to file # write converted pdf to file
file_path = file_path.replace(".pdf", ".txt") file_path = file_path.replace(".pdf", ".txt")
text_file = codecs.open(file_path, "w", "utf-8") text_file = codecs.open(file_path, "w", "utf-8")
text_file.write(conv_string) text_file.write(converted_text)
text_file.close() text_file.close()
print(f"Alles fertig, die Datei befindet sich unter {file_path}") print(f"Alles fertig, die Datei befindet sich unter {file_path}")
return conv_string return converted_pages
def convert_text(text: str, format_indents:bool=True):
def convert_text(text: str):
"""Searches for ':' and converts into anki annotation""" """Searches for ':' and converts into anki annotation"""
field_nr = 1 # number of anki field changed_lines: list[str] = [] # array with new lines -> anki fields added
changed_lines = [] # array with new lines -> anki fields added
indented_points = False
indent_level = 0
for line in seperate(text): for line in seperate(text):
# add anki field into line and count the number of changes # add anki field into line and count the number of changes
if re.search(":$", line) != None: changed_lines.append(match_and_replace(line, format_indents))
indented_points = True
changed_lines.append(line)
continue
tabStops = re.findall("\t", line)
if indented_points and indent_level == 0:
indent_level = len(tabStops)
if indented_points and indent_level == len(tabStops):
line, num = re.subn("(\t+.)(..*)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
if num > 0:
changed_lines.append(line)
continue
indented_points = False
indent_level = 0
else:
indented_points = False
indent_level = 0
line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
changed_lines.append(line)
return "\n".join(changed_lines).strip() return "\n".join(changed_lines).strip()
def match_and_replace(line: str, format_indents: bool = True, field_nr: int = 1) -> str:
    """Wrap the content of a single line in an Anki cloze field.

    Two patterns are tried, in this order:

    1. Only when ``format_indents`` is true: a line containing one or more
       tab characters followed by a single character (e.g. a bullet
       marker). Everything after that character is wrapped.
    2. A colon followed by at least two characters. Everything after the
       colon is wrapped.

    Args:
        line: One line of text (no line break expected).
        format_indents: Whether tab-indented lines are wrapped as a whole
            before the colon rule is considered.
        field_nr: Number used in the cloze marker (``{{c<field_nr>::...}}``).

    Returns:
        The line with the cloze marker inserted, or the unchanged line when
        neither pattern matches.
    """
    # Doubled braces survive f-string formatting as literal "{{" / "}}";
    # "\1" and "\2" stay regex back-references because the string is raw.
    replacement = rf"\1 {{{{c{field_nr}::\2}}}}"
    if format_indents:
        line, num = re.subn(r"(\t+.)(..*)", replacement, line)
        if num > 0:
            # The indent rule already wrapped the line; applying the colon
            # rule as well would nest a second cloze inside the first.
            return line
    return re.sub(r"(:)(..+)", replacement, line)
def seperate(text: str) -> list[str]: def seperate(text: str) -> list[str]:
"""Seperates a text into an array of lines""" """Seperates a text into an array of lines"""
if "\r\n" in text: if "\r\n" in text:
# unifies CRLF # unifies CRLF
text = text.replace("\r\n", "\n") text = text.replace("\r\n", "\n")
# clean linebreaks if they are not paragraph breaks # clean linebreaks if they are not paragraph breaks
clean_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text) clean_nl = re.sub("\\n +([A-Za-z0-9])",r" \1", text)
return clean_nl.split("\n") return clean_nl.split("\n")
if __name__ == "__main__": if __name__ == "__main__":
convert() _ = convert()

View File

@@ -1,112 +1,248 @@
:root{ :root {
--green:#4CAF50; --green: #4caf50;
--primary:#3b3b3b; --primary: #3b3b3b;
} }
*{ * {
font-family: sans-serif; font-family: sans-serif;
font-size: 18px; font-size: 18px;
} }
body{ body {
margin: 0; margin: 0;
padding: 0; padding: 0;
color: var(--primary); color: var(--primary);
} }
main{ main {
display: grid; display: grid;
width: 90vw; width: 90vw;
margin: 5vw; margin: 5vw;
flex-wrap: wrap; flex-wrap: wrap;
grid-template-areas: "head-input head-output" grid-template-areas:
"input output"; "head-input head-output"
grid-template-columns: 1fr 1fr; "input output";
gap: 1rem; grid-template-columns: 1fr 1fr;
} gap: 1rem;
}
h1.input{
grid-area: head-input; h1.input {
} grid-area: head-input;
}
h1.output{
grid-area: head-output; h1.output {
} grid-area: head-output;
}
h1{
font-size: 2rem; h1 {
margin: 0; font-size: 2rem;
margin-bottom: .5rem; margin: 0;
} margin-bottom: 0.5rem;
}
textarea{
grid-area: input; textarea {
} grid-area: input;
}
pre{
grid-area: output; pre {
} grid-area: output;
}
main > div{ main > div {
width: 100%; width: 100%;
box-sizing: border-box; box-sizing: border-box;
} }
textarea{ textarea {
width: 100%; width: 100%;
height: 20vh; height: 20vh;
margin: 20px 0; margin: 20px 0;
-webkit-box-sizing: border-box; -webkit-box-sizing: border-box;
-moz-box-sizing: border-box; -moz-box-sizing: border-box;
box-sizing: border-box; box-sizing: border-box;
} }
input[type="file"] { input[type="file"] {
display: none; display: none;
} }
.custom-file-upload, button { .custom-file-upload,
background-color: white; button {
color: var(--primary); background-color: white;
border: 2px solid var(--green); /* Green */ color: var(--primary);
/* background-color: lightgray; */ border: 2px solid var(--green); /* Green */
display: inline-block; /* background-color: lightgray; */
padding: 6px 12px; display: inline-block;
cursor: pointer; padding: 6px 12px;
font-size: 1rem; cursor: pointer;
border-radius: .2rem; font-size: 1rem;
transition-duration: 0.4s; border-radius: 0.2rem;
} transition-duration: 0.4s;
}
.custom-file-upload:hover, button:hover {
background-color: var(--green); /* Green */ .custom-file-upload:hover,
color: white; button:hover {
} background-color: var(--green); /* Green */
color: white;
.custom-file-upload span, button span { }
vertical-align: bottom;
} .custom-file-upload span,
button span {
.material-symbols-outlined { vertical-align: bottom;
padding-right: 0.3rem; }
}
.material-symbols-outlined {
textarea, pre{ padding-right: 0.3rem;
background-color: white; }
color: var(--primary);
border: 2px solid var(--green); /* Green */ .form_button {
border-radius: .2rem; display: flex;
margin: .5rem 0; flex-direction: row;
min-height: 10rem; gap: 0.5rem;
padding: .2rem; align-items: center;
} }
@media (max-width: 600px) { @supports (-webkit-appearance: none) or (-moz-appearance: none) {
main{ .checkbox-wrapper-14 input[type="checkbox"] {
width: 100wv; --active: var(--green);
} --active-inner: #fff;
--border: var(--primary);
main > div{ --border-hover: var(--green);
width: 100%; --background: #fff;
} -webkit-appearance: none;
} -moz-appearance: none;
height: 21px;
outline: none;
display: inline-block;
vertical-align: top;
position: relative;
margin: 0;
cursor: pointer;
border: 1px solid var(--bc, var(--border));
background: var(--b, var(--background));
transition:
background 0.3s,
border-color 0.3s,
box-shadow 0.2s;
}
.checkbox-wrapper-14 input[type="checkbox"]:after {
content: "";
display: block;
left: 0;
top: 0;
position: absolute;
transition:
transform var(--d-t, 0.3s) var(--d-t-e, ease),
opacity var(--d-o, 0.2s);
}
.checkbox-wrapper-14 input[type="checkbox"]:checked {
--b: var(--active);
--bc: var(--active);
--d-o: 0.3s;
--d-t: 0.6s;
--d-t-e: cubic-bezier(0.2, 0.85, 0.32, 1.2);
}
.checkbox-wrapper-14 input[type="checkbox"]:disabled {
--b: var(--disabled);
cursor: not-allowed;
opacity: 0.9;
}
.checkbox-wrapper-14 input[type="checkbox"]:disabled:checked {
--b: var(--disabled-inner);
--bc: var(--border);
}
.checkbox-wrapper-14 input[type="checkbox"]:disabled + label {
cursor: not-allowed;
}
.checkbox-wrapper-14 input[type="checkbox"]:hover:not(:checked):not(:disabled) {
--bc: var(--border-hover);
}
.checkbox-wrapper-14 input[type="checkbox"]:focus {
box-shadow: 0 0 0 var(--focus);
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch) {
width: 21px;
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):after {
opacity: var(--o, 0);
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):checked {
--o: 1;
}
.checkbox-wrapper-14 input[type="checkbox"] + label {
display: inline-block;
vertical-align: middle;
cursor: pointer;
margin-left: 4px;
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch) {
border-radius: 7px;
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):after {
width: 5px;
height: 9px;
border: 2px solid var(--active-inner);
border-top: 0;
border-left: 0;
left: 7px;
top: 4px;
transform: rotate(var(--r, 20deg));
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):checked {
--r: 43deg;
}
.checkbox-wrapper-14 input[type="checkbox"].switch {
width: 38px;
border-radius: 11px;
}
.checkbox-wrapper-14 input[type="checkbox"].switch:after {
left: 2px;
top: 2px;
border-radius: 50%;
width: 17px;
height: 17px;
background: var(--ab, var(--border));
transform: translateX(var(--x, 0));
}
.checkbox-wrapper-14 input[type="checkbox"].switch:checked {
--ab: var(--active-inner);
--x: 17px;
}
.checkbox-wrapper-14 input[type="checkbox"].switch:disabled:not(:checked):after {
opacity: 0.6;
}
}
/* Every descendant of the checkbox wrapper takes its box-sizing from its parent. */
.checkbox-wrapper-14 * {
box-sizing: inherit;
}
/* The same applies to the generated before/after boxes of those descendants. */
.checkbox-wrapper-14 *:before,
.checkbox-wrapper-14 *:after {
box-sizing: inherit;
}
/* The wrapper itself is a flex container whose items are centred on the cross axis. */
.checkbox-wrapper-14 {
display: flex;
align-items: center;
}
/* Shared box styling for textarea and pre elements: white background,
   green border, small rounded corners and a minimum height. */
textarea,
pre {
background-color: white;
color: var(--primary);
border: 2px solid var(--green); /* Green */
border-radius: 0.2rem;
margin: 0.5rem 0;
min-height: 10rem;
padding: 0.2rem;
}
/* Overrides for viewports that are at most 600px wide. */
@media (max-width: 600px) {
main {
/* NOTE(review): "wv" is not a CSS length unit (the viewport-width unit is
   "vw"), so this declaration is invalid and is dropped by browsers; the
   width set on main outside this media query stays in effect. Presumably
   100vw was intended - confirm before changing, because main also has a
   margin declared elsewhere in this stylesheet and 100vw plus that margin
   would exceed the viewport width. */
width: 100wv;
}
/* Direct div children of main span the full width of their container. */
main > div {
width: 100%;
}
}

View File

@@ -6,24 +6,30 @@
</head> </head>
<body> <body>
<main> <main>
<h1 class="input">Upload File/Enter Text</h1>
<div id="left"> <div id="left">
<h1>Upload File/Enter Text</h1> <form action="/upload" method="post" enctype="multipart/form-data" class="">
<form action="/upload" method="post" enctype="multipart/form-data"> <div class="form_button">
<label class="custom-file-upload"> <label class="custom-file-upload">
<input id="myfile" type="file" name="file" accept=".pdf" /> <input id="myfile" type="file" name="file" accept=".pdf" />
<span>Custom Upload</span> <span>Custom Upload</span>
</label> </label>
<button type="submit">Convert</button> <button type="submit">Convert</button>
<div class="checkbox-wrapper-14">
<input id="s1-14" type="checkbox" class="switch" {% if format_indents %}checked {% endif %} name="format_indents" />
<label for="s1-14">Stichpunkte formatieren</label>
</div>
</div>
<br /> <br />
<textarea name="text">{% if resp_text %}{{ base_text }}{% else %}Text in here{% endif %}</textarea> <textarea name="text">{% if resp_text %}{{ base_text }}{% else %}Text in here{% endif %}</textarea>
</form> </form>
</div> </div>
{% if resp_text %} {% if resp_text %}
<h1 class="output">Converted Text</h1>
<div id="right"> <div id="right">
<h1>Converted Text</h1>
<button id="copy-btn">Copy Text</button> <button id="copy-btn">Copy Text</button>
<pre id="myInput"> <pre id="myInput">
{{ resp_text }} {{ resp_text }}
</pre </pre
> >
</div> </div>