added format indent without colon
All checks were successful
Publish Docker image / Push Docker image to Docker Hub (push) Successful in -3s

This commit is contained in:
2025-09-24 11:23:54 +02:00
parent 7f6665d3e1
commit b03faed2fb
4 changed files with 291 additions and 163 deletions

View File

@@ -23,18 +23,21 @@ def upload_file():
# Gets uploaded file or text from html input
uploaded_file = request.files["file"]
text = request.form["text"]
format_indents = request.form.get("format_indents") == "on"
print(format_indents)
# Check if file has been uploaded
if uploaded_file.filename != "":
if uploaded_file.filename:
# Converts uploaded file and returns new text with anki fields
filepath = path.join("uploads", uploaded_file.filename)
uploaded_file.save(filepath)
response_text = functions.convert(filepath)
response_text = functions.convert(filepath, format_indents)
text = response_text
else:
# Converts text and returns new text with anki fields
response_text = functions.convert_text(text)
return render_template("index.html", resp_text=response_text, base_text=text)
response_text = functions.convert_text(text, format_indents)
return render_template("index.html", resp_text=response_text, base_text=text, format_indents=format_indents)
if __name__ == "__main__":

View File

@@ -6,7 +6,7 @@ import pdfplumber
def convert(file_path:str=""):
def convert(file_path:str="", format_indents:bool=True):
"""Opens pdf and converts it into text"""
if file_path == "":
# if there is no server to provide a filepath, open filepath dialog
@@ -19,72 +19,55 @@ def convert(file_path:str=""):
filetypes=[("PDFs", ".pdf")], title="Datei zum konvertieren auswählen!"
)
conv_string = []
converted_pages: list[str] = []
# open PDF
with pdfplumber.open(file_path) as pdf:
for page in pdf.pages:
crop = page.crop((60, 80, page.width, page.height))
text = crop.extract_text(layout=True)
no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces
conv_string.append(convert_text(no_trail))
no_trail = re.sub(" +\\n", "\n", text) # cleared trailing spaces
converted_pages.append(convert_text(no_trail, format_indents))
conv_string = "#################### neue Seite ####################\n".join(
conv_string
converted_text = "#################### neue Seite ####################\n".join(
converted_pages
)
# write converted pdf to file
file_path = file_path.replace(".pdf", ".txt")
text_file = codecs.open(file_path, "w", "utf-8")
text_file.write(conv_string)
text_file.write(converted_text)
text_file.close()
print(f"Alles fertig, die Datei befindet sich unter {file_path}")
return conv_string
return converted_pages
def convert_text(text: str, format_indents: bool = True) -> str:
    """Add Anki cloze markup to every line of *text*.

    The text is split into lines with ``seperate`` and each line is handed to
    ``match_and_replace``, which decides whether and where the cloze field is
    inserted.

    Args:
        text: Raw text to convert.
        format_indents: Forwarded unchanged to ``match_and_replace``.

    Returns:
        The converted lines joined with newlines, with leading and trailing
        whitespace stripped from the overall result.
    """
    converted_lines = [
        match_and_replace(line, format_indents) for line in seperate(text)
    ]
    return "\n".join(converted_lines).strip()
def match_and_replace(line: str, format_indents: bool = True) -> str:
    """Wrap the answer part of a single line in an Anki ``c1`` cloze field.

    Two rules are tried in order:

    1. Only when *format_indents* is true: if the line contains one or more
       tab characters followed by at least two further characters, everything
       after the first character behind the tabs is wrapped.
    2. Otherwise, or when rule 1 did not match: everything after a colon is
       wrapped, provided at least two characters follow the colon.

    In both cases a single space is inserted in front of the cloze field.

    Args:
        line: One line of text without a line break.
        format_indents: Enables the tab-indent rule.

    Returns:
        The line with cloze markup, or the unchanged line when no rule matched.
    """
    cloze_template = rf"\1 {{{{c1::\2}}}}"

    if format_indents:
        indented, hits = re.subn("(\t+.)(..*)", cloze_template, line)
        if hits:
            # The indent rule takes precedence; the colon rule is skipped.
            return indented

    return re.sub("(:)(..+)", cloze_template, line)
def seperate(text: str) -> list[str]:
    """Split *text* into lines after merging soft-wrapped continuation lines.

    CRLF line endings are normalised to LF first. A line break that is
    followed by one or more spaces and then an ASCII letter or digit is
    treated as a soft wrap rather than a paragraph break: the break and the
    spaces are replaced by a single space.

    Args:
        text: Text with LF or CRLF line endings.

    Returns:
        The list of lines; an empty input yields a list with one empty string.
    """
    # Unify CRLF so only "\n" has to be handled below (no-op without CRLF).
    text = text.replace("\r\n", "\n")
    # Clean line breaks if they are not paragraph breaks. A raw string keeps
    # the pattern free of invalid string escapes.
    unwrapped = re.sub(r"\n +([A-Za-z0-9])", r" \1", text)
    return unwrapped.split("\n")
if __name__ == "__main__":
    # Script mode: convert() is called without a path. The return value is
    # not needed here, so it is discarded explicitly.
    _ = convert()

View File

@@ -1,59 +1,59 @@
:root{
--green:#4CAF50;
--primary:#3b3b3b;
:root {
--green: #4caf50;
--primary: #3b3b3b;
}
*{
* {
font-family: sans-serif;
font-size: 18px;
}
body{
body {
margin: 0;
padding: 0;
color: var(--primary);
}
main{
main {
display: grid;
width: 90vw;
margin: 5vw;
flex-wrap: wrap;
grid-template-areas: "head-input head-output"
grid-template-areas:
"head-input head-output"
"input output";
grid-template-columns: 1fr 1fr;
gap: 1rem;
}
h1.input{
h1.input {
grid-area: head-input;
}
h1.output{
h1.output {
grid-area: head-output;
}
h1{
h1 {
font-size: 2rem;
margin: 0;
margin-bottom: .5rem;
margin-bottom: 0.5rem;
}
textarea{
textarea {
grid-area: input;
}
pre{
pre {
grid-area: output;
}
main > div{
main > div {
width: 100%;
box-sizing: border-box;
}
textarea{
textarea {
width: 100%;
height: 20vh;
margin: 20px 0;
@@ -65,7 +65,8 @@ textarea{
input[type="file"] {
display: none;
}
.custom-file-upload, button {
.custom-file-upload,
button {
background-color: white;
color: var(--primary);
border: 2px solid var(--green); /* Green */
@@ -74,16 +75,18 @@ input[type="file"] {
padding: 6px 12px;
cursor: pointer;
font-size: 1rem;
border-radius: .2rem;
border-radius: 0.2rem;
transition-duration: 0.4s;
}
.custom-file-upload:hover, button:hover {
.custom-file-upload:hover,
button:hover {
background-color: var(--green); /* Green */
color: white;
}
}
.custom-file-upload span, button span {
.custom-file-upload span,
button span {
vertical-align: bottom;
}
@@ -91,22 +94,155 @@ input[type="file"] {
padding-right: 0.3rem;
}
textarea, pre{
.form_button {
display: flex;
flex-direction: row;
gap: 0.5rem;
align-items: center;
}
@supports (-webkit-appearance: none) or (-moz-appearance: none) {
.checkbox-wrapper-14 input[type="checkbox"] {
--active: var(--green);
--active-inner: #fff;
--border: var(--primary);
--border-hover: var(--green);
--background: #fff;
-webkit-appearance: none;
-moz-appearance: none;
height: 21px;
outline: none;
display: inline-block;
vertical-align: top;
position: relative;
margin: 0;
cursor: pointer;
border: 1px solid var(--bc, var(--border));
background: var(--b, var(--background));
transition:
background 0.3s,
border-color 0.3s,
box-shadow 0.2s;
}
.checkbox-wrapper-14 input[type="checkbox"]:after {
content: "";
display: block;
left: 0;
top: 0;
position: absolute;
transition:
transform var(--d-t, 0.3s) var(--d-t-e, ease),
opacity var(--d-o, 0.2s);
}
.checkbox-wrapper-14 input[type="checkbox"]:checked {
--b: var(--active);
--bc: var(--active);
--d-o: 0.3s;
--d-t: 0.6s;
--d-t-e: cubic-bezier(0.2, 0.85, 0.32, 1.2);
}
.checkbox-wrapper-14 input[type="checkbox"]:disabled {
--b: var(--disabled);
cursor: not-allowed;
opacity: 0.9;
}
.checkbox-wrapper-14 input[type="checkbox"]:disabled:checked {
--b: var(--disabled-inner);
--bc: var(--border);
}
.checkbox-wrapper-14 input[type="checkbox"]:disabled + label {
cursor: not-allowed;
}
.checkbox-wrapper-14 input[type="checkbox"]:hover:not(:checked):not(:disabled) {
--bc: var(--border-hover);
}
.checkbox-wrapper-14 input[type="checkbox"]:focus {
box-shadow: 0 0 0 var(--focus);
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch) {
width: 21px;
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):after {
opacity: var(--o, 0);
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):checked {
--o: 1;
}
.checkbox-wrapper-14 input[type="checkbox"] + label {
display: inline-block;
vertical-align: middle;
cursor: pointer;
margin-left: 4px;
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch) {
border-radius: 7px;
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):after {
width: 5px;
height: 9px;
border: 2px solid var(--active-inner);
border-top: 0;
border-left: 0;
left: 7px;
top: 4px;
transform: rotate(var(--r, 20deg));
}
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):checked {
--r: 43deg;
}
.checkbox-wrapper-14 input[type="checkbox"].switch {
width: 38px;
border-radius: 11px;
}
.checkbox-wrapper-14 input[type="checkbox"].switch:after {
left: 2px;
top: 2px;
border-radius: 50%;
width: 17px;
height: 17px;
background: var(--ab, var(--border));
transform: translateX(var(--x, 0));
}
.checkbox-wrapper-14 input[type="checkbox"].switch:checked {
--ab: var(--active-inner);
--x: 17px;
}
.checkbox-wrapper-14 input[type="checkbox"].switch:disabled:not(:checked):after {
opacity: 0.6;
}
}
.checkbox-wrapper-14 * {
box-sizing: inherit;
}
.checkbox-wrapper-14 *:before,
.checkbox-wrapper-14 *:after {
box-sizing: inherit;
}
.checkbox-wrapper-14 {
display: flex;
align-items: center;
}
textarea,
pre {
background-color: white;
color: var(--primary);
border: 2px solid var(--green); /* Green */
border-radius: .2rem;
margin: .5rem 0;
border-radius: 0.2rem;
margin: 0.5rem 0;
min-height: 10rem;
padding: .2rem;
padding: 0.2rem;
}
/* Overrides applied when the viewport is at most 600px wide. */
@media (max-width: 600px) {
main{
main {
/* NOTE(review): "wv" is not a CSS length unit ("vw" was presumably intended),
   so this declaration is likely discarded by browsers. Confirm before
   correcting it: main also has margin: 5vw, so a working 100vw width would
   make the element wider than the viewport. */
width: 100wv;
}
main > div{
main > div {
width: 100%;
}
}

View File

@@ -6,24 +6,30 @@
</head>
<body>
<main>
<h1 class="input">Upload File/Enter Text</h1>
<div id="left">
<h1>Upload File/Enter Text</h1>
<form action="/upload" method="post" enctype="multipart/form-data">
<form action="/upload" method="post" enctype="multipart/form-data" class="">
<div class="form_button">
<label class="custom-file-upload">
<input id="myfile" type="file" name="file" accept=".pdf" />
<span>Custom Upload</span>
</label>
<button type="submit">Convert</button>
<div class="checkbox-wrapper-14">
<input id="s1-14" type="checkbox" class="switch" {% if format_indents %}checked {% endif %} name="format_indents" />
<label for="s1-14">Stichpunkte formatieren</label>
</div>
</div>
<br />
<textarea name="text">{% if resp_text %}{{ base_text }}{% else %}Text in here{% endif %}</textarea>
</form>
</div>
{% if resp_text %}
<h1 class="output">Converted Text</h1>
<div id="right">
<h1>Converted Text</h1>
<button id="copy-btn">Copy Text</button>
<pre id="myInput">
{{ resp_text }}
{{ resp_text }}
</pre
>
</div>