added format indent without colon
All checks were successful
Publish Docker image / Push Docker image to Docker Hub (push) Successful in -3s
All checks were successful
Publish Docker image / Push Docker image to Docker Hub (push) Successful in -3s
This commit is contained in:
11
src/app.py
11
src/app.py
@@ -23,18 +23,21 @@ def upload_file():
|
||||
# Gets uploaded file or text from html input
|
||||
uploaded_file = request.files["file"]
|
||||
text = request.form["text"]
|
||||
format_indents = request.form.get("format_indents") == "on"
|
||||
|
||||
print(format_indents)
|
||||
|
||||
# Check if file has been uploaded
|
||||
if uploaded_file.filename != "":
|
||||
if uploaded_file.filename:
|
||||
# Converts uploaded file and returns new text with anki fields
|
||||
filepath = path.join("uploads", uploaded_file.filename)
|
||||
uploaded_file.save(filepath)
|
||||
response_text = functions.convert(filepath)
|
||||
response_text = functions.convert(filepath, format_indents)
|
||||
text = response_text
|
||||
else:
|
||||
# Converts text and returns new text with anki fields
|
||||
response_text = functions.convert_text(text)
|
||||
return render_template("index.html", resp_text=response_text, base_text=text)
|
||||
response_text = functions.convert_text(text, format_indents)
|
||||
return render_template("index.html", resp_text=response_text, base_text=text, format_indents=format_indents)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -6,7 +6,7 @@ import pdfplumber
|
||||
|
||||
|
||||
|
||||
def convert(file_path:str=""):
|
||||
def convert(file_path:str="", format_indents:bool=True):
|
||||
"""Opens pdf and converts it into text"""
|
||||
if file_path == "":
|
||||
# if there is no server to provide a filepath, open filepath dialog
|
||||
@@ -19,72 +19,55 @@ def convert(file_path:str=""):
|
||||
filetypes=[("PDFs", ".pdf")], title="Datei zum konvertieren auswählen!"
|
||||
)
|
||||
|
||||
conv_string = []
|
||||
converted_pages: list[str] = []
|
||||
|
||||
# open PDF
|
||||
with pdfplumber.open(file_path) as pdf:
|
||||
for page in pdf.pages:
|
||||
crop = page.crop((60, 80, page.width, page.height))
|
||||
text = crop.extract_text(layout=True)
|
||||
no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces
|
||||
conv_string.append(convert_text(no_trail))
|
||||
no_trail = re.sub(" +\\n", "\n", text) # cleared trailing spaces
|
||||
converted_pages.append(convert_text(no_trail, format_indents))
|
||||
|
||||
conv_string = "#################### neue Seite ####################\n".join(
|
||||
conv_string
|
||||
converted_text = "#################### neue Seite ####################\n".join(
|
||||
converted_pages
|
||||
)
|
||||
|
||||
# write converted pdf to file
|
||||
file_path = file_path.replace(".pdf", ".txt")
|
||||
text_file = codecs.open(file_path, "w", "utf-8")
|
||||
text_file.write(conv_string)
|
||||
text_file.write(converted_text)
|
||||
text_file.close()
|
||||
|
||||
print(f"Alles fertig, die Datei befindet sich unter {file_path}")
|
||||
return conv_string
|
||||
return converted_pages
|
||||
|
||||
|
||||
def convert_text(text: str):
|
||||
def convert_text(text: str, format_indents:bool=True):
|
||||
"""Searches for ':' and converts into anki annotation"""
|
||||
field_nr = 1 # number of anki field
|
||||
changed_lines = [] # array with new lines -> anki fields added
|
||||
changed_lines: list[str] = [] # array with new lines -> anki fields added
|
||||
|
||||
indented_points = False
|
||||
indent_level = 0
|
||||
for line in seperate(text):
|
||||
# add anki field into line and count the number of changes
|
||||
if re.search(":$", line) != None:
|
||||
indented_points = True
|
||||
changed_lines.append(line)
|
||||
continue
|
||||
|
||||
tabStops = re.findall("\t", line)
|
||||
if indented_points and indent_level == 0:
|
||||
indent_level = len(tabStops)
|
||||
|
||||
if indented_points and indent_level == len(tabStops):
|
||||
line, num = re.subn("(\t+.)(..*)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
|
||||
if num > 0:
|
||||
changed_lines.append(line)
|
||||
continue
|
||||
indented_points = False
|
||||
indent_level = 0
|
||||
else:
|
||||
indented_points = False
|
||||
indent_level = 0
|
||||
line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line)
|
||||
changed_lines.append(line)
|
||||
|
||||
changed_lines.append(match_and_replace(line, format_indents))
|
||||
return "\n".join(changed_lines).strip()
|
||||
|
||||
def match_and_replace(line:str, format_indents:bool=True) -> str:
|
||||
if format_indents:
|
||||
line, num = re.subn("(\t+.)(..*)", rf"\1 {{{{c1::\2}}}}", line)
|
||||
if num > 0:
|
||||
return line
|
||||
line, num = re.subn("(:)(..+)", rf"\1 {{{{c1::\2}}}}", line)
|
||||
return line
|
||||
|
||||
def seperate(text: str) -> list[str]:
|
||||
"""Seperates a text into an array of lines"""
|
||||
if "\r\n" in text:
|
||||
# unifies CRLF
|
||||
text = text.replace("\r\n", "\n")
|
||||
# clean linebreaks if they are not paragraph breaks
|
||||
clean_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text)
|
||||
clean_nl = re.sub("\\n +([A-Za-z0-9])",r" \1", text)
|
||||
return clean_nl.split("\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
convert()
|
||||
_ = convert()
|
||||
|
||||
@@ -1,59 +1,59 @@
|
||||
:root{
|
||||
--green:#4CAF50;
|
||||
--primary:#3b3b3b;
|
||||
:root {
|
||||
--green: #4caf50;
|
||||
--primary: #3b3b3b;
|
||||
}
|
||||
|
||||
*{
|
||||
* {
|
||||
font-family: sans-serif;
|
||||
font-size: 18px;
|
||||
}
|
||||
|
||||
body{
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
color: var(--primary);
|
||||
}
|
||||
|
||||
main{
|
||||
main {
|
||||
display: grid;
|
||||
width: 90vw;
|
||||
margin: 5vw;
|
||||
flex-wrap: wrap;
|
||||
grid-template-areas: "head-input head-output"
|
||||
grid-template-areas:
|
||||
"head-input head-output"
|
||||
"input output";
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
h1.input{
|
||||
h1.input {
|
||||
grid-area: head-input;
|
||||
}
|
||||
|
||||
h1.output{
|
||||
h1.output {
|
||||
grid-area: head-output;
|
||||
}
|
||||
|
||||
h1{
|
||||
h1 {
|
||||
font-size: 2rem;
|
||||
margin: 0;
|
||||
margin-bottom: .5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
textarea{
|
||||
textarea {
|
||||
grid-area: input;
|
||||
}
|
||||
|
||||
pre{
|
||||
pre {
|
||||
grid-area: output;
|
||||
}
|
||||
|
||||
|
||||
main > div{
|
||||
main > div {
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
textarea{
|
||||
textarea {
|
||||
width: 100%;
|
||||
height: 20vh;
|
||||
margin: 20px 0;
|
||||
@@ -65,7 +65,8 @@ textarea{
|
||||
input[type="file"] {
|
||||
display: none;
|
||||
}
|
||||
.custom-file-upload, button {
|
||||
.custom-file-upload,
|
||||
button {
|
||||
background-color: white;
|
||||
color: var(--primary);
|
||||
border: 2px solid var(--green); /* Green */
|
||||
@@ -74,16 +75,18 @@ input[type="file"] {
|
||||
padding: 6px 12px;
|
||||
cursor: pointer;
|
||||
font-size: 1rem;
|
||||
border-radius: .2rem;
|
||||
border-radius: 0.2rem;
|
||||
transition-duration: 0.4s;
|
||||
}
|
||||
|
||||
.custom-file-upload:hover, button:hover {
|
||||
.custom-file-upload:hover,
|
||||
button:hover {
|
||||
background-color: var(--green); /* Green */
|
||||
color: white;
|
||||
}
|
||||
}
|
||||
|
||||
.custom-file-upload span, button span {
|
||||
.custom-file-upload span,
|
||||
button span {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
@@ -91,22 +94,155 @@ input[type="file"] {
|
||||
padding-right: 0.3rem;
|
||||
}
|
||||
|
||||
textarea, pre{
|
||||
.form_button {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
gap: 0.5rem;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
@supports (-webkit-appearance: none) or (-moz-appearance: none) {
|
||||
.checkbox-wrapper-14 input[type="checkbox"] {
|
||||
--active: var(--green);
|
||||
--active-inner: #fff;
|
||||
--border: var(--primary);
|
||||
--border-hover: var(--green);
|
||||
--background: #fff;
|
||||
-webkit-appearance: none;
|
||||
-moz-appearance: none;
|
||||
height: 21px;
|
||||
outline: none;
|
||||
display: inline-block;
|
||||
vertical-align: top;
|
||||
position: relative;
|
||||
margin: 0;
|
||||
cursor: pointer;
|
||||
border: 1px solid var(--bc, var(--border));
|
||||
background: var(--b, var(--background));
|
||||
transition:
|
||||
background 0.3s,
|
||||
border-color 0.3s,
|
||||
box-shadow 0.2s;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:after {
|
||||
content: "";
|
||||
display: block;
|
||||
left: 0;
|
||||
top: 0;
|
||||
position: absolute;
|
||||
transition:
|
||||
transform var(--d-t, 0.3s) var(--d-t-e, ease),
|
||||
opacity var(--d-o, 0.2s);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:checked {
|
||||
--b: var(--active);
|
||||
--bc: var(--active);
|
||||
--d-o: 0.3s;
|
||||
--d-t: 0.6s;
|
||||
--d-t-e: cubic-bezier(0.2, 0.85, 0.32, 1.2);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:disabled {
|
||||
--b: var(--disabled);
|
||||
cursor: not-allowed;
|
||||
opacity: 0.9;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:disabled:checked {
|
||||
--b: var(--disabled-inner);
|
||||
--bc: var(--border);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:disabled + label {
|
||||
cursor: not-allowed;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:hover:not(:checked):not(:disabled) {
|
||||
--bc: var(--border-hover);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:focus {
|
||||
box-shadow: 0 0 0 var(--focus);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch) {
|
||||
width: 21px;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):after {
|
||||
opacity: var(--o, 0);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):checked {
|
||||
--o: 1;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"] + label {
|
||||
display: inline-block;
|
||||
vertical-align: middle;
|
||||
cursor: pointer;
|
||||
margin-left: 4px;
|
||||
}
|
||||
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch) {
|
||||
border-radius: 7px;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):after {
|
||||
width: 5px;
|
||||
height: 9px;
|
||||
border: 2px solid var(--active-inner);
|
||||
border-top: 0;
|
||||
border-left: 0;
|
||||
left: 7px;
|
||||
top: 4px;
|
||||
transform: rotate(var(--r, 20deg));
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):checked {
|
||||
--r: 43deg;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"].switch {
|
||||
width: 38px;
|
||||
border-radius: 11px;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"].switch:after {
|
||||
left: 2px;
|
||||
top: 2px;
|
||||
border-radius: 50%;
|
||||
width: 17px;
|
||||
height: 17px;
|
||||
background: var(--ab, var(--border));
|
||||
transform: translateX(var(--x, 0));
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"].switch:checked {
|
||||
--ab: var(--active-inner);
|
||||
--x: 17px;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"].switch:disabled:not(:checked):after {
|
||||
opacity: 0.6;
|
||||
}
|
||||
}
|
||||
|
||||
.checkbox-wrapper-14 * {
|
||||
box-sizing: inherit;
|
||||
}
|
||||
.checkbox-wrapper-14 *:before,
|
||||
.checkbox-wrapper-14 *:after {
|
||||
box-sizing: inherit;
|
||||
}
|
||||
|
||||
.checkbox-wrapper-14 {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
textarea,
|
||||
pre {
|
||||
background-color: white;
|
||||
color: var(--primary);
|
||||
border: 2px solid var(--green); /* Green */
|
||||
border-radius: .2rem;
|
||||
margin: .5rem 0;
|
||||
border-radius: 0.2rem;
|
||||
margin: 0.5rem 0;
|
||||
min-height: 10rem;
|
||||
padding: .2rem;
|
||||
padding: 0.2rem;
|
||||
}
|
||||
|
||||
@media (max-width: 600px) {
|
||||
main{
|
||||
main {
|
||||
width: 100wv;
|
||||
}
|
||||
|
||||
main > div{
|
||||
main > div {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,24 +6,30 @@
|
||||
</head>
|
||||
<body>
|
||||
<main>
|
||||
<h1 class="input">Upload File/Enter Text</h1>
|
||||
<div id="left">
|
||||
<h1>Upload File/Enter Text</h1>
|
||||
<form action="/upload" method="post" enctype="multipart/form-data">
|
||||
<form action="/upload" method="post" enctype="multipart/form-data" class="">
|
||||
<div class="form_button">
|
||||
<label class="custom-file-upload">
|
||||
<input id="myfile" type="file" name="file" accept=".pdf" />
|
||||
<span>Custom Upload</span>
|
||||
</label>
|
||||
<button type="submit">Convert</button>
|
||||
<div class="checkbox-wrapper-14">
|
||||
<input id="s1-14" type="checkbox" class="switch" {% if format_indents %}checked {% endif %} name="format_indents" />
|
||||
<label for="s1-14">Stichpunkte formatieren</label>
|
||||
</div>
|
||||
</div>
|
||||
<br />
|
||||
<textarea name="text">{% if resp_text %}{{ base_text }}{% else %}Text in here{% endif %}</textarea>
|
||||
</form>
|
||||
</div>
|
||||
{% if resp_text %}
|
||||
<h1 class="output">Converted Text</h1>
|
||||
<div id="right">
|
||||
<h1>Converted Text</h1>
|
||||
<button id="copy-btn">Copy Text</button>
|
||||
<pre id="myInput">
|
||||
{{ resp_text }}
|
||||
{{ resp_text }}
|
||||
</pre
|
||||
>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user