Compare commits
10 Commits
1b4b1b1e25
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 2bf98a8faf | |||
| 00855c3ce1 | |||
| 8467b4ed53 | |||
| 5892ebd05e | |||
| 8b4662c027 | |||
| b03faed2fb | |||
| 7f6665d3e1 | |||
| 4bcec44d05 | |||
| a577ac6eaa | |||
| a7e9c31a2a |
@@ -39,6 +39,9 @@ jobs:
|
||||
uses: docker/metadata-action@v3
|
||||
with:
|
||||
images: git.letsstein.de/tom/anki_convert
|
||||
tags: |
|
||||
type=raw,value=latest,enable={{is_default_branch}}
|
||||
type=sha
|
||||
|
||||
- name: Build and push Docker image
|
||||
id: push
|
||||
|
||||
11
src/app.py
11
src/app.py
@@ -23,18 +23,21 @@ def upload_file():
|
||||
# Gets uploaded file or text from html input
|
||||
uploaded_file = request.files["file"]
|
||||
text = request.form["text"]
|
||||
format_indents = request.form.get("format_indents") == "on"
|
||||
|
||||
print(format_indents)
|
||||
|
||||
# Check if file has been uploaded
|
||||
if uploaded_file.filename != "":
|
||||
if uploaded_file.filename:
|
||||
# Converts uploaded file and returns new text with anki fields
|
||||
filepath = path.join("uploads", uploaded_file.filename)
|
||||
uploaded_file.save(filepath)
|
||||
response_text = functions.convert(filepath)
|
||||
response_text = functions.convert(filepath, format_indents)
|
||||
text = response_text
|
||||
else:
|
||||
# Converts text and returns new text with anki fields
|
||||
response_text = functions.convert_text(text)
|
||||
return render_template("index.html", resp_text=response_text, base_text=text)
|
||||
response_text = functions.convert_text(text, format_indents)
|
||||
return render_template("index.html", resp_text=response_text, base_text=text, format_indents=format_indents)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -5,10 +5,9 @@ import codecs
|
||||
import pdfplumber
|
||||
|
||||
|
||||
|
||||
def convert(file_path=False):
|
||||
def convert(file_path: str = "", format_indents: bool = True):
|
||||
"""Opens pdf and converts it into text"""
|
||||
if not file_path:
|
||||
if file_path == "":
|
||||
# if there is no server to provide a filepath, open filepath dialog
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
@@ -19,55 +18,75 @@ def convert(file_path=False):
|
||||
filetypes=[("PDFs", ".pdf")], title="Datei zum konvertieren auswählen!"
|
||||
)
|
||||
|
||||
conv_string = []
|
||||
converted_pages: list[str] = []
|
||||
|
||||
# open PDF
|
||||
with pdfplumber.open(file_path) as pdf:
|
||||
for page in pdf.pages:
|
||||
crop = page.crop((60, 80, page.width, page.height))
|
||||
text = crop.extract_text(layout=True)
|
||||
no_trail = re.sub("\ +\\n", "\n", text) # cleared trailing spaces
|
||||
conv_string.append(convert_text(no_trail))
|
||||
no_trail = re.sub(" +\\n", "\n", text) # cleared trailing spaces
|
||||
converted_pages.append(convert_text(no_trail, format_indents))
|
||||
|
||||
conv_string = "#################### neue Seite ####################\n".join(
|
||||
conv_string
|
||||
converted_text = "#################### neue Seite ####################\n".join(
|
||||
converted_pages
|
||||
)
|
||||
|
||||
# write converted pdf to file
|
||||
file_path = file_path.replace(".pdf", ".txt")
|
||||
text_file = codecs.open(file_path, "w", "utf-8")
|
||||
text_file.write(conv_string)
|
||||
text_file.write(converted_text)
|
||||
text_file.close()
|
||||
|
||||
print(f"Alles fertig, die Datei befindet sich unter {file_path}")
|
||||
return conv_string
|
||||
return converted_pages
|
||||
|
||||
|
||||
def convert_text(text: str, format_indents: bool = True) -> str:
    """Insert Anki cloze fields into *text*, line by line.

    The text is split with ``seperate``. Each line is normally handed to
    ``match_and_replace``. When that helper returns ``True`` (the line ends
    with ":"), the line is kept unchanged and the following lines are treated
    as entries under it for as long as they start with a non-word character:

    * entries starting with a tab go through ``match_and_replace``;
    * other entries get everything after their leading non-word characters
      wrapped in ``{{c1::...}}``, unless they end with ":" themselves.

    The first line that does not start with a non-word character ends this
    mode.

    Args:
        text: Text to convert.
        format_indents: Passed through to ``match_and_replace``.

    Returns:
        The converted lines joined with newlines, with leading and trailing
        whitespace stripped.
    """
    changed_lines: list[str] = []  # converted lines, in input order
    as_indent = False  # True while handling entries under a ":" line

    for line in seperate(text):
        if as_indent and re.search(r"^\W+.+", line) is not None:
            if re.search(r"^\t+", line) is not None:
                new_line = match_and_replace(line, format_indents)
            else:
                # The lookahead skips tab-led lines and lines ending in ":".
                new_line, _ = re.subn(
                    r"(?!^\t|^.+:$)^(\W+)(.+)$", r"\1 {{c1::\2}}", line
                )
        else:
            as_indent = False
            new_line = match_and_replace(line, format_indents)

        if new_line is True:
            # ``True`` is the helper's marker for a line ending in ":";
            # keep the line as it is and treat what follows as its entries.
            as_indent = True
            changed_lines.append(line)
        else:
            changed_lines.append(new_line)

    return "\n".join(changed_lines).strip()
|
||||
|
||||
|
||||
def match_and_replace(line: str, format_indents: bool = True) -> "str | bool":
    """Wrap the answer part of one line in an Anki ``{{c1::...}}`` field.

    Args:
        line: A single line of text.
        format_indents: When true, a tab-led entry (one or more tabs, one
            further character, then at least one more character) is tried
            first and has everything after that further character wrapped.

    Returns:
        ``True`` if the line ends with ":"; the caller decides what to do
        with such a line. Otherwise the line as a string: rewritten when the
        tab pattern or the colon pattern matched, unchanged when neither did.
    """
    # A trailing colon is reported to the caller instead of being rewritten.
    if re.search(r":$", line) is not None:
        return True

    if format_indents:
        indented, count = re.subn(r"(\t+.)(..*)", r"\1 {{c1::\2}}", line)
        if count > 0:
            return indented

    # Colon followed by at least two characters: wrap what follows the colon.
    return re.sub(r"(:)(..+)", r"\1 {{c1::\2}}", line)
|
||||
|
||||
|
||||
def seperate(text: str) -> list[str]:
    """Separate a text into a list of lines.

    CRLF line endings are unified to LF first. A line break that is followed
    by one or more spaces and then an ASCII letter or digit is not treated as
    a paragraph break: it is replaced by a single space so the two pieces end
    up on one line. Every other line break is kept.

    Args:
        text: Text to split.

    Returns:
        The lines of the cleaned text, without newline characters. An empty
        input yields ``[""]``.
    """
    # str.replace is a no-op when there is no CRLF, so no pre-check is needed.
    text = text.replace("\r\n", "\n")
    # Raw string: avoids the invalid "\ " escape the earlier pattern relied on.
    clean_nl = re.sub(r"\n +([A-Za-z0-9])", r" \1", text)
    return clean_nl.split("\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the conversion once when executed as a script; the return value
    # is deliberately discarded.
    _ = convert()
|
||||
|
||||
@@ -1,112 +1,277 @@
|
||||
:root{
|
||||
--green:#4CAF50;
|
||||
--primary:#3b3b3b;
|
||||
}
|
||||
|
||||
*{
|
||||
font-family: sans-serif;
|
||||
font-size: 18px;
|
||||
}
|
||||
|
||||
body{
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
color: var(--primary);
|
||||
}
|
||||
|
||||
main{
|
||||
display: grid;
|
||||
width: 90vw;
|
||||
margin: 5vw;
|
||||
flex-wrap: wrap;
|
||||
grid-template-areas: "head-input head-output"
|
||||
"input output";
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
h1.input{
|
||||
grid-area: head-input;
|
||||
}
|
||||
|
||||
h1.output{
|
||||
grid-area: head-output;
|
||||
}
|
||||
|
||||
h1{
|
||||
font-size: 2rem;
|
||||
margin: 0;
|
||||
margin-bottom: .5rem;
|
||||
}
|
||||
|
||||
textarea{
|
||||
grid-area: input;
|
||||
}
|
||||
|
||||
pre{
|
||||
grid-area: output;
|
||||
}
|
||||
|
||||
|
||||
main > div{
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
textarea{
|
||||
width: 100%;
|
||||
height: 20vh;
|
||||
margin: 20px 0;
|
||||
-webkit-box-sizing: border-box;
|
||||
-moz-box-sizing: border-box;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
input[type="file"] {
|
||||
display: none;
|
||||
}
|
||||
.custom-file-upload, button {
|
||||
background-color: white;
|
||||
color: var(--primary);
|
||||
border: 2px solid var(--green); /* Green */
|
||||
/* background-color: lightgray; */
|
||||
display: inline-block;
|
||||
padding: 6px 12px;
|
||||
cursor: pointer;
|
||||
font-size: 1rem;
|
||||
border-radius: .2rem;
|
||||
transition-duration: 0.4s;
|
||||
}
|
||||
|
||||
.custom-file-upload:hover, button:hover {
|
||||
background-color: var(--green); /* Green */
|
||||
color: white;
|
||||
}
|
||||
|
||||
.custom-file-upload span, button span {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
.material-symbols-outlined {
|
||||
padding-right: 0.3rem;
|
||||
}
|
||||
|
||||
textarea, pre{
|
||||
background-color: white;
|
||||
color: var(--primary);
|
||||
border: 2px solid var(--green); /* Green */
|
||||
border-radius: .2rem;
|
||||
margin: .5rem 0;
|
||||
min-height: 10rem;
|
||||
padding: .2rem;
|
||||
}
|
||||
|
||||
@media (max-width: 600px) {
|
||||
main{
|
||||
width: 100wv;
|
||||
}
|
||||
|
||||
main > div{
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
:root {
|
||||
--green: #4caf50;
|
||||
--primary: #3b3b3b;
|
||||
}
|
||||
|
||||
* {
|
||||
font-family: sans-serif;
|
||||
font-size: 18px;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
color: var(--primary);
|
||||
}
|
||||
|
||||
main {
|
||||
display: grid;
|
||||
width: 90vw;
|
||||
margin: 5vw;
|
||||
flex-wrap: wrap;
|
||||
grid-template-areas:
|
||||
"head-input head-output"
|
||||
"input output";
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
h1.input {
|
||||
grid-area: head-input;
|
||||
}
|
||||
|
||||
h1.output {
|
||||
grid-area: head-output;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 2rem;
|
||||
margin: 0;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
#left {
|
||||
grid-area: input;
|
||||
}
|
||||
|
||||
#right {
|
||||
grid-area: output;
|
||||
}
|
||||
|
||||
#copy-btn {
|
||||
align-self: flex-start;
|
||||
}
|
||||
|
||||
main > div,
|
||||
main > form {
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
textarea {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
margin: 20px 0;
|
||||
-webkit-box-sizing: border-box;
|
||||
-moz-box-sizing: border-box;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
pre {
|
||||
white-space: pre-wrap; /* Since CSS 2.1 */
|
||||
white-space: -moz-pre-wrap; /* Mozilla, since 1999 */
|
||||
white-space: -pre-wrap; /* Opera 4-6 */
|
||||
white-space: -o-pre-wrap; /* Opera 7 */
|
||||
word-wrap: break-word; /* Internet Explorer 5.5+ */
|
||||
}
|
||||
|
||||
input[type="file"] {
|
||||
display: none;
|
||||
}
|
||||
.custom-file-upload,
|
||||
button {
|
||||
background-color: white;
|
||||
color: var(--primary);
|
||||
border: 2px solid var(--green); /* Green */
|
||||
/* background-color: lightgray; */
|
||||
display: inline-block;
|
||||
padding: 6px 12px;
|
||||
cursor: pointer;
|
||||
font-size: 1rem;
|
||||
border-radius: 0.2rem;
|
||||
transition-duration: 0.4s;
|
||||
}
|
||||
|
||||
.custom-file-upload:hover,
|
||||
button:hover {
|
||||
background-color: var(--green); /* Green */
|
||||
color: white;
|
||||
}
|
||||
|
||||
.custom-file-upload span,
|
||||
button span {
|
||||
vertical-align: bottom;
|
||||
}
|
||||
|
||||
.material-symbols-outlined {
|
||||
padding-right: 0.3rem;
|
||||
}
|
||||
|
||||
.form_button {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
gap: 0.5rem;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
@supports (-webkit-appearance: none) or (-moz-appearance: none) {
|
||||
.checkbox-wrapper-14 input[type="checkbox"] {
|
||||
--active: var(--green);
|
||||
--active-inner: #fff;
|
||||
--border: var(--primary);
|
||||
--border-hover: var(--green);
|
||||
--background: #fff;
|
||||
-webkit-appearance: none;
|
||||
-moz-appearance: none;
|
||||
height: 21px;
|
||||
outline: none;
|
||||
display: inline-block;
|
||||
vertical-align: top;
|
||||
position: relative;
|
||||
margin: 0;
|
||||
cursor: pointer;
|
||||
border: 1px solid var(--bc, var(--border));
|
||||
background: var(--b, var(--background));
|
||||
transition:
|
||||
background 0.3s,
|
||||
border-color 0.3s,
|
||||
box-shadow 0.2s;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:after {
|
||||
content: "";
|
||||
display: block;
|
||||
left: 0;
|
||||
top: 0;
|
||||
position: absolute;
|
||||
transition:
|
||||
transform var(--d-t, 0.3s) var(--d-t-e, ease),
|
||||
opacity var(--d-o, 0.2s);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:checked {
|
||||
--b: var(--active);
|
||||
--bc: var(--active);
|
||||
--d-o: 0.3s;
|
||||
--d-t: 0.6s;
|
||||
--d-t-e: cubic-bezier(0.2, 0.85, 0.32, 1.2);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:disabled {
|
||||
--b: var(--disabled);
|
||||
cursor: not-allowed;
|
||||
opacity: 0.9;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:disabled:checked {
|
||||
--b: var(--disabled-inner);
|
||||
--bc: var(--border);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:disabled + label {
|
||||
cursor: not-allowed;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:hover:not(:checked):not(:disabled) {
|
||||
--bc: var(--border-hover);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:focus {
|
||||
box-shadow: 0 0 0 var(--focus);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch) {
|
||||
width: 21px;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):after {
|
||||
opacity: var(--o, 0);
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):checked {
|
||||
--o: 1;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"] + label {
|
||||
display: inline-block;
|
||||
vertical-align: middle;
|
||||
cursor: pointer;
|
||||
margin-left: 4px;
|
||||
}
|
||||
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch) {
|
||||
border-radius: 7px;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):after {
|
||||
width: 5px;
|
||||
height: 9px;
|
||||
border: 2px solid var(--active-inner);
|
||||
border-top: 0;
|
||||
border-left: 0;
|
||||
left: 7px;
|
||||
top: 4px;
|
||||
transform: rotate(var(--r, 20deg));
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"]:not(.switch):checked {
|
||||
--r: 43deg;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"].switch {
|
||||
width: 38px;
|
||||
border-radius: 11px;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"].switch:after {
|
||||
left: 2px;
|
||||
top: 2px;
|
||||
border-radius: 50%;
|
||||
width: 17px;
|
||||
height: 17px;
|
||||
background: var(--ab, var(--border));
|
||||
transform: translateX(var(--x, 0));
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"].switch:checked {
|
||||
--ab: var(--active-inner);
|
||||
--x: 17px;
|
||||
}
|
||||
.checkbox-wrapper-14 input[type="checkbox"].switch:disabled:not(:checked):after {
|
||||
opacity: 0.6;
|
||||
}
|
||||
}
|
||||
|
||||
.checkbox-wrapper-14 * {
|
||||
box-sizing: inherit;
|
||||
}
|
||||
.checkbox-wrapper-14 *:before,
|
||||
.checkbox-wrapper-14 *:after {
|
||||
box-sizing: inherit;
|
||||
}
|
||||
|
||||
.checkbox-wrapper-14 {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.mobile_only {
|
||||
display: none;
|
||||
}
|
||||
|
||||
textarea,
|
||||
pre {
|
||||
background-color: white;
|
||||
color: var(--primary);
|
||||
border: 2px solid var(--green); /* Green */
|
||||
border-radius: 0.2rem;
|
||||
margin: 0.5rem 0;
|
||||
min-height: 10rem;
|
||||
padding: 0.2rem;
|
||||
}
|
||||
|
||||
/* Narrow screens: stack the page with flexbox and swap the
   desktop-only / mobile-only helper elements. */
@media (max-width: 600px) {
  main {
    /* The former `width: 100wv` used a unit that does not exist, so browsers
       discarded it and the base `width: 90vw` stayed in effect. It is removed
       here rather than corrected to `100vw`, because together with the base
       `margin: 5vw` a 100vw box would overflow the viewport horizontally. */
    display: flex;
  }

  main > div {
    width: 100%;
  }

  .desktop_only {
    display: none;
  }

  .mobile_only {
    display: unset;
  }
}
|
||||
|
||||
@@ -3,29 +3,36 @@
|
||||
<head>
|
||||
<title>Anki Converter</title>
|
||||
<link rel="stylesheet" href="/static/style.css" />
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="description" content="Anki Flashcard Formatter" />
|
||||
<meta name="keywords" content="Anki" />
|
||||
<meta name="author" content="Tom Tröger" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
</head>
|
||||
<body>
|
||||
<main>
|
||||
<div id="left">
|
||||
<h1>Upload File/Enter Text</h1>
|
||||
<form action="/upload" method="post" enctype="multipart/form-data">
|
||||
<h1 class="input">Upload File/Enter Text</h1>
|
||||
<form id="left" action="/upload" method="post" enctype="multipart/form-data" class="">
|
||||
<div class="form_button">
|
||||
<label class="custom-file-upload">
|
||||
<input id="myfile" type="file" name="file" accept=".pdf" />
|
||||
<span>Custom Upload</span>
|
||||
<span>Upload</span>
|
||||
</label>
|
||||
<button type="submit">Convert</button>
|
||||
<br />
|
||||
<textarea name="text">{% if resp_text %}{{ base_text }}{% else %}Text in here{% endif %}</textarea>
|
||||
</form>
|
||||
</div>
|
||||
<div class="checkbox-wrapper-14">
|
||||
<input id="s1-14" type="checkbox" class="switch" {% if format_indents %}checked {% endif %} name="format_indents" />
|
||||
<label for="s1-14" class="mobile_only">Stichpunkte</label>
|
||||
</div>
|
||||
</div>
|
||||
<br />
|
||||
<textarea name="text">{% if resp_text %}{{ base_text }}{% else %}Text in here{% endif %}</textarea>
|
||||
</form>
|
||||
{% if resp_text %}
|
||||
<h1 class="output">Converted Text</h1>
|
||||
<div id="right">
|
||||
<h1>Converted Text</h1>
|
||||
<button id="copy-btn">Copy Text</button>
|
||||
<pre id="myInput">
|
||||
{{ resp_text }}
|
||||
</pre
|
||||
>
|
||||
<br />
|
||||
<pre id="myInput">{{ resp_text }}</pre>
|
||||
</div>
|
||||
{% endif %}
|
||||
</main>
|
||||
|
||||
Reference in New Issue
Block a user