CHANGE: Code Refactoring + Docker image + small fixes
This commit is contained in:
		
							
								
								
									
										27
									
								
								.vscode/launch.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								.vscode/launch.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| { | ||||
| 	// Use IntelliSense to learn about possible attributes. | ||||
| 	// Hover to view descriptions of existing attributes. | ||||
| 	// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 | ||||
| 	"version": "0.2.0", | ||||
| 	"configurations": [ | ||||
| 		{ | ||||
| 			"name": "Python Debugger: Flask", | ||||
| 			"type": "debugpy", | ||||
| 			"request": "launch", | ||||
| 			"module": "flask", | ||||
| 			"env": { "FLASK_APP": "app.py", "FLASK_DEBUG": "1" }, | ||||
| 			"args": ["run", "--no-debugger", "--no-reload"], | ||||
| 			"jinja": true, | ||||
| 			"autoStartBrowser": false, | ||||
| 			"cwd": "${workspaceFolder}/src" | ||||
| 		}, | ||||
| 		{ | ||||
| 			"name": "Python Debugger: Current File", | ||||
| 			"type": "debugpy", | ||||
| 			"request": "launch", | ||||
| 			"program": "${file}", | ||||
| 			"console": "integratedTerminal", | ||||
| 			"cwd": "${workspaceFolder}/src" | ||||
| 		} | ||||
| 	] | ||||
| } | ||||
| @@ -1,9 +1,9 @@ | ||||
| FROM python:3.9 | ||||
| FROM python:3.12.2-alpine | ||||
| COPY requirements.txt /home | ||||
| RUN pip install -r /home/requirements.txt | ||||
| COPY src webserver | ||||
| WORKDIR /webserver | ||||
|  | ||||
| ENV FLASK_DEGUB=false | ||||
| ENTRYPOINT [ "python3", "-u", "main.py" ] | ||||
| EXPOSE 5000 | ||||
| ENTRYPOINT [ "python3", "-u", "app.py" ] | ||||
| EXPOSE 5000 | ||||
|   | ||||
| @@ -1,9 +1,8 @@ | ||||
| version: '3.0' | ||||
| services: | ||||
|   web: | ||||
|     build: . | ||||
|     image: anki_convert | ||||
|     image: git.letsstein.de/tom/anki_convert | ||||
|     container_name: anki_convert | ||||
|     restart: unless-stopped | ||||
|     ports:  | ||||
|       - 8080:5000 | ||||
|     ports: | ||||
|       - 8080:5000 | ||||
|   | ||||
							
								
								
									
										4
									
								
								requirements.txt
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						
									
										4
									
								
								requirements.txt
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							| @@ -1,2 +1,2 @@ | ||||
| flask | ||||
| pdfplumber | ||||
| Flask==3.0.3 | ||||
| pdfplumber==0.11.4 | ||||
|   | ||||
| @@ -17,27 +17,26 @@ def index(): | ||||
| @app.route("/upload", methods=["POST"]) | ||||
| def upload_file(): | ||||
|     """Handles Upload -> Files will be extracted and converted, text just converted""" | ||||
|     uploaded_file = request.files["file"] | ||||
|     text = request.form["text"] | ||||
|     # Create uploads directory if it doesn't exist | ||||
|     makedirs("uploads", exist_ok=True) | ||||
| 
 | ||||
|     # Gets uploaded file or text from html input | ||||
|     uploaded_file = request.files["file"] | ||||
|     text = request.form["text"] | ||||
| 
 | ||||
|     # Check if file has been uploaded | ||||
|     if uploaded_file.filename != "": | ||||
|         # Converts uploaded file and returns new text with anki fields | ||||
|         filepath = path.join("uploads", uploaded_file.filename) | ||||
|         uploaded_file.save(filepath) | ||||
|         response_text = functions.convert(filepath) | ||||
|         text = response_text | ||||
|         # response = make_response(response_text, 200) | ||||
|         # response.mimetype = "text/plain" | ||||
|     else: | ||||
|         # Converts text and returns new text with anki fields | ||||
|         response_text = functions.convert_text(text) | ||||
|         # response = make_response(response_text, 200) | ||||
|         # response.mimetype = "text/plain" | ||||
|     # return response #redirect(url_for('index')) | ||||
|     return render_template("index.html", resp_text=response_text, base_text=text) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     # app.jinja_env.auto_reload = True | ||||
|     # app.config["TEMPLATES_AUTO_RELOAD"] = True | ||||
|     port = int(environ.get('PORT', 5000)) | ||||
|     app.run(debug=True, host='0.0.0.0', port=port) | ||||
| @@ -9,6 +9,7 @@ import pdfplumber | ||||
| def convert(file_path=False): | ||||
|     """Opens pdf and converts it into text""" | ||||
|     if not file_path: | ||||
|         # if there is no server to provide a filepath, open filepath dialog | ||||
|         import tkinter as tk | ||||
|         from tkinter import filedialog | ||||
|  | ||||
| @@ -26,42 +27,46 @@ def convert(file_path=False): | ||||
|             crop = page.crop((60, 80, page.width, page.height)) | ||||
|             text = crop.extract_text(layout=True) | ||||
|             no_trail = re.sub("\ +\\n", "\n", text)  # cleared trailing spaces | ||||
|             new_str = convert_text(no_trail) | ||||
|             conv_string.append(new_str) | ||||
|             conv_string.append(convert_text(no_trail)) | ||||
|  | ||||
|     conv_string = "#################### neue Seite ####################\n".join( | ||||
|         conv_string | ||||
|     ) | ||||
|  | ||||
|     # write converted pdf to file | ||||
|     file_path = file_path.replace(".pdf", ".txt") | ||||
|     text_file = codecs.open(file_path, "w", "utf-8") | ||||
|     text_file.write(conv_string) | ||||
|     text_file.close() | ||||
|  | ||||
|     print(f"Alles fertig, die Datei befindet sich unter {file_path}") | ||||
|     if __name__ != "__main__": | ||||
|         return conv_string | ||||
|     return conv_string | ||||
|  | ||||
|  | ||||
| def convert_text(text): | ||||
|     """Seraches for ':' and converts into anki annotation""" | ||||
|     text = str(text) | ||||
|     if "\r\n" in text: | ||||
|         text = text.replace("\r\n", "\n") | ||||
| def convert_text(text: str): | ||||
|     """Searches for ':' and converts into anki annotation""" | ||||
|     field_nr = 1 # number of anki field | ||||
|     changed_lines = [] # array with new lines -> anki fields added | ||||
|  | ||||
|     no_wrong_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text)  # clear wrong newlins | ||||
|     lines = re.split("\n", no_wrong_nl)  # split into lines | ||||
|  | ||||
|     test = 1 | ||||
|     changed_lines = [] | ||||
|     for line in lines: | ||||
|         line, num = re.subn("(:)(..+)", rf"\1 {{{{c{test}::\2}}}}", line) | ||||
|     for line in seperate(text): | ||||
|         # add anki field into line and count the number of changes | ||||
|         line, num = re.subn("(:)(..+)", rf"\1 {{{{c{field_nr}::\2}}}}", line) | ||||
|         # if anki field added increase field number | ||||
|         if num > 0: | ||||
|             test += 1 | ||||
|             field_nr += 1 | ||||
|         # add changed line to array | ||||
|         changed_lines.append(line) | ||||
|     new_str = "\n".join(changed_lines).strip() | ||||
|     if __name__ != "__main__": | ||||
|         return new_str | ||||
|  | ||||
|     return "\n".join(changed_lines).strip() | ||||
|  | ||||
| def seperate(text: str) -> list[str]: | ||||
|     """Separates a text into an array of lines""" | ||||
|     if "\r\n" in text: | ||||
|         # unifies CRLF | ||||
|         text = text.replace("\r\n", "\n") | ||||
|     # clean linebreaks if they are not paragraph breaks | ||||
|     clean_nl = re.sub("\\n\ +([A-Za-z0-9])",r" \1", text) | ||||
|     return clean_nl.split("\n") | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|   | ||||
		Reference in New Issue
	
	Block a user