Add backup script for v1 API

gilex-dev committed 2025-08-06 19:36:56 +02:00
parent f4a76bc976
commit 0ae08ec194
Signed by: gilex-dev
GPG Key ID: 9A2BEC7B5188D2E3
3 changed files with 61 additions and 1 deletion

.gitignore Normal file

@@ -0,0 +1 @@
week_from_*.json

README.md

@@ -2,7 +2,7 @@
 Intended for trainees in Germany to keep track of their daily tasks and qualifications they earned.
-This project will contain a script to back up existing entries from
+This project currently only contains a script to back up existing entries from
 the https://digbe.services.ihk.de/digitales-berichtsheft/erstellen-api/v1/ API v1, as it is expected to shut down on
 2025-08-07 00:00:00 CEST / UTC+2
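
The script added below replays a captured browser request once per week, rewriting only the datum= query parameter of the URL before re-running it. A minimal sketch of that rewrite, with a hypothetical captured command (the URL is real, the Cookie header is invented):

    import re

    # Hypothetical captured request: only the URL is real.
    base_request = ("curl 'https://digbe.services.ihk.de/digitales-berichtsheft/"
                    "erstellen-api/v1/berichtswoche?datum=2025-07-28' -H 'Cookie: SESSION=abc123'")
    regex = (r"(.*)(https:\/\/digbe\.services\.ihk\.de\/digitales-berichtsheft\/"
             r"erstellen-api\/v1\/berichtswoche\?datum=)([0-9]*-[0-9]{2}-[0-9]{2})(.*)")
    # Swap only the date; groups 1, 2 and 4 keep the rest of the command intact.
    print(re.sub(regex, r"\g<1>\g<2>2025-08-04\g<4>", base_request, count=1, flags=re.DOTALL))
    # -> the same command, now with datum=2025-08-04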

main.py Normal file

@@ -0,0 +1,59 @@
import os.path
import re
import subprocess
import time
from datetime import datetime, timedelta


def download_entries_v1():
    base_request = input(
        "Search for \"berichtswoche?\" and select the second result. Copy as cURL and paste here.\n"
        "Depending on the terminal you run this in, you might need to replace any \\n (or \\\\\\n) "
        "with '' before pasting, as \\n can end the input reading early: ")
    start_date = datetime.strptime(input("enter start date to download (yyyy-mm-dd): "), "%Y-%m-%d")
    end_date = input("enter end date to download (yyyy-mm-dd, leave empty for today): ")
    if end_date == "":
        end_date = datetime.now()
    else:
        end_date = datetime.strptime(end_date, "%Y-%m-%d")
    # Groups 1, 2 and 4 capture everything around the date so that only the
    # datum= query parameter is replaced in the pasted cURL command.
    regex = r"(.*)(https:\/\/digbe\.services\.ihk\.de\/digitales-berichtsheft\/erstellen-api\/v1\/berichtswoche\?datum=)([0-9]*-[0-9]{2}-[0-9]{2})(.*)"
    current_date = start_date
    days_in_week = 7
    start_next_week = (current_date + timedelta(days=days_in_week - current_date.weekday())).date()
    while start_next_week < end_date.date():
        start_next_week = (current_date + timedelta(days=days_in_week - current_date.weekday())).date()
        week_start = (current_date - timedelta(days=current_date.weekday())).date()  # Monday of this week
        subst = f"\\g<1>\\g<2>{week_start}\\g<4>"
        # re.subn also returns the number of substitutions made; plain re.sub
        # would return the unchanged input on a failed match, so the error
        # branch below would never be reached.
        result, found = re.subn(regex, subst, base_request, count=1, flags=re.DOTALL | re.MULTILINE)
        if found:
            file_name = f"week_from_{week_start}.json"
            if os.path.isfile(file_name):
                print(f"skipping already existing file {file_name}")
                current_date += timedelta(days=days_in_week)
                continue
            # Replay the captured request, writing the response to a file.
            input_metadata_content = result.replace('curl ', f'curl -o {file_name} ')
            print(f'Fetching week {week_start}')
            subprocess.Popen(input_metadata_content, shell=True).wait()
            time.sleep(1)  # not sure if there is DDoS protection / fail2ban
            current_date += timedelta(days=days_in_week)
        else:
            print("oops, failed to find the date part in the cURL command")
            break
    print(
        f"\nFetched all weeks from {start_date.date()} until {end_date.date()}\n"
        f"Now back up the files located at {os.getcwd()} and wait until our own report book platform is finished")


if __name__ == '__main__':
    download_entries_v1()
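
The week stepping above relies on datetime.weekday(), where Monday is 0. A small worked example of the arithmetic, assuming a Wednesday start date:

    from datetime import datetime, timedelta

    current_date = datetime(2025, 8, 6)  # a Wednesday, so weekday() == 2
    week_start = (current_date - timedelta(days=current_date.weekday())).date()
    start_next_week = (current_date + timedelta(days=7 - current_date.weekday())).date()
    print(week_start)       # 2025-08-04, the Monday of that week
    print(start_next_week)  # 2025-08-11, the following Monday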