# YARB/main.py
#
# 60 lines
# 2.3 KiB
# Python

import re
import os.path
import subprocess
import time
from datetime import datetime, timedelta
def _iter_week_starts(first_day, last_day):
    """Yield the Monday of each week from ``first_day``'s week through ``last_day``'s week.

    Both arguments are ``datetime.date`` objects. The week that contains
    ``last_day`` is included. Nothing is yielded when ``last_day`` lies before
    the Monday of the week that contains ``first_day``.
    """
    week_start = first_day - timedelta(days=first_day.weekday())
    while week_start <= last_day:
        yield week_start
        week_start += timedelta(weeks=1)


def download_entries_v1() -> None:
    """Interactively download one JSON file per report week by replaying a cURL command.

    Prompts for a "Copy as cURL" command of a ``berichtswoche?datum=...``
    request and for a date range. The command is then run once per week with
    the ``datum`` query parameter set to that week's Monday, and ``-o`` is
    injected so the response is written to ``week_from_<monday>.json`` in the
    current working directory. Weeks whose file already exists are skipped.

    An empty end date means "today". The week containing the end date is
    included in the download.

    Raises:
        ValueError: If an entered date is not in ``yyyy-mm-dd`` format.
    """
    base_request = input(
        "Search for \"berichtswoche?\" and select the result initiated by 'polyfills*'. Copy as cURL and paste here\nDepending on your browser / terminal running this in, you might need to replace any \\, \\n (or \\\\\\n) with '' before pasting, as \\n can trigger the end of the input reading: ")
    start_date = datetime.strptime(input("enter start date to download (yyyy-mm-dd): ").strip(), "%Y-%m-%d")
    end_input = input("enter end date to download (yyyy-mm-dd): ").strip()
    end_date = datetime.strptime(end_input, "%Y-%m-%d") if end_input else datetime.now()

    regex = r"(.*)(https:\/\/bildung\.service\.ihk\.de\/berichtsheft\/erstellen-api\/v1\/berichtswoche\?datum=)([0-9]*-[0-9]{2}-[0-9]{2})(.*)"
    # re.sub() hands back the input unchanged when nothing matches, so its
    # result cannot signal a missing URL. Locate the date once, up front, and
    # bail out instead of replaying an unmodified request for every week.
    match = re.search(regex, base_request, flags=re.DOTALL)
    if match is None:
        print("oops, failed to find date part in cURL")
        return
    # Group 3 is the date inside the URL; everything around it is reused as-is.
    before_date = base_request[:match.start(3)]
    after_date = base_request[match.end(3):]

    for week_start in _iter_week_starts(start_date.date(), end_date.date()):
        file_name = f"week_from_{week_start}.json"
        if os.path.isfile(file_name):
            print(f"skipping already existing file {file_name}")
            continue
        # Only the leading command name gets the output flag; later
        # occurrences of "curl " (e.g. inside a header value) stay untouched.
        command = f"{before_date}{week_start}{after_date}".replace('curl ', f'curl -o {file_name} ', 1)
        print(f'Fetching week {week_start}')
        # NOTE(security): the pasted text is executed verbatim by the shell.
        # That is inherent to replaying a "Copy as cURL" command, but it means
        # only commands copied from a trusted source must be pasted here.
        completed = subprocess.run(command, shell=True)
        if completed.returncode != 0:
            print(f"warning: curl exited with status {completed.returncode} for week {week_start}")
        time.sleep(1)  # not sure if there is ddos protection / spam2ban

    print(
        f"\nFetched all weeks from {start_date.date()} until {end_date.date()}\nNow backup the files located at {os.getcwd()} and wait until our own report book platform is finished")
# Script entry point: start the interactive download only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    download_entries_v1()