import os.path
import re
import subprocess
import time
from datetime import date, datetime, timedelta

_DAYS_IN_WEEK = 7

# Groups: (1) everything before the URL, (2) the "berichtswoche" endpoint up to
# and including "datum=", (3) the date currently in the URL, (4) the remainder
# of the command. DOTALL lets groups 1 and 4 span a multi-line paste.
_WEEK_URL_PATTERN = re.compile(
    r"(.*)(https:\/\/digbe\.services\.ihk\.de\/digitales-berichtsheft\/erstellen-api\/v1\/berichtswoche\?datum=)([0-9]*-[0-9]{2}-[0-9]{2})(.*)",
    flags=re.DOTALL | re.MULTILINE,
)


def _week_starts(start: date, end: date):
    """Yield the Monday of each week from the week of *start* to the week of *end*.

    Both bounds are inclusive: the week containing *start* is always yielded
    first, and weeks are yielded as long as their Monday is not after *end*.
    Nothing is yielded when *end* lies before the Monday of *start*'s week.
    """
    monday = start - timedelta(days=start.weekday())
    while monday <= end:
        yield monday
        monday += timedelta(days=_DAYS_IN_WEEK)


def _command_for_week(base_request: str, week_start: date, file_name: str):
    """Return the cURL command that downloads *week_start* into *file_name*.

    The date in the "berichtswoche?datum=" URL of *base_request* is replaced by
    *week_start*, and ``-o file_name`` is inserted after the first ``curl ``.

    Returns ``None`` when *base_request* does not contain the expected URL, so
    the caller can tell a failed substitution from a successful one (``re.sub``
    alone would silently hand back the unchanged input).
    """
    template = f"\\g<1>\\g<2>{week_start}\\g<4>"
    command, substitutions = _WEEK_URL_PATTERN.subn(template, base_request, count=1)
    if substitutions == 0:
        return None
    # Only the leading command gets the output flag; later "curl " substrings
    # (e.g. inside a header value) must stay untouched.
    return command.replace("curl ", f"curl -o {file_name} ", 1)


def download_entries_v1() -> None:
    """Interactively download one JSON file per report week via a pasted cURL command.

    Prompts for a "Copy as cURL" command of the "berichtswoche" request and a
    start/end date (an empty end date means today). For every week touching
    that range the command is replayed with the week's Monday as ``datum`` and
    the response is written to ``week_from_<monday>.json`` in the current
    working directory. Weeks whose file already exists are skipped.

    Raises:
        ValueError: if an entered date is not in ``yyyy-mm-dd`` format.
    """
    base_request = input(
        "Search for \"berichtswoche?\" and select the second result. "
        "Copy as cURL and paste here\n"
        "Depending on your terminal running this in, you might need to replace "
        "any \\n (or \\\\\\n) with '' before pasting, as \\n can trigger the end "
        "of the input reading: "
    )
    start_date = datetime.strptime(input("enter start date to download (yyyy-mm-dd): "), "%Y-%m-%d")
    end_date = input("enter end date to download (yyyy-mm-dd): ")
    if end_date == "":
        end_date = datetime.now()
    else:
        end_date = datetime.strptime(end_date, "%Y-%m-%d")

    for week_start in _week_starts(start_date.date(), end_date.date()):
        file_name = f"week_from_{week_start}.json"
        command = _command_for_week(base_request, week_start, file_name)
        if command is None:
            # Without the date in the URL every request would fetch the same
            # week, so stop instead of producing misleading files.
            print("oops, failed to find date part in cURL")
            return

        if os.path.isfile(file_name):
            print(f"skipping already existing file {file_name}")
            continue

        print(f'Fetching week {week_start}')
        # NOTE(security): the pasted text is executed by the shell as-is. That
        # is the purpose of this tool, but only paste commands you copied
        # yourself from the browser.
        completed = subprocess.run(command, shell=True, check=False)
        if completed.returncode != 0:
            print(f"curl exited with code {completed.returncode} for week {week_start}")
        time.sleep(1)  # not sure if there is ddos protection / spam2ban

    print(
        f"\nFetched all weeks from {start_date.date()} until {end_date.date()}\nNow backup the files located at {os.getcwd()} and wait until our own report book platform is finished")


if __name__ == '__main__':
    download_entries_v1()