60 lines
2.3 KiB
Python
60 lines
2.3 KiB
Python
import re
|
|
import os.path
|
|
import subprocess
|
|
import time
|
|
from datetime import datetime, timedelta
|
|
|
|
|
|
_DAYS_IN_WEEK = 7

# Splits the pasted cURL command around the "berichtswoche?datum=<date>" URL:
# group 1 = everything before the URL, group 2 = the URL up to "datum=",
# group 3 = the date that gets replaced, group 4 = everything after the date.
_WEEK_URL_PATTERN = re.compile(
    r"(.*)(https:\/\/bildung\.service\.ihk\.de\/berichtsheft\/erstellen-api\/v1\/berichtswoche\?datum=)([0-9]*-[0-9]{2}-[0-9]{2})(.*)",
    flags=re.DOTALL | re.MULTILINE,
)


def _week_starts(first_day, last_day):
    """Yield the Monday of every week from first_day's week through last_day's week."""
    monday = first_day - timedelta(days=first_day.weekday())
    while monday <= last_day:
        yield monday
        monday += timedelta(days=_DAYS_IN_WEEK)


def _request_for_week(base_request, week_start):
    """Return base_request with its datum= date set to week_start, or None if the URL is absent."""
    # subn reports how many replacements happened; plain sub() hands back the
    # untouched input on a miss, which is indistinguishable from success.
    request, replacements = _WEEK_URL_PATTERN.subn(
        f"\\g<1>\\g<2>{week_start}\\g<4>", base_request, count=1
    )
    return request if replacements else None


def download_entries_v1():
    """Interactively download one JSON file per report week via a pasted cURL command.

    Prompts for a "Copy as cURL" command targeting the ``berichtswoche?datum=``
    endpoint, a start date and an optional end date (empty means today). For
    every week (Monday-based) from the week containing the start date through
    the week containing the end date, the date in the URL is replaced with
    that week's Monday and the command is run with ``-o week_from_<monday>.json``
    in the current working directory. Weeks whose file already exists are
    skipped.

    Returns:
        None. Progress and problems are reported on stdout.

    Raises:
        ValueError: if an entered date does not match ``yyyy-mm-dd``.
    """
    base_request = input(
        "Search for \"berichtswoche?\" and select the result initiated by 'polyfills*'. Copy as cURL and paste here\nDepending on your browser / terminal running this in, you might need to replace any \\, \\n (or \\\\\\n) with '' before pasting, as \\n can trigger the end of the input reading: ")

    start_day = datetime.strptime(
        input("enter start date to download (yyyy-mm-dd): ").strip(), "%Y-%m-%d"
    ).date()
    end_text = input("enter end date to download (yyyy-mm-dd): ").strip()
    # An empty answer means "everything up to today".
    if end_text:
        end_day = datetime.strptime(end_text, "%Y-%m-%d").date()
    else:
        end_day = datetime.now().date()

    if end_day < start_day:
        print(f"end date {end_day} is before start date {start_day}, nothing to download")
        return

    for week_start in _week_starts(start_day, end_day):
        request = _request_for_week(base_request, week_start)
        if request is None:
            # Stop instead of re-running the unmodified command for every week.
            print("oops, failed to find date part in cURL")
            return

        file_name = f"week_from_{week_start}.json"
        if os.path.isfile(file_name):
            print(f"skipping already existing file {file_name}")
            continue

        # Only the command name itself gets the output option; a later
        # "curl " inside a header or cookie value must stay untouched.
        command = request.replace('curl ', f'curl -o {file_name} ', 1)

        print(f'Fetching week {week_start}')
        # shell=True is deliberate: the pasted text is a complete shell command
        # line (quoting, line continuations) supplied by the user themselves.
        exit_code = subprocess.run(command, shell=True).returncode
        if exit_code != 0:
            print(f"curl exited with status {exit_code} for week {week_start}")
        time.sleep(1)  # not sure if there is ddos protection / spam2ban

    print(
        f"\nFetched all weeks from {start_day} until {end_day}\nNow backup the files located at {os.getcwd()} and wait until our own report book platform is finished")
|
|
|
|
|
|
# Start the interactive download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    download_entries_v1()
|