Print statistics about cache hit/miss

This commit is contained in:
Marco Borgeaud 2024-08-14 09:20:19 +02:00
parent 997bd49f75
commit 71fa9cb6d3

View File

@@ -147,18 +147,44 @@ def get_all_links_from_htmls(dir):
def probe_links(urls): def probe_links(urls):
errors = [] errors = []
link_cache_exception = 0
link_cache_hit = 0
link_cache_miss = 0
print("Testing links") print("Testing links")
for url in urls: link_count = len(urls)
print(f"{url} in {len(urls[url])} files") for idx, url in enumerate(urls):
print(f"[{idx+1}/{link_count}] {url} in {len(urls[url])} files")
if url in EXCEPTIONS: if url in EXCEPTIONS:
link_cache_exception += 1
print("skip as an exception") print("skip as an exception")
elif url_was_reached_recently(url): elif url_was_reached_recently(url):
link_cache_hit += 1
print("skip probing because it was reached recently") print("skip probing because it was reached recently")
elif live_url(url, timeout=5): elif live_url(url, timeout=5):
link_cache_miss += 1
rejuvenate_url(url) rejuvenate_url(url)
elif url_is_long_dead(url): elif url_is_long_dead(url):
link_cache_miss += 1
errors.append(url) errors.append(url)
return errors else:
link_cache_miss += 1
confirmed_errors = confirm_errors(errors, urls)
print(f"\n\n\n{'=' * 80}\n\n\n")
if confirmed_errors:
report_errors(confirmed_errors, urls)
print(f"{len(confirmed_errors)}/{len(urls)} links are dead, see above ^^ the list and the related files\n\n")
print("Cache statistics:")
print(f"\t{link_cache_hit=}")
print(f"\t{link_cache_miss=}")
link_cache_hit_ratio = (link_cache_hit) / (link_cache_hit + link_cache_miss)
print(f"\t{link_cache_hit_ratio:03.2%} hits")
print(f"\t{link_cache_exception=}")
print(f"\n\n\n{'=' * 80}\n\n\n")
success = len(confirmed_errors) == 0
return success
def confirm_errors(presumed_errors, urls): def confirm_errors(presumed_errors, urls):
confirmed_errors = [] confirmed_errors = []
@@ -180,16 +206,9 @@ def report_errors(errors, urls):
def check_html_links(dir): def check_html_links(dir):
load_url_probing_history() load_url_probing_history()
urls = get_all_links_from_htmls(dir) urls = get_all_links_from_htmls(dir)
errors = probe_links(urls) success = probe_links(urls)
exit_code = 0 if success:
if errors:
confirmed_errors = confirm_errors(errors, urls)
if confirmed_errors:
report_errors(confirmed_errors, urls)
print(f"{len(confirmed_errors)}/{len(urls)} links are dead, see above ^^ the list and the related files")
exit_code = 1
if exit_code == 0:
print(f"All {len(urls)} links are good") print(f"All {len(urls)} links are good")
save_url_probing_history() save_url_probing_history()
exit(exit_code) exit(0 if success else 1)