"""Generate a synthetic web-access log and compare compressor output sizes.

The script builds a seeded (reproducible) pool of fake URLs and client IP
addresses, writes ``loglines`` timestamped entries to ``log.txt``, then
compresses the file contents with bz2, gzip and zlib and prints each size.
"""
import bz2
import datetime
import gzip
import random
import string
import zlib

import humanfriendly

# Fixed seed so every run draws the same random values.
random.seed(12345)

population = 100  # number of distinct client IP addresses
loglines = 200  # number of entries written to log.txt
lines_per_minute = 5  # entries sharing one timestamp before it advances
domain_count = 120  # number of random domains generated
page_per_domain_min = 3  # fewest sub-pages generated per domain
page_per_domain_max = 13  # most sub-pages generated per domain
lines_per_minute_min = 3  # not referenced by the code below
lines_per_minute_max = 8  # not referenced by the code below


def _rand_word():
    """Return a string of 3 to 12 random lowercase ASCII letters."""
    word_len = random.randint(3, 12)
    alphabet = string.ascii_lowercase
    # One randint call per letter, issued after the length draw, so the
    # sequence of RNG calls (and thus the seeded output) is unchanged.
    return "".join(alphabet[random.randint(0, 25)] for _ in range(word_len))


def rand_domain():
    """Return a random domain name: 3-12 lowercase letters plus ``.com``."""
    return _rand_word() + ".com"


def rand_extension():
    """Return a random URL path: ``/`` followed by 3-12 lowercase letters."""
    return "/" + _rand_word()


def url_mkr():
    """Build the list of URLs that log entries are drawn from.

    For each of ``domain_count`` domains the list receives the bare domain
    followed by a random number of ``domain + path`` pages, between
    ``page_per_domain_min`` and ``page_per_domain_max`` inclusive.

    Returns:
        A list of URL strings (duplicates are possible).
    """
    url_list = []
    for _ in range(domain_count):
        # The page count is drawn before the domain name; keep this order so
        # the seeded random stream is consumed exactly as before.
        pages = random.randint(page_per_domain_min, page_per_domain_max)
        domain = rand_domain()
        url_list.append(domain)
        url_list.extend(domain + rand_extension() for _ in range(pages))
    return url_list


websites = url_mkr()


def ip_mkr():
    """Return a dotted-quad string whose four octets are each in 1..255."""
    return ".".join(str(random.randint(1, 255)) for _ in range(4))


popip = [ip_mkr() for _ in range(population)]


def pick_someone():
    """Return a randomly chosen address from ``popip``."""
    winner = random.randint(0, population - 1)
    return popip[winner]


def pick_website():
    """Return a randomly chosen URL from ``websites``."""
    winner = random.randint(0, len(websites) - 1)
    return websites[winner]


def date_update(date):
    """Return ``date + 1``.

    NOTE(review): not called anywhere in this script, and ``datetime``
    objects do not support adding an int -- confirm the intended argument
    type before using this.
    """
    return date + 1


def make_date(dt, rate):
    """Advance *dt* by one minute when *rate* is a multiple of ``lines_per_minute``.

    Args:
        dt: The current ``datetime.datetime`` timestamp.
        rate: An integer counter; the caller passes the number of lines
            written so far.

    Returns:
        ``dt`` plus one minute if ``rate % lines_per_minute == 0``,
        otherwise ``dt`` unchanged.
    """
    if rate % lines_per_minute == 0:
        return dt + datetime.timedelta(minutes=1)
    return dt


with open('log.txt', 'w', encoding='utf-8') as f:
    dt = datetime.datetime(2023, 1, 1, 0, 0, 0)
    # Count lines from 1: passing the 0-based index made 0 % lines_per_minute
    # == 0 fire right after the first entry, leaving the first minute with a
    # single line instead of ``lines_per_minute`` lines.
    for written in range(1, loglines + 1):
        f.write(f"{dt} {pick_someone()} {pick_website()}\n")
        dt = make_date(dt, written)
# No explicit close(): the ``with`` block already closed the file.

with open('log.txt', 'rb') as f:
    data = f.read()

# Compress the data with each library
bz2_data = bz2.compress(data)
gzip_data = gzip.compress(data)
zlib_data = zlib.compress(data)

# Print the sizes of the compressed data
print(f'log lines: {loglines}')
print(f'original size: {humanfriendly.format_size(len(data))}')
print(f'bz2 size: {humanfriendly.format_size(len(bz2_data))}')
print(f'gzip size: {humanfriendly.format_size(len(gzip_data))}')
print(f'zlib size: {humanfriendly.format_size(len(zlib_data))}')