"""Build prompt/output training records from a JSON file of entries.

Reads the JSON document at ``fn``, and for every entry emits a record whose
``prompt`` is the entry's title followed by its description reduced to plain
text, and whose ``output`` is an empty string. The resulting list is written
as pretty-printed JSON to ``tn``.

NOTE(review): the input is assumed to be a JSON array of objects that each
carry ``title`` and ``description`` keys -- confirm against the producer of
the ``by_date`` files.
"""
import html
import json

from bs4 import BeautifulSoup

fn = "./by_date/2022-09-20.json"
tn = "./training-data/01.json"


def _plain_text(markup):
    """Return *markup* with HTML entities decoded and all tags removed.

    A missing description (``None``) is treated as empty text instead of
    letting ``html.unescape`` fail with a ``TypeError``.
    """
    if markup is None:
        return ""
    # Decode entities first so that escaped markup (e.g. "&lt;p&gt;") is
    # seen as real tags by the parser and stripped as well.
    decoded = html.unescape(markup)
    return BeautifulSoup(decoded, 'html.parser').get_text()


def _to_record(entry):
    """Map one input entry to its ``{'prompt', 'output'}`` training record.

    Raises:
        KeyError: if the entry has no ``title``.
    """
    description = _plain_text(entry.get('description'))
    return {
        'prompt': f"{entry['title']} {description}",
        'output': '',
    }


# Load the input JSON file. The encoding is given explicitly so the result
# does not depend on the platform's default locale.
with open(fn, 'r', encoding='utf-8') as f:
    input_data = json.load(f)

# One output record per input entry, in the original order.
output_data = [_to_record(entry) for entry in input_data]

# Write the records to a new JSON file. ensure_ascii=False emits non-ASCII
# characters verbatim, so the file must be opened as UTF-8 to be able to
# encode them on every platform.
with open(tn, 'w', encoding='utf-8') as f:
    json.dump(output_data, f, ensure_ascii=False, indent=4)