import json
import html
from bs4 import BeautifulSoup
# Path of the JSON file to read and path of the JSON file to write.
fn = "./by_date/2022-09-20.json"
tn = "./training-data/01.json"

# Load the input JSON file. The encoding is pinned to UTF-8 so decoding does
# not depend on the platform's locale default.
with open(fn, 'r', encoding='utf-8') as f:
    input_data = json.load(f)

# Create a list to store the output data
output_data = []

# Loop through each entry in the input JSON data. Every entry is indexed with
# 'title' and 'description'; a missing key raises KeyError.
for entry in input_data:
    # Decode HTML entities first, then parse the result and keep only the
    # text content, discarding any markup.
    # NOTE(review): presumably the descriptions are stored entity-escaped,
    # which is why they are unescaped before parsing -- confirm against the
    # input data.
    description = html.unescape(entry['description'])
    soup = BeautifulSoup(description, 'html.parser')
    description = soup.get_text()

    # Create a new dictionary with the "prompt" and "output" fields.
    # 'output' is written as an empty string.
    output_entry = {
        'prompt': f"{entry['title']} {description}",
        'output': '',
    }

    # Append the new dictionary to the output data list
    output_data.append(output_entry)

# Write the output data to a new JSON file. ensure_ascii=False emits
# non-ASCII characters unescaped, so the file must be opened as UTF-8;
# with the locale default this can fail or produce non-UTF-8 output.
with open(tn, 'w', encoding='utf-8') as f:
    json.dump(output_data, f, ensure_ascii=False, indent=4)