35 lines
880 B
Python
35 lines
880 B
Python
import json
|
|
import html
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
# Path of the input JSON file and path of the JSON file this script writes.
fn = "./by_date/2022-09-20.json"
tn = "./training-data/01.json"

# Load the input JSON file. The encoding is pinned to UTF-8 so that decoding
# does not depend on the platform's locale encoding.
with open(fn, 'r', encoding='utf-8') as f:
    input_data = json.load(f)

# Create a list to store the output data
output_data = []

# Loop through each entry in the input JSON data
for entry in input_data:
    # Decode HTML entities first, then parse the result as HTML and keep only
    # its text content.
    # NOTE(review): presumably 'description' holds entity-escaped HTML, which
    # is why it is unescaped before parsing - confirm against the data source.
    description = html.unescape(entry['description'])
    soup = BeautifulSoup(description, 'html.parser')
    description = soup.get_text()

    # Create a new dictionary with the "prompt" and "output" fields: the prompt
    # is the title followed by the plain-text description, and the output is
    # an empty string.
    output_entry = {
        'prompt': f"{entry['title']} {description}",
        'output': '',
    }

    # Append the new dictionary to the output data list
    output_data.append(output_entry)

# Write the output data to a new JSON file. ensure_ascii=False makes json.dump
# emit non-ASCII characters verbatim, so the file is opened explicitly as
# UTF-8 rather than with the locale default (which may be unable to encode
# them).
with open(tn, 'w', encoding='utf-8') as f:
    json.dump(output_data, f, ensure_ascii=False, indent=4)
|