266 lines
6.5 KiB
Python
266 lines
6.5 KiB
Python
import datetime
import hashlib
import json
import os
import time

import feedparser
|
||
|
||
# (display name, feed URL) pairs, one per outlet to aggregate.
_FEEDS = (
    ("L'Humanité", "https://www.humanite.fr/rss/actu.rss"),
    ("Alternative Economiques", "https://www.alternatives-economiques.fr/rss.xml"),
    ("Basta", "https://basta.media/spip.php?page=backend"),
    ("Bondy Blog", "https://www.bondyblog.fr/feed"),
    ("La Déferlante", "https://revueladeferlante.fr/feed/"),
    ("OFF Investigation", "https://www.off-investigation.fr/feed/"),
    ("Frustration", "https://www.frustrationmagazine.fr/feed"),
    ("Lundi Matin", "https://lundi.am/spip.php?page=backend"),
    ("Courrier international", "https://www.courrierinternational.com/feed/all/rss.xml"),
    ("Les Économistes Atterrés", "https://www.atterres.org/feed"),
    ("Acrimed", "https://www.acrimed.org/spip.php?page=backend"),
    ("Libération", "https://www.liberation.fr/arc/outboundfeeds/rss/?outputType=xml"),
    ("Médiapart", "https://www.mediapart.fr/articles/feed"),
    ("Franceinfo", "https://www.francetvinfo.fr/titres.rss"),
    ("Le Monde Diplomatique", "https://www.monde-diplomatique.fr/spip.php?page=backend&id_mot="),
    ("Le Monde", "https://www.lemonde.fr/rss/une.xml"),
    ("BLAST", "https://api.blast-info.fr/rss.xml"),
    ("Les Jours", "https://lesjours.fr/rss.xml"),
    ("Arrêt sur Images", "https://api.arretsurimages.net/api/public/rss/all-content"),
    ("Élucid", "https://elucid.media/feed"),
    ("reflets", "https://reflets.info/feeds/public"),
    ("StreetPress", "https://backend.streetpress.com/rss.xml"),
    ("Presse-citron", "https://www.presse-citron.net/feed/"),
    ("korii", "https://korii.slate.fr/rss.xml"),
    ("Slate", "https://www.slate.fr/rss.xml"),
    ("Le HuffPost", "https://www.huffingtonpost.fr/rss/all_headline.xml"),
    ("Numerama", "https://www.numerama.com/feed/"),
    ("Le Figaro", "https://www.lefigaro.fr/rss/figaro_actualites-a-la-une.xml"),
    ("La Croix", "https://www.la-croix.com/RSS/UNIVERS"),
    ("nvo", "https://nvo.fr/feed/?post_type=post"),
    ("Usine Nouvelle", "https://www.usinenouvelle.com/rss/"),
    ("Fakir", "http://www.fakirpresse.info/spip.php?page=backend"),
    ("CQFD", "https://cqfd-journal.org/spip.php?page=backend"),
    ("Politis", "https://www.politis.fr/flux-rss-politis-fr/"),
    ("afriqueXXI", "https://afriquexxi.info/?page=backend&lang=fr"),
    ("Rapports de Force", "https://rapportsdeforce.fr/feed"),
    ("Reporterre", "https://reporterre.net/spip.php?page=backend-simple"),
    ("Science Critique", "https://sciences-critiques.fr/feed/"),
    ("Socialter", "https://www.socialter.fr/rss"),
    ("Terrestres", "https://www.terrestres.org/feed/"),
    ("Miroir Social", "https://www.miroirsocial.com/rss.xml"),
    ("Le Vent Se Lève", "https://lvsl.fr/feed/"),
    ("Le Media", "https://api.lemediatv.fr/rss.xml"),
    ("Là-bas si j'y suis", "https://la-bas.org/spip.php?page=backend"),
    ("L’envolée", "https://lenvolee.net/feed/"),
    ("Jef Klak", "http://jefklak.org/?feed=rss2"),
    ("Disclose", "https://disclose.ngo/feed/"),
    ("Observatoire des multinationales", "https://multinationales.org/fr/page/backend"),
    ("Next INpact", "https://www.nextinpact.com/rss/news.xml"),
    ("Usbek & Rica", "https://usbeketrica.com/fr/rss"),
)

# Feeds to aggregate, as a list of {"name": ..., "url": ...} dicts in the
# order listed above.
journals = [{"name": feed_name, "url": feed_url} for feed_name, feed_url in _FEEDS]
|
||
|
||
|
||
# Blank journal record: same two keys as the entries of `journals`, both empty.
empty = {"name": "", "url": ""}

# Collected feed entries; parse() appends one dict per entry and the list is
# dumped to JSON at the end of the script.
data = []

# A list holding one empty dict. Nothing in the visible code reads it.
problem = [{}]
|
||
|
||
def parsedatostring(timestamp):
    """Format a time tuple as a ``YYYY-MM-DD`` date string.

    ``timestamp`` is handed unchanged to ``time.strftime``, so it must be a
    ``time.struct_time`` or an equivalent 9-item tuple.
    """
    day_format = "%Y-%m-%d"
    formatted = time.strftime(day_format, timestamp)
    return formatted
|
||
|
||
|
||
def getdate(entry):
    """Return the entry's date as ``YYYY-MM-DD``, or ``"???"`` if it has none.

    The parsed-date attributes are tried in order of preference:
    ``published_parsed``, ``updated_parsed``, then ``created_parsed``.

    An attribute that is missing *or* holds an empty value such as ``None``
    is skipped and the next candidate is tried. Previously a present-but-
    ``None`` attribute was passed on to ``time.strftime``, which raises
    ``TypeError``, and it also hid any usable later attribute.
    NOTE(review): presumably the feed parser leaves ``None`` there when it
    cannot parse the date string; confirm against its documentation.
    """
    for field in ("published_parsed", "updated_parsed", "created_parsed"):
        parsed = getattr(entry, field, None)
        if parsed:
            return parsedatostring(parsed)
    return "???"
|
||
|
||
def parse(url, journal):
    """Fetch the feed at ``url`` and append one record per entry to ``data``.

    Args:
        url: Address of the feed, passed to ``feedparser.parse``.
        journal: Display name stored in each record's ``"journal"`` field.

    Returns:
        None. Records are appended to the module-level ``data`` list.

    Each record is a dict with the keys ``id`` (SHA-256 hex digest of the
    UTF-8 encoded entry link), ``journal``, ``title``, ``description``,
    ``url``, ``date`` and ``tags``. A missing title or description becomes
    an empty string. An entry without a link is skipped, because the link
    is needed for both ``id`` and ``url``; before, such an entry raised
    ``AttributeError`` and aborted the whole run.
    """
    news_feed = feedparser.parse(url)
    for entry in news_feed.entries:
        link = getattr(entry, "link", None)
        if not link:
            # Nothing to hash and nothing to point to: drop the entry.
            continue
        entry_id = hashlib.sha256(link.encode("utf-8")).hexdigest()
        data.append({
            "id": entry_id,
            "journal": journal,
            "title": getattr(entry, "title", ""),
            "description": getattr(entry, "description", ""),
            "url": link,
            "date": getdate(entry),
            # Every record carries the same fixed tag.
            "tags": "news",
        })
|
||
|
||
# Collect the entries of every configured feed into `data`.
for j in journals:
    journal = j["name"]
    print(journal)
    url = j["url"]
    try:
        parse(url, journal)
    except Exception as exc:
        # Script-level boundary: one unreachable or malformed feed must not
        # throw away what the other feeds already produced. Report and go on.
        print("  skipped", journal, "-", repr(exc))

# The output file is named after the local time of the run
# (json/YYYY-MM-DD-HH-MM.json).
now = datetime.datetime.now()
fn = now.strftime("json/%Y-%m-%d-%H-%M.json")

# open() does not create missing directories; without this the dump fails
# only after all the feeds have been fetched.
os.makedirs(os.path.dirname(fn), exist_ok=True)

with open(fn, "w", encoding="utf-8") as f:
    json.dump(data, f, indent=4, ensure_ascii=False)