"""Collect entries from a fixed list of news feeds into one JSON file.

Every feed listed in ``journals`` is fetched and parsed with feedparser.
Each entry is flattened into a small dict (id, journal, title, description,
url, date, tags) and appended to the module-level ``data`` list, which is
finally dumped to ``json/<YYYY-MM-DD-HH-MM>.json``.

The script runs at import time (there is no ``__main__`` guard).
"""
import datetime
import hashlib
import json
import os
import time

import feedparser

# Feeds to collect: display name of the outlet and the URL of its feed.
journals = [
    {"name": "L'Humanité", "url": "https://www.humanite.fr/rss/actu.rss"},
    {"name": "Alternative Economiques", "url": "https://www.alternatives-economiques.fr/rss.xml"},
    {"name": "Basta", "url": "https://basta.media/spip.php?page=backend"},
    {"name": "Bondy Blog", "url": "https://www.bondyblog.fr/feed"},
    {"name": "La Déferlante", "url": "https://revueladeferlante.fr/feed/"},
    {"name": "OFF Investigation", "url": "https://www.off-investigation.fr/feed/"},
    {"name": "Frustration", "url": "https://www.frustrationmagazine.fr/feed"},
    {"name": "Lundi Matin", "url": "https://lundi.am/spip.php?page=backend"},
    {"name": "Courrier international", "url": "https://www.courrierinternational.com/feed/all/rss.xml"},
    {"name": "Les Économistes Atterrés", "url": "https://www.atterres.org/feed"},
    {"name": "Acrimed", "url": "https://www.acrimed.org/spip.php?page=backend"},
    {"name": "Libération", "url": "https://www.liberation.fr/arc/outboundfeeds/rss/?outputType=xml"},
    {"name": "Médiapart", "url": "https://www.mediapart.fr/articles/feed"},
    {"name": "Franceinfo", "url": "https://www.francetvinfo.fr/titres.rss"},
    {"name": "Le Monde Diplomatique", "url": "https://www.monde-diplomatique.fr/spip.php?page=backend&id_mot="},
    {"name": "Le Monde", "url": "https://www.lemonde.fr/rss/une.xml"},
    {"name": "BLAST", "url": "https://api.blast-info.fr/rss.xml"},
    {"name": "Les Jours", "url": "https://lesjours.fr/rss.xml"},
    {"name": "Arrêt sur Images", "url": "https://api.arretsurimages.net/api/public/rss/all-content"},
    {"name": "Élucid", "url": "https://elucid.media/feed"},
    {"name": "reflets", "url": "https://reflets.info/feeds/public"},
    {"name": "StreetPress", "url": "https://backend.streetpress.com/rss.xml"},
    {"name": "Presse-citron", "url": "https://www.presse-citron.net/feed/"},
    {"name": "korii", "url": "https://korii.slate.fr/rss.xml"},
    {"name": "Slate", "url": "https://www.slate.fr/rss.xml"},
    {"name": "Le HuffPost", "url": "https://www.huffingtonpost.fr/rss/all_headline.xml"},
    {"name": "Numerama", "url": "https://www.numerama.com/feed/"},
    {"name": "Le Figaro", "url": "https://www.lefigaro.fr/rss/figaro_actualites-a-la-une.xml"},
    {"name": "La Croix", "url": "https://www.la-croix.com/RSS/UNIVERS"},
    {"name": "nvo", "url": "https://nvo.fr/feed/?post_type=post"},
    {"name": "Usine Nouvelle", "url": "https://www.usinenouvelle.com/rss/"},
    {"name": "Fakir", "url": "http://www.fakirpresse.info/spip.php?page=backend"},
    {"name": "CQFD", "url": "https://cqfd-journal.org/spip.php?page=backend"},
    {"name": "Politis", "url": "https://www.politis.fr/flux-rss-politis-fr/"},
    {"name": "afriqueXXI", "url": "https://afriquexxi.info/?page=backend&lang=fr"},
    {"name": "Rapports de Force", "url": "https://rapportsdeforce.fr/feed"},
    {"name": "Reporterre", "url": "https://reporterre.net/spip.php?page=backend-simple"},
    {"name": "Science Critique", "url": "https://sciences-critiques.fr/feed/"},
    {"name": "Socialter", "url": "https://www.socialter.fr/rss"},
    {"name": "Terrestres", "url": "https://www.terrestres.org/feed/"},
    {"name": "Miroir Social", "url": "https://www.miroirsocial.com/rss.xml"},
    {"name": "Le Vent Se Lève", "url": "https://lvsl.fr/feed/"},
    {"name": "Le Media", "url": "https://api.lemediatv.fr/rss.xml"},
    {"name": "Là-bas si j'y suis", "url": "https://la-bas.org/spip.php?page=backend"},
    {"name": "L’envolée", "url": "https://lenvolee.net/feed/"},
    {"name": "Jef Klak", "url": "http://jefklak.org/?feed=rss2"},
    {"name": "Disclose", "url": "https://disclose.ngo/feed/"},
    {"name": "Observatoire des multinationales", "url": "https://multinationales.org/fr/page/backend"},
    {"name": "Next INpact", "url": "https://www.nextinpact.com/rss/news.xml"},
    {"name": "Usbek & Rica", "url": "https://usbeketrica.com/fr/rss"},
]

# Blank feed record; not referenced anywhere else in this file.
empty = {"name": "", "url": ""}

# Accumulates one dict per collected feed entry (filled by parse()).
data = []

# Not referenced anywhere else in this file.
# NOTE(review): presumably meant to collect failing feeds - confirm before use.
problem = [{}]


def parsedatostring(timestamp):
    """Format a ``time.struct_time`` (or 9-tuple) as ``YYYY-MM-DD``."""
    return time.strftime('%Y-%m-%d', timestamp)


def getdate(entry):
    """Return the entry's date as ``YYYY-MM-DD``, or ``"???"`` if unknown.

    The published, updated and created timestamps are tried in that order.
    A field that exists but holds ``None`` is skipped instead of being
    handed to ``time.strftime`` (which would raise ``TypeError``).
    """
    for field in ("published_parsed", "updated_parsed", "created_parsed"):
        parsed = getattr(entry, field, None)
        if parsed is not None:
            return parsedatostring(parsed)
    return "???"


def parse(url, journal):
    """Fetch the feed at *url* and append its entries to ``data``.

    Args:
        url: Address of the feed to fetch.
        journal: Display name stored with every entry of this feed.

    Entries without a link are skipped, because the link is both the
    stored URL and the source of the entry id. A missing title or
    description is stored as an empty string.
    """
    news_feed = feedparser.parse(url)
    for entry in news_feed.entries:
        link = getattr(entry, "link", None)
        if not link:
            continue
        # Stable identifier derived from the article URL.
        entry_id = hashlib.sha256(link.encode("utf-8")).hexdigest()
        data.append({
            "id": entry_id,
            "journal": journal,
            "title": getattr(entry, "title", ""),
            "description": getattr(entry, "description", ""),
            "url": link,
            "date": getdate(entry),
            "tags": "news",
        })


for j in journals:
    journal = j["name"]
    print(journal)
    url = j["url"]
    try:
        parse(url, journal)
    except Exception as exc:
        # Best-effort collection: one broken feed must not discard the
        # entries already gathered from the others.
        print(f"  failed to parse {url}: {exc!r}")

now = datetime.datetime.now()
fn = now.strftime("json/%Y-%m-%d-%H-%M.json")
# Make sure the output directory exists before writing.
os.makedirs(os.path.dirname(fn), exist_ok=True)
with open(fn, "w", encoding="utf-8") as f:
    json.dump(data, f, indent=4, ensure_ascii=False)