diff --git a/helpers.js b/helpers.js index 3e714a886e0..7ce0fc26be4 100644 --- a/helpers.js +++ b/helpers.js @@ -1,3 +1,5 @@ +const { decode } = require('html-entities') + // configure markdown-it const transformer = require('jstransformer') const { _tr: mdTransformer } = transformer(require('jstransformer-markdown-it')) @@ -14,7 +16,11 @@ mdTransformer.render = str => renderMd(str, config) // replacements const replacements = str => { - return str.replace(/<\/?u>/g, '') + return str && str.replace(/<\/?u>/g, '') +} + +const stripHTML = str => { + return str && decode(str.replace(/(<([^>]+)>)/ig, '').trim().replace(/\n\s*/g, '\n')) } // slug @@ -34,5 +40,6 @@ module.exports = { markdown: mdTransformer.render, replacements, slugify, + stripHTML, truncate } diff --git a/package-lock.json b/package-lock.json index ff8aa48d62c..5a08ca7481a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,7 +15,11 @@ "autoprefixer": "10.3.4", "browser-sync": "2.27.5", "csso-cli": "3.0.0", + "fast-xml-parser": "3.20.0", "glob": "7.1.7", + "he": "1.2.0", + "html-entities": "2.3.2", + "jstoxmlparser": "1.0.2", "jstransformer-markdown-it": "2.1.0", "markdown-it": "12.2.0", "node-file-rev": "1.1.3", @@ -29,7 +33,6 @@ "postcss-media-variables": "2.0.1", "postcss-nesting": "8.0.1", "pug": "3.0.2", - "rss-parser": "3.12.0", "sync-request": "6.1.0" }, "engines": { @@ -1344,6 +1347,22 @@ "node": ">=8" } }, + "node_modules/fast-xml-parser": { + "version": "3.20.0", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-3.20.0.tgz", + "integrity": "sha512-cMQwDJYVDjMPU56DviszewgMKuNzuf4NQSBuDf9RgZ6FKm5QEMxW05Za8lvnuL6moxoeZVUWBlL733WmovvV6g==", + "dev": true, + "dependencies": { + "strnum": "^1.0.3" + }, + "bin": { + "xml2js": "cli.js" + }, + "funding": { + "type": "paypal", + "url": "https://paypal.me/naturalintelligence" + } + }, "node_modules/fastq": { "version": "1.11.0", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.11.0.tgz", @@ -1658,12 +1677,27 @@ "node": ">=8" } }, + "node_modules/he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "dev": true, + "bin": { + "he": "bin/he" + } + }, "node_modules/hosted-git-info": { "version": "2.8.9", "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz", "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==", "dev": true }, + "node_modules/html-entities": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.2.tgz", + "integrity": "sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ==", + "dev": true + }, "node_modules/http-basic": { "version": "8.1.3", "resolved": "https://registry.npmjs.org/http-basic/-/http-basic-8.1.3.tgz", @@ -2118,6 +2152,12 @@ "graceful-fs": "^4.1.6" } }, + "node_modules/jstoxmlparser": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/jstoxmlparser/-/jstoxmlparser-1.0.2.tgz", + "integrity": "sha512-EVy/kaRwz9kgIR3WwG3lzqP2uQgCZwd1s1f2axVgwydy7qYn+VNuCdiOIa8b7Kg1F2cM5OpYmPJ42QBmTMgUQw==", + "dev": true + }, "node_modules/jstransformer": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/jstransformer/-/jstransformer-1.0.0.tgz", @@ -3932,16 +3972,6 @@ "node": ">=0.10.0" } }, - "node_modules/rss-parser": { - "version": "3.12.0", - "resolved": "https://registry.npmjs.org/rss-parser/-/rss-parser-3.12.0.tgz", - "integrity": "sha512-aqD3E8iavcCdkhVxNDIdg1nkBI17jgqF+9OqPS1orwNaOgySdpvq6B+DoONLhzjzwV8mWg37sb60e4bmLK117A==", - "dev": true, - "dependencies": { - "entities": "^2.0.3", - "xml2js": "^0.4.19" - } - }, "node_modules/run-parallel": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", @@ -3981,12 +4011,6 @@ "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", "dev": true }, - "node_modules/sax": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz", - "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==", - "dev": true - }, "node_modules/semver": { "version": "5.7.1", "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", @@ -4500,6 +4524,12 @@ "node": ">=8" } }, + "node_modules/strnum": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.3.tgz", + "integrity": "sha512-GVoRjsqAYZkAH16GDzfTuafuwKxzKdaaCQyLaWf37gOP1e2PPbAKWoME1OmO+c4RCKMfNrrPRDLFCNBFU45N/A==", + "dev": true + }, "node_modules/supports-color": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", @@ -4904,28 +4934,6 @@ } } }, - "node_modules/xml2js": { - "version": "0.4.23", - "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz", - "integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==", - "dev": true, - "dependencies": { - "sax": ">=0.6.0", - "xmlbuilder": "~11.0.0" - }, - "engines": { - "node": ">=4.0.0" - } - }, - "node_modules/xmlbuilder": { - "version": "11.0.1", - "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz", - "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==", - "dev": true, - "engines": { - "node": ">=4.0" - } - }, "node_modules/xmlhttprequest-ssl": { "version": "1.6.2", "resolved": "https://registry.npmjs.org/xmlhttprequest-ssl/-/xmlhttprequest-ssl-1.6.2.tgz", @@ -6073,6 +6081,15 @@ "picomatch": "^2.2.1" } }, + "fast-xml-parser": { + "version": "3.20.0", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-3.20.0.tgz", + "integrity": "sha512-cMQwDJYVDjMPU56DviszewgMKuNzuf4NQSBuDf9RgZ6FKm5QEMxW05Za8lvnuL6moxoeZVUWBlL733WmovvV6g==", + "dev": true, + "requires": { + "strnum": "^1.0.3" + } + }, "fastq": { "version": "1.11.0", "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.11.0.tgz", @@ -6320,12 +6337,24 @@ "type-fest": "^0.8.0" } }, + "he": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/he/-/he-1.2.0.tgz", + "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", + "dev": true + }, "hosted-git-info": { "version": "2.8.9", "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz", "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==", "dev": true }, + "html-entities": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.2.tgz", + "integrity": "sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ==", + "dev": true + }, "http-basic": { "version": "8.1.3", "resolved": "https://registry.npmjs.org/http-basic/-/http-basic-8.1.3.tgz", @@ -6694,6 +6723,12 @@ "graceful-fs": "^4.1.6" } }, + "jstoxmlparser": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/jstoxmlparser/-/jstoxmlparser-1.0.2.tgz", + "integrity": "sha512-EVy/kaRwz9kgIR3WwG3lzqP2uQgCZwd1s1f2axVgwydy7qYn+VNuCdiOIa8b7Kg1F2cM5OpYmPJ42QBmTMgUQw==", + "dev": true + }, "jstransformer": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/jstransformer/-/jstransformer-1.0.0.tgz", @@ -8172,16 +8207,6 @@ "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", "dev": true }, - "rss-parser": { - "version": "3.12.0", - "resolved": "https://registry.npmjs.org/rss-parser/-/rss-parser-3.12.0.tgz", - "integrity": "sha512-aqD3E8iavcCdkhVxNDIdg1nkBI17jgqF+9OqPS1orwNaOgySdpvq6B+DoONLhzjzwV8mWg37sb60e4bmLK117A==", - "dev": true, - "requires": { - "entities": "^2.0.3", - "xml2js": "^0.4.19" - } - }, "run-parallel": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", @@ -8218,12 +8243,6 @@ "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", "dev": true }, - "sax": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz", - "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==", - "dev": true - }, "semver": { "version": "5.7.1", "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz", @@ -8677,6 +8696,12 @@ "min-indent": "^1.0.0" } }, + "strnum": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.3.tgz", + "integrity": "sha512-GVoRjsqAYZkAH16GDzfTuafuwKxzKdaaCQyLaWf37gOP1e2PPbAKWoME1OmO+c4RCKMfNrrPRDLFCNBFU45N/A==", + "dev": true + }, "supports-color": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", @@ -9002,22 +9027,6 @@ "dev": true, "requires": {} }, - "xml2js": { - "version": "0.4.23", - "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz", - "integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==", - "dev": true, - "requires": { - "sax": ">=0.6.0", - "xmlbuilder": "~11.0.0" - } - }, - "xmlbuilder": { - "version": "11.0.1", - "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-11.0.1.tgz", - "integrity": "sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA==", - "dev": true - }, "xmlhttprequest-ssl": { "version": "1.6.2", "resolved": "https://registry.npmjs.org/xmlhttprequest-ssl/-/xmlhttprequest-ssl-1.6.2.tgz", diff --git a/package.json b/package.json index 9aaa198746b..31c91ad3870 100644 --- a/package.json +++ b/package.json @@ -35,7 +35,11 @@ "autoprefixer": "10.3.4", "browser-sync": "2.27.5", "csso-cli": "3.0.0", + "fast-xml-parser": "3.20.0", "glob": "7.1.7", + "he": "1.2.0", + "html-entities": "2.3.2", + "jstoxmlparser": "1.0.2", "jstransformer-markdown-it": "2.1.0", "markdown-it": "12.2.0", "node-file-rev": "1.1.3", @@ -49,7 +53,6 @@ "postcss-media-variables": "2.0.1", "postcss-nesting": "8.0.1", "pug": "3.0.2", - "rss-parser": "3.12.0", "sync-request": "6.1.0" } } diff --git a/src/episode.pug b/src/episode.pug index 7dbe14c886a..651b4ad371c 100644 --- a/src/episode.pug +++ b/src/episode.pug @@ -2,7 +2,7 @@ extends /template.pug block vars - const title = episode.title - - const description = episode.contentSnippet + - const description = episode.description - const cardImage = episode.image - const cardSize = episode.image.match('cloudfront') ? 3000 : 1400 diff --git a/tasks/fetch_feed.js b/tasks/fetch_feed.js index 49b95dca521..ec0372f13ae 100644 --- a/tasks/fetch_feed.js +++ b/tasks/fetch_feed.js @@ -1,86 +1,109 @@ const { writeFileSync } = require('fs') const { join, resolve } = require('path') -const { replacements, slugify } = require('../helpers') +const { replacements, slugify, stripHTML } = require('../helpers') const { masterFeedUrl, publicFeedUrl } = require('../content/meta.json') const request = require('sync-request') -const Parser = require('rss-parser') +const parser = require('fast-xml-parser') +const JSON2XMLParser = require("fast-xml-parser").j2xParser; +const he = require('he') const dir = resolve(__dirname, '..') const write = (name, data) => writeFileSync(join(dir, name), data) const writeJSON = (name, data) => write(`generated/${name}.json`, JSON.stringify(data, null, 2)) -const parseBaseInfoFromMatch = m => { - let [, categoryName = 'News', number, titlePlain] = m ? m : [,,,] - if (!number) categoryName = 'Verschiedenes' - if (categoryName === 'Der-Weg') categoryName = 'Der Weg' - return { categoryName, number, titlePlain } +const commonOpts = { + attributeNamePrefix: "", + attrNodeName: "__attr", + textNodeName: "#text", + ignoreAttributes: false, + cdataTagName: "__cdata", + cdataPositionChar: "\\c" } -const parseInfo = e => { - const titleMatch = e.title.match(/([\w\s]+?)?\s?#(\d+) - (.*)/) || [,,,e.title] - const { categoryName, number, titlePlain } = parseBaseInfoFromMatch(titleMatch) - const blockMatch = e.contentSnippet.match(/Blockzeit\s(\d+)/) +const xml2jsonOpts = { + ...commonOpts, + ignoreNameSpace: false, + parseNodeValue: true, + parseAttributeValue: false, + trimValues: true, + parseTrueNumberOnly: false, + arrayMode: false, + numParseOptions: { + hex: true, + leadingZeros: true, + }, + tagValueProcessor: val => he.decode(val), + attrValueProcessor: val => he.decode(val, { isAttributeValue: true }) +} + +var json2xmlOpts = { + ...commonOpts, + format: false, + indentBy: " ", + supressEmptyNode: false, + tagValueProcessor: a => a, + attrValueProcessor: a => he.encode(a, { isAttributeValue: true, useNamedReferences: true }) +}; + +const parseEpisode = e => { + const guid = e.guid['#text'] + const title = e.title.__cdata.trim() + const content = replacements(e.description.__cdata).trim() + const description = stripHTML(content) + let [, categoryName = 'News', number, titlePlain] = title.match(/([\w\s]+?)?\s?#(\d+) - (.*)/) || [, , , title] + if (!number) categoryName = 'Verschiedenes' + if (categoryName === 'Der-Weg') categoryName = 'Der Weg' + const firstLine = description.split('\n')[0] + const blockMatch = firstLine.match(/Blockzeit\s(\d+)/) const block = blockMatch ? parseInt(blockMatch[1]) : null const category = slugify(categoryName) const slug = slugify(`${categoryName} ${number || ''} ${titlePlain}`) - return { block, category, categoryName, number, titlePlain, slug } + const date = new Date(e.pubDate) + const img = e['itunes:image'].__attr.href + const image = ['interview', 'lesestunde', 'verschiedenes'].includes(category) ? img : `/img/cover/${category}.png` + const duration = e['itunes:duration'] + const enclosure = e.enclosure.__attr + const [, participantsString] = firstLine.match(/ - (?:(?:von und )?mit )(.*)/i) || [] + const participants = participantsString ? participantsString.replace(/(\s*,\s*|\s*und\s*)/ig, '%').split('%') : [] + return { block, category, categoryName, number, title, titlePlain, description, content, duration, slug, image, guid , date, enclosure, participants } } ;(async () => { // Load and adapt feed - const xml = request('GET', masterFeedUrl).getBody('utf8') - .replace(/(.*?)<\/itunes:email>/g, 'einundzwanzigpodcast@pm.me') + const anchorXML = request('GET', masterFeedUrl).getBody('utf8') + const xml = anchorXML .replace(`"${masterFeedUrl}"`, `"${publicFeedUrl}"`) + .replace('xmlns:anchor="https://anchor.fm/xmlns"', 'xmlns:anchor="https://anchor.fm/xmlns" xmlns:podcast="https://podcastindex.org/namespace/1.0"') + .replace('', ` + + + + + + + `) - // Parse feed - const parser = new Parser() - const feed = await parser.parseString(xml) + const feed = parser.parse(xml, xml2jsonOpts, true) + const episodes = [] - // Original Anchor-Feed - write('dist/feed.xml', xml) - writeJSON('feed', feed) + delete feed.rss.channel.author // remove invalid tag - // All episodes - const episodes = feed.items.map(e => { - const info = parseInfo(e) - const image = ['interview', 'lesestunde', 'verschiedenes'].includes(info.category) ? e.itunes.image : `/img/cover/${info.category}.png` + feed.rss.channel.item = feed.rss.channel.item.map(item => { + const episode = parseEpisode(item) + episodes.push(episode) return { - title: e.title.trim(), - content: replacements(e.content.trim()), - contentSnippet: replacements(e.contentSnippet.trim()), - anchor: e.link, - date: e.isoDate, - enclosure: e.enclosure, - duration: e.itunes.duration, - season: e.itunes.season, - episode: e.itunes.episode, - guid: e.guid, - image, - originalImage: e.itunes.image, - ...info + ...item, + link: `https://einundzwanzig.space/podcast/${episode.slug}`, // replace Anchor link + 'itunes:summary': episode.description // please the validator, Anchor's itunes:summary contains HTML } }) - writeJSON('episodes', episodes) + writeJSON('feed', feed) - // Original Anchor-Feed - const updated = xml - .replace(/(https:\/\/anchor\.fm\/einundzwanzig\/episodes\/(.*?))<\/link>/gi, (match, url, anchorSlug) => { - const slugMatch = anchorSlug.match(/^(?:(.*)-)?([0-9]+?)---/) - const { categoryName, number } = parseBaseInfoFromMatch(slugMatch) - const episode = slugMatch ? episodes.find(e => e.categoryName == categoryName && e.number === number) : null - const link = episode ? `https://einundzwanzig.space/podcast/${episode.slug}` : url - return `${link}` - }) - .replace('xmlns:anchor="https://anchor.fm/xmlns"', 'xmlns:anchor="https://anchor.fm/xmlns" xmlns:podcast="https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/1.0.md"') - .replace('', ` - - - - - - - - `) - write('dist/feed.xml', updated) + const JSON2XML = new JSON2XMLParser(json2xmlOpts) + const outputXML = JSON2XML.parse(feed) + + writeJSON('episodes', episodes) + write('dist/feed.xml', outputXML) + write('dist/anchor.xml', anchorXML) })()