Initial commit of working RSS Aggregator build

This commit is contained in:
2026-05-12 17:04:02 -03:00
parent ea3a2ca53e
commit 7ac2f6e384
4962 changed files with 1032666 additions and 0 deletions
+24
View File
@@ -0,0 +1,24 @@
# This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
name: tests
on: [push, pull_request]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
node-version: [14.x, 16.x, 18.x]
steps:
- uses: actions/checkout@v3
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v3
with:
node-version: ${{ matrix.node-version }}
- run: npm ci
- run: npm test
+14
View File
@@ -0,0 +1,14 @@
dist: trusty
language: node_js
node_js:
- "8"
before_script:
- npm install -g mocha
script: npm test
addons:
apt:
packages:
- libnss3
+21
View File
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2016 Bobby Brennan
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+271
View File
@@ -0,0 +1,271 @@
# rss-parser
[![Version][npm-image]][npm-link]
[![Build Status][build-image]][build-link]
[![Downloads][downloads-image]][npm-link]
[downloads-image]: https://img.shields.io/npm/dm/rss-parser.svg
[npm-image]: https://img.shields.io/npm/v/rss-parser.svg
[npm-link]: https://npmjs.org/package/rss-parser
[build-image]: https://github.com/rbren/rss-parser/workflows/tests/badge.svg
[build-link]: https://github.com/rbren/rss-parser/actions
A small library for turning RSS XML feeds into JavaScript objects.
## Installation
```bash
npm install --save rss-parser
```
## Usage
You can parse RSS from a URL (`parser.parseURL`) or an XML string (`parser.parseString`).
Both callbacks and Promises are supported.
### NodeJS
Here's an example in NodeJS using Promises with async/await:
```js
let Parser = require('rss-parser');
let parser = new Parser();
(async () => {
let feed = await parser.parseURL('https://www.reddit.com/.rss');
console.log(feed.title);
feed.items.forEach(item => {
console.log(item.title + ':' + item.link)
});
})();
```
### TypeScript
When using TypeScript, you can set a type to control the custom fields:
```typescript
import Parser from 'rss-parser';
type CustomFeed = {foo: string};
type CustomItem = {bar: number};
const parser: Parser<CustomFeed, CustomItem> = new Parser({
customFields: {
feed: ['foo', 'baz'],
// ^ will error because `baz` is not a key of CustomFeed
item: ['bar']
}
});
(async () => {
const feed = await parser.parseURL('https://www.reddit.com/.rss');
console.log(feed.title); // feed will have a `foo` property, typed as a string
feed.items.forEach(item => {
console.log(item.title + ':' + item.link) // item will have a `bar` property, typed as a number
});
})();
```
### Web
> We recommend using a bundler like [webpack](https://webpack.js.org/), but we also provide
> pre-built browser distributions in the `dist/` folder. If you use the pre-built distribution,
> you'll need a [polyfill](https://github.com/taylorhakes/promise-polyfill) for Promise support.
Here's an example in the browser using callbacks:
```html
<script src="/node_modules/rss-parser/dist/rss-parser.min.js"></script>
<script>
// Note: some RSS feeds can't be loaded in the browser due to CORS security.
// To get around this, you can use a proxy.
const CORS_PROXY = "https://cors-anywhere.herokuapp.com/"
let parser = new RSSParser();
parser.parseURL(CORS_PROXY + 'https://www.reddit.com/.rss', function(err, feed) {
if (err) throw err;
console.log(feed.title);
feed.items.forEach(function(entry) {
console.log(entry.title + ':' + entry.link);
})
})
</script>
```
### Upgrading from v2 to v3
A few minor breaking changes were made in v3. Here's what you need to know:
* You need to construct a `new Parser()` before calling `parseString` or `parseURL`
* `parseFile` is no longer available (for better browser support)
* `options` are now passed to the Parser constructor
* `parsed.feed` is now just `feed` (top-level object removed)
* `feed.entries` is now `feed.items` (to better match RSS XML)
## Output
Check out the full output format in [test/output/reddit.json](test/output/reddit.json)
```yaml
feedUrl: 'https://www.reddit.com/.rss'
title: 'reddit: the front page of the internet'
description: ""
link: 'https://www.reddit.com/'
items:
- title: 'The water is too deep, so he improvises'
link: 'https://www.reddit.com/r/funny/comments/3skxqc/the_water_is_too_deep_so_he_improvises/'
pubDate: 'Thu, 12 Nov 2015 21:16:39 +0000'
creator: "John Doe"
content: '<a href="http://example.com">this is a link</a> &amp; <b>this is bold text</b>'
contentSnippet: 'this is a link & this is bold text'
guid: 'https://www.reddit.com/r/funny/comments/3skxqc/the_water_is_too_deep_so_he_improvises/'
categories:
- funny
isoDate: '2015-11-12T21:16:39.000Z'
```
##### Notes:
* The `contentSnippet` field strips out HTML tags and unescapes HTML entities
* The `dc:` prefix will be removed from all fields
* Both `dc:date` and `pubDate` will be available in ISO 8601 format as `isoDate`
* If `author` is specified, but not `dc:creator`, `creator` will be set to `author` ([see article](http://www.lowter.com/blogs/2008/2/9/rss-dccreator-author))
* Atom's `updated` becomes `lastBuildDate` for consistency
## XML Options
### Custom Fields
If your RSS feed contains fields that aren't currently returned, you can access them using the `customFields` option.
```js
let parser = new Parser({
customFields: {
feed: ['otherTitle', 'extendedDescription'],
item: ['coAuthor','subtitle'],
}
});
parser.parseURL('https://www.reddit.com/.rss', function(err, feed) {
console.log(feed.extendedDescription);
feed.items.forEach(function(entry) {
console.log(entry.coAuthor + ':' + entry.subtitle);
})
})
```
To rename fields, you can pass in an array with two items, in the format `[fromField, toField]`:
```js
let parser = new Parser({
customFields: {
item: [
['dc:coAuthor', 'coAuthor'],
]
}
})
```
To pass additional flags, provide an object as the third array item. Currently there are two such flags:
* `keepArray (false)` - set to `true` to return *all* values for fields that can have multiple entries.
* `includeSnippet (false)` - set to `true` to add an additional field, `${toField}Snippet`, with HTML stripped out
```js
let parser = new Parser({
customFields: {
item: [
['media:content', 'media:content', {keepArray: true}],
]
}
})
```
### Default RSS version
If your RSS Feed doesn't contain a `<rss>` tag with a `version` attribute,
you can pass a `defaultRSS` option for the Parser to use:
```js
let parser = new Parser({
defaultRSS: 2.0
});
```
### xml2js passthrough
`rss-parser` uses [xml2js](https://github.com/Leonidas-from-XIV/node-xml2js)
to parse XML. You can pass [these options](https://github.com/Leonidas-from-XIV/node-xml2js#options)
to `new xml2js.Parser()` by specifying `options.xml2js`:
```js
let parser = new Parser({
xml2js: {
emptyTag: '--EMPTY--',
}
});
```
## HTTP Options
### Timeout
You can set the amount of time (in milliseconds) to wait before the HTTP request times out (default 60 seconds):
```js
let parser = new Parser({
timeout: 1000,
});
```
### Headers
You can pass headers to the HTTP request:
```js
let parser = new Parser({
headers: {'User-Agent': 'something different'},
});
```
### Redirects
By default, `parseURL` will follow up to five redirects. You can change this
with `options.maxRedirects`.
```js
let parser = new Parser({maxRedirects: 100});
```
### Request passthrough
`rss-parser` uses [http](https://nodejs.org/docs/latest/api/http.html#http_http_get_url_options_callback)/[https](https://nodejs.org/docs/latest/api/https.html#https_https_get_url_options_callback) module
to do requests. You can pass [these options](https://nodejs.org/docs/latest/api/https.html#https_https_request_options_callback)
to `http.get()`/`https.get()` by specifying `options.requestOptions`:
For example, to allow unauthorized certificates:
```js
let parser = new Parser({
requestOptions: {
rejectUnauthorized: false
}
});
```
## Contributing
Contributions are welcome! If you are adding a feature or fixing a bug, please be sure to add a [test case](https://github.com/bobby-brennan/rss-parser/tree/master/test/input).
### Running Tests
The tests run the RSS parser on several sample RSS feeds in `test/input` and write the resulting JSON into `test/output`. If there are any changes to the output files, the tests will fail.
To check if your changes affect the output of any test cases, run
`npm test`
To update the output files with your changes, run
`WRITE_GOLDEN=true npm test`
### Publishing Releases
```bash
npm run build
git commit -a -m "Build distribution"
npm version minor # or major/patch
npm publish
git push --follow-tags
```
+21
View File
@@ -0,0 +1,21 @@
{
"name": "rss-parser",
"description": "",
"version": "1.1.0",
"main": "dist/rss-parser.js",
"authors": [
"Bobby Brennan"
],
"license": "MIT",
"homepage": "https://github.com/bobby-brennan/rss-parser",
"moduleType": [
"node"
],
"ignore": [
"**/.*",
"node_modules",
"bower_components",
"test",
"tests"
]
}
+11002
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+11
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+119
View File
@@ -0,0 +1,119 @@
import { Options } from 'xml2js';
import { RequestOptions } from 'https';
/**
 * Types describing the parser's constructor options and the objects it produces.
 */
declare namespace Parser {
/**
 * One custom-field entry: either a key of the custom type `U`, or an array
 * whose elements are strings and/or a flags object.
 */
type CustomFieldItem<U> = keyof U | (string | { keepArray: boolean })[]
/** Extra XML fields to copy onto the feed (`feed`) and onto each item (`item`). */
export interface CustomFields<T, U> {
readonly feed?: Array<keyof T>;
readonly item?: CustomFieldItem<U>[] | CustomFieldItem<U>[][];
}
/** Options accepted by the `Parser` constructor. All properties are optional. */
export interface ParserOptions<T, U> {
/** Options forwarded to the xml2js parser. */
readonly xml2js?: Options;
/** Options merged into the HTTP(S) request. */
readonly requestOptions?: RequestOptions;
/** Headers sent with the HTTP(S) request. */
readonly headers?: Record<string, string>;
/** RSS version to assume when the document does not declare one. */
readonly defaultRSS?: number;
/** Maximum number of redirects to follow. */
readonly maxRedirects?: number;
readonly customFields?: CustomFields<T, U>;
/** Request timeout in milliseconds. */
readonly timeout?: number;
}
/** Enclosure attached to an item. */
export interface Enclosure {
url: string;
length?: number;
type?: string;
}
/** Standard properties that may be present on a parsed item. */
export interface Item {
link?: string;
guid?: string;
title?: string;
pubDate?: string;
creator?: string;
summary?: string;
content?: string;
isoDate?: string;
categories?: string[];
contentSnippet?: string;
enclosure?: Enclosure;
}
/** Pagination URLs keyed by link relation. */
export interface PaginationLinks {
self?: string;
first?: string;
next?: string;
last?: string;
prev?: string;
}
/** Feed-level result; `items` combines the standard `Item` shape with the custom type `U`. */
export interface Output<U> {
image?: {
link?: string;
url: string;
title?: string;
},
paginationLinks?: PaginationLinks;
link?: string;
title?: string;
items: (U & Item)[];
feedUrl?: string;
description?: string;
/** iTunes podcast metadata; additional keys are allowed through the index signature. */
itunes?: {
[key: string]: any;
image?: string;
owner?: {
name?: string;
email?: string;
};
author?: string;
summary?: string;
explicit?: string;
categories?: string[];
keywords?: string[];
};
}
}
/**
 * Parses RSS/Atom feeds, given either as an XML string or as a URL, into
 * plain JavaScript objects.
 *
 * `T` describes custom feed-level fields and `U` custom item-level fields.
 */
declare class Parser<T = {[key: string]: any}, U = {[key: string]: any}> {
/**
 * @param options - Parser options.
 */
constructor(options?: Parser.ParserOptions<T, U>);
/**
 * Parse XML content into a feed object.
 *
 * @param xml - The XML to be parsed.
 * @param callback - Optional Node-style callback `(err, feed)`.
 *
 * @returns Promise for the parsed feed (the same output the callback receives).
 */
parseString(
xml: string,
callback?: (err: Error, feed: Parser.Output<U>) => void
): Promise<T & Parser.Output<U>>;
/**
 * Fetch a URL and parse its content into a feed object.
 *
 * @param feedUrl - The URL of the feed to fetch and parse.
 * @param callback - Optional Node-style callback `(err, feed)`.
 * @param redirectCount - Number of redirects already followed for this
 * request. Callers normally omit it; the maximum number of redirects is
 * configured with the `maxRedirects` constructor option.
 *
 * @example
 * await parseURL('https://www.reddit.com/.rss');
 * parseURL('https://www.reddit.com/.rss', (err, feed) => { ... });
 *
 * @returns Promise for the parsed feed (the same output the callback receives).
 */
parseURL(
feedUrl: string,
callback?: (err: Error, feed: Parser.Output<U>) => void,
redirectCount?: number
): Promise<T & Parser.Output<U>>;
}
export = Parser;
+4
View File
@@ -0,0 +1,4 @@
'use strict';
// Package entry point: re-exports whatever ./lib/parser exports.
module.exports = require('./lib/parser');
+73
View File
@@ -0,0 +1,73 @@
const fields = module.exports = {};
// Channel-level fields copied from the XML onto the feed object.
// An entry is either a plain name (copied under the same key) or a
// [from, to] pair (copied from `from` and stored under `to`).
// Entries are applied in order, so a later entry writing to the same
// destination key overrides an earlier one.
fields.feed = [
  ['author', 'creator'],
  ['dc:publisher', 'publisher'],
  ['dc:creator', 'creator'],
  ['dc:source', 'source'],
  ['dc:title', 'title'],
  ['dc:type', 'type'],
  'title',
  'description',
  'author',
  'pubDate',
  'webMaster',
  'managingEditor',
  'generator',
  'link',
  'language',
  'copyright',
  'lastBuildDate',
  'docs',
  'ttl',
  'rating',
  'skipHours',
  'skipDays',
];
// Item-level fields copied from each XML item onto the parsed item.
// An entry is a plain name, a [from, to] pair, or a [from, to, flags] triple.
// Order matters: a later entry writing to the same destination key
// (e.g. `creator`, `title`) overrides an earlier one.
fields.item = [
['author', 'creator'],
['dc:creator', 'creator'],
['dc:date', 'date'],
['dc:language', 'language'],
['dc:rights', 'rights'],
['dc:source', 'source'],
['dc:title', 'title'],
'title',
'link',
'pubDate',
'author',
'summary',
// includeSnippet additionally stores an HTML-stripped `content:encodedSnippet`.
['content:encoded', 'content:encoded', {includeSnippet: true}],
'enclosure',
// The dc: fields are also kept under their original, prefixed names.
'dc:creator',
'dc:date',
'comments',
];
/**
 * Builds the [from, to] pair for an iTunes field: the value is read from
 * the `itunes:`-prefixed element and stored under the bare name.
 *
 * @param {string} name - iTunes field name without the prefix.
 * @returns {Array} two-element array `['itunes:' + name, name]`.
 */
function mapItunesField(name) {
  const prefixed = 'itunes:' + name;
  return [prefixed, name];
}
// iTunes fields copied from the channel; each name is expanded by
// mapItunesField into an ['itunes:<name>', '<name>'] pair.
fields.podcastFeed = ([
  'author',
  'subtitle',
  'summary',
  'explicit'
]).map(mapItunesField);

// iTunes fields copied from each item, expanded the same way.
// ('image' used to be listed twice; one entry is sufficient.)
fields.podcastItem = ([
  'author',
  'subtitle',
  'summary',
  'explicit',
  'duration',
  'image',
  'episode',
  'season',
  'keywords',
  'episodeType'
]).map(mapItunesField);
+349
View File
@@ -0,0 +1,349 @@
"use strict";
const http = require('http');
const https = require('https');
const xml2js = require('xml2js');
const url = require('url');
const fields = require('./fields');
const utils = require('./utils');
// Headers sent with every feed request; parseURL merges options.headers over these.
const DEFAULT_HEADERS = {
'User-Agent': 'rss-parser',
'Accept': 'application/rss+xml',
}
// Redirect limit used when the caller does not supply options.maxRedirects.
const DEFAULT_MAX_REDIRECTS = 5;
// Request timeout in milliseconds used when the caller does not supply options.timeout.
const DEFAULT_TIMEOUT = 60000;
/**
 * Turns RSS (0.9x, 1.0, 2.0) and Atom XML into plain JavaScript objects.
 *
 * XML is parsed with xml2js; `parseURL` additionally fetches the document
 * over http/https, following redirects up to `options.maxRedirects`.
 */
class Parser {
  /**
   * @param {Object} [options] - Parser options.
   * @param {Object} [options.headers] - Extra HTTP headers for `parseURL`.
   * @param {Object} [options.xml2js] - Options passed to `new xml2js.Parser()`.
   * @param {Object} [options.customFields] - `{feed: [...], item: [...]}` extra fields to copy.
   * @param {Object} [options.requestOptions] - Options merged into the http(s) request.
   * @param {number} [options.maxRedirects] - Redirect limit; 0 disables following redirects.
   * @param {number} [options.timeout] - Request timeout in milliseconds.
   * @param {number} [options.defaultRSS] - RSS version (0.9, 1 or 2) to assume when none is declared.
   */
  constructor(options={}) {
    // Work on shallow copies so the caller's objects are never mutated.
    const opts = Object.assign({}, options);
    opts.headers = opts.headers || {};
    opts.xml2js = opts.xml2js || {};
    opts.customFields = Object.assign({}, opts.customFields);
    opts.customFields.item = opts.customFields.item || [];
    opts.customFields.feed = opts.customFields.feed || [];
    opts.requestOptions = opts.requestOptions || {};
    // Default only when unset: an explicit 0 must survive so that parseURL
    // can refuse to follow redirects.
    if (opts.maxRedirects === undefined || opts.maxRedirects === null) {
      opts.maxRedirects = DEFAULT_MAX_REDIRECTS;
    }
    if (!opts.timeout) opts.timeout = DEFAULT_TIMEOUT;
    this.options = opts;
    this.xmlParser = new xml2js.Parser(this.options.xml2js);
  }

  /**
   * Parse an XML string into a feed object.
   *
   * The feed flavour is detected from the document root: `<feed>` (Atom),
   * `<rss version="2...">`, `<rdf:RDF>` (RSS 1), `<rss version="0.9x">`,
   * or, for an `<rss>` root without a usable version, `options.defaultRSS`.
   *
   * @param {string} xml - The XML document.
   * @param {Function} [callback] - Optional Node-style callback `(err, feed)`.
   * @returns {Promise} resolves with the feed when no callback is given;
   *   rejects when the XML cannot be parsed or the format is not recognized.
   */
  parseString(xml, callback) {
    let prom = new Promise((resolve, reject) => {
      this.xmlParser.parseString(xml, (err, result) => {
        if (err) return reject(err);
        if (!result) {
          return reject(new Error('Unable to parse XML.'));
        }
        let feed = null;
        if (result.feed) {
          feed = this.buildAtomFeed(result);
        } else if (result.rss && result.rss.$ && result.rss.$.version && result.rss.$.version.match(/^2/)) {
          feed = this.buildRSS2(result);
        } else if (result['rdf:RDF']) {
          feed = this.buildRSS1(result);
        } else if (result.rss && result.rss.$ && result.rss.$.version && result.rss.$.version.match(/0\.9/)) {
          feed = this.buildRSS0_9(result);
        } else if (result.rss && this.options.defaultRSS) {
          switch(this.options.defaultRSS) {
            case 0.9:
              feed = this.buildRSS0_9(result);
              break;
            case 1:
              feed = this.buildRSS1(result);
              break;
            case 2:
              feed = this.buildRSS2(result);
              break;
            default:
              return reject(new Error("default RSS version not recognized."))
          }
        } else {
          return reject(new Error("Feed not recognized as RSS 1 or 2."))
        }
        resolve(feed);
      });
    });
    prom = utils.maybePromisify(callback, prom);
    return prom;
  }

  /**
   * Fetch a feed over http/https and parse it.
   *
   * @param {string} feedUrl - URL of the feed.
   * @param {Function} [callback] - Optional Node-style callback `(err, feed)`.
   * @param {number} [redirectCount=0] - Redirects already followed; used
   *   internally when following a redirect.
   * @returns {Promise} resolves with the feed when no callback is given;
   *   rejects on network errors, status codes >= 300 that are not followed,
   *   too many redirects, or a timeout.
   */
  parseURL(feedUrl, callback, redirectCount=0) {
    let xml = '';
    let get = feedUrl.indexOf('https') === 0 ? https.get : http.get;
    let urlParts = url.parse(feedUrl);
    let headers = Object.assign({}, DEFAULT_HEADERS, this.options.headers);
    let timeout = null;
    let prom = new Promise((resolve, reject) => {
      const requestOpts = Object.assign({headers}, urlParts, this.options.requestOptions);
      // Discard a response body we are not going to read so the underlying
      // connection is released instead of staying paused.
      const discard = (res) => {
        if (typeof res.resume === 'function') res.resume();
      };
      let req = get(requestOpts, (res) => {
        if (this.options.maxRedirects && res.statusCode >= 300 && res.statusCode < 400 && res.headers['location']) {
          discard(res);
          if (redirectCount === this.options.maxRedirects) {
            return reject(new Error("Too many redirects"));
          } else {
            const newLocation = url.resolve(feedUrl, res.headers['location']);
            return this.parseURL(newLocation, null, redirectCount + 1).then(resolve, reject);
          }
        } else if (res.statusCode >= 300) {
          discard(res);
          return reject(new Error("Status code " + res.statusCode))
        }
        let encoding = utils.getEncodingFromContentType(res.headers['content-type']);
        res.setEncoding(encoding);
        res.on('data', (chunk) => {
          xml += chunk;
        });
        res.on('end', () => {
          return this.parseString(xml).then(resolve, reject);
        });
      })
      req.on('error', reject);
      timeout = setTimeout(() => {
        const err = new Error("Request timed out after " + this.options.timeout + "ms");
        reject(err);
        // Rejecting alone would leave the request running in the
        // background; tear it down as well.
        if (typeof req.destroy === 'function') req.destroy(err);
      }, this.options.timeout);
    }).then(data => {
      clearTimeout(timeout);
      return Promise.resolve(data);
    }, e => {
      clearTimeout(timeout);
      return Promise.reject(e);
    });
    prom = utils.maybePromisify(callback, prom);
    return prom;
  }

  /**
   * Build a feed object from a parsed Atom document.
   *
   * @param {Object} xmlObj - xml2js result whose root is `feed`.
   * @returns {Object} feed with `items` plus whatever feed-level fields were found.
   */
  buildAtomFeed(xmlObj) {
    let feed = {items: []};
    utils.copyFromXML(xmlObj.feed, feed, this.options.customFields.feed);
    if (xmlObj.feed.link) {
      feed.link = utils.getLink(xmlObj.feed.link, 'alternate', 0);
      feed.feedUrl = utils.getLink(xmlObj.feed.link, 'self', 1);
    }
    if (xmlObj.feed.title) {
      let title = xmlObj.feed.title[0] || '';
      // Elements carrying attributes keep their text under `_`.
      if (title._) title = title._
      if (title) feed.title = title;
    }
    if (xmlObj.feed.updated) {
      feed.lastBuildDate = xmlObj.feed.updated[0];
    }
    feed.items = (xmlObj.feed.entry || []).map(entry => this.parseItemAtom(entry));
    return feed;
  }

  /**
   * Convert one Atom `<entry>` into an item.
   *
   * @param {Object} entry - xml2js representation of the entry.
   * @returns {Object} the parsed item.
   */
  parseItemAtom(entry) {
    let item = {};
    utils.copyFromXML(entry, item, this.options.customFields.item);
    if (entry.title) {
      let title = entry.title[0] || '';
      if (title._) title = title._;
      if (title) item.title = title;
    }
    if (entry.link && entry.link.length) {
      item.link = utils.getLink(entry.link, 'alternate', 0);
    }
    // Returns the first value as an ISO 8601 string, or undefined when the
    // value is missing, empty or not a parseable date. Calling
    // toISOString() on an invalid Date would throw a RangeError.
    const toISO = (values) => {
      if (!values || !values.length || !values[0].length) return undefined;
      const parsed = new Date(values[0]);
      return isNaN(parsed.getTime()) ? undefined : parsed.toISOString();
    };
    const published = toISO(entry.published);
    if (published) item.pubDate = published;
    if (!item.pubDate) {
      // Fall back to `updated` when there is no usable `published`.
      const updated = toISO(entry.updated);
      if (updated) item.pubDate = updated;
    }
    if (entry.author && entry.author.length && entry.author[0].name && entry.author[0].name.length) item.author = entry.author[0].name[0];
    if (entry.content && entry.content.length) {
      item.content = utils.getContent(entry.content[0]);
      item.contentSnippet = utils.getSnippet(item.content)
    }
    if (entry.summary && entry.summary.length) {
      item.summary = utils.getContent(entry.summary[0]);
    }
    if (entry.id) {
      item.id = entry.id[0];
    }
    this.setISODate(item);
    return item;
  }

  /**
   * Build a feed from an RSS 0.9x document (items live inside the channel).
   *
   * @param {Object} xmlObj - xml2js result whose root is `rss`.
   * @returns {Object} the feed.
   */
  buildRSS0_9(xmlObj) {
    let channel = xmlObj.rss.channel[0];
    let items = channel.item;
    return this.buildRSS(channel, items);
  }

  /**
   * Build a feed from an RSS 1.0 document (items are siblings of the channel).
   *
   * @param {Object} xmlObj - xml2js result whose root is `rdf:RDF`.
   * @returns {Object} the feed.
   */
  buildRSS1(xmlObj) {
    xmlObj = xmlObj['rdf:RDF'];
    let channel = xmlObj.channel[0];
    let items = xmlObj.item;
    return this.buildRSS(channel, items);
  }

  /**
   * Build a feed from an RSS 2.0 document, adding iTunes data when the
   * root element declares the `xmlns:itunes` namespace.
   *
   * @param {Object} xmlObj - xml2js result whose root is `rss`.
   * @returns {Object} the feed.
   */
  buildRSS2(xmlObj) {
    let channel = xmlObj.rss.channel[0];
    let items = channel.item;
    let feed = this.buildRSS(channel, items);
    if (xmlObj.rss.$ && xmlObj.rss.$['xmlns:itunes']) {
      this.decorateItunes(feed, channel);
    }
    return feed;
  }

  /**
   * Shared RSS builder: copies channel fields, image, pagination links and items.
   *
   * @param {Object} channel - xml2js representation of the channel.
   * @param {Array} [items] - xml2js item elements; treated as empty when missing.
   * @returns {Object} the feed.
   */
  buildRSS(channel, items) {
    items = items || [];
    let feed = {items: []};
    let feedFields = fields.feed.concat(this.options.customFields.feed);
    let itemFields = fields.item.concat(this.options.customFields.item);
    if (channel['atom:link'] && channel['atom:link'][0] && channel['atom:link'][0].$) {
      feed.feedUrl = channel['atom:link'][0].$.href;
    }
    if (channel.image && channel.image[0] && channel.image[0].url) {
      feed.image = {};
      let image = channel.image[0];
      if (image.link) feed.image.link = image.link[0];
      if (image.url) feed.image.url = image.url[0];
      if (image.title) feed.image.title = image.title[0];
      if (image.width) feed.image.width = image.width[0];
      if (image.height) feed.image.height = image.height[0];
    }
    const paginationLinks = this.generatePaginationLinks(channel);
    if (Object.keys(paginationLinks).length) {
      feed.paginationLinks = paginationLinks;
    }
    utils.copyFromXML(channel, feed, feedFields);
    feed.items = items.map(xmlItem => this.parseItemRss(xmlItem, itemFields));
    return feed;
  }

  /**
   * Convert one RSS `<item>` into an item.
   *
   * @param {Object} xmlItem - xml2js representation of the item.
   * @param {Array} itemFields - field list handed to `utils.copyFromXML`.
   * @returns {Object} the parsed item.
   */
  parseItemRss(xmlItem, itemFields) {
    let item = {};
    utils.copyFromXML(xmlItem, item, itemFields);
    if (xmlItem.enclosure) {
      // The enclosure is exposed as the element's attribute map.
      item.enclosure = xmlItem.enclosure[0].$;
    }
    if (xmlItem.description) {
      item.content = utils.getContent(xmlItem.description[0]);
      item.contentSnippet = utils.getSnippet(item.content);
    }
    if (xmlItem.guid) {
      item.guid = xmlItem.guid[0];
      if (item.guid._) item.guid = item.guid._;
    }
    if (xmlItem.$ && xmlItem.$['rdf:about']) {
      item['rdf:about'] = xmlItem.$['rdf:about']
    }
    if (xmlItem.category) item.categories = xmlItem.category;
    this.setISODate(item);
    return item;
  }

  /**
   * Add iTunes specific fields from XML to extracted JSON
   *
   * @access public
   * @param {object} feed extracted
   * @param {object} channel parsed XML
   */
  decorateItunes(feed, channel) {
    let items = channel.item || [];
    feed.itunes = {}

    if (channel['itunes:owner']) {
      let owner = {};

      if(channel['itunes:owner'][0]['itunes:name']) {
        owner.name = channel['itunes:owner'][0]['itunes:name'][0];
      }
      if(channel['itunes:owner'][0]['itunes:email']) {
        owner.email = channel['itunes:owner'][0]['itunes:email'][0];
      }
      feed.itunes.owner = owner;
    }

    if (channel['itunes:image']) {
      let image;
      let hasImageHref = (channel['itunes:image'][0] &&
        channel['itunes:image'][0].$ &&
        channel['itunes:image'][0].$.href);
      image = hasImageHref ? channel['itunes:image'][0].$.href : null;
      if (image) {
        feed.itunes.image = image;
      }
    }

    if (channel['itunes:category']) {
      const categoriesWithSubs = channel['itunes:category'].map((category) => {
        return {
          name: category && category.$ && category.$.text,
          // Guard `category` here too, matching the guard used for `name`.
          subs: category && category['itunes:category'] ?
            category['itunes:category']
              .map((subcategory) => ({
                name: subcategory && subcategory.$ && subcategory.$.text
              })) : null,
        };
      });

      feed.itunes.categories = categoriesWithSubs.map((category) => category.name);
      feed.itunes.categoriesWithSubs = categoriesWithSubs;
    }

    if (channel['itunes:keywords']) {
      if (channel['itunes:keywords'].length > 1) {
        feed.itunes.keywords = channel['itunes:keywords'].map(
          keyword => keyword && keyword.$ && keyword.$.text
        );
      } else {
        let keywords = channel['itunes:keywords'][0];
        if (keywords && typeof keywords._ === 'string') {
          keywords = keywords._;
        }

        if (keywords && keywords.$ && keywords.$.text) {
          feed.itunes.keywords = keywords.$.text.split(',')
        } else if (typeof keywords === "string") {
          feed.itunes.keywords = keywords.split(',');
        }
      }
    }

    utils.copyFromXML(channel, feed.itunes, fields.podcastFeed);
    // feed.items was built from the same channel.item array, so the
    // indexes line up.
    items.forEach((item, index) => {
      let entry = feed.items[index];
      entry.itunes = {};
      utils.copyFromXML(item, entry.itunes, fields.podcastItem);
      let image = item['itunes:image'];
      if (image && image[0] && image[0].$ && image[0].$.href) {
        entry.itunes.image = image[0].$.href;
      }
    });
  }

  /**
   * Set `item.isoDate` from `item.pubDate` (or `item.date`) when that value
   * can be parsed as a date; otherwise leave the item untouched.
   *
   * @param {Object} item - item to update in place.
   */
  setISODate(item) {
    let date = item.pubDate || item.date;
    if (date) {
      try {
        item.isoDate = new Date(date.trim()).toISOString();
      } catch (e) {
        // Ignore bad date format
      }
    }
  }

  /**
   * Generates a pagination object where the rel attribute is the key and href attribute is the value
   *  { self: 'self-url', first: 'first-url', ...  }
   *
   * @access private
   * @param {Object} channel parsed XML
   * @returns {Object}
   */
  generatePaginationLinks(channel) {
    if (!channel['atom:link']) {
      return {};
    }
    const paginationRelAttributes = ['self', 'first', 'next', 'prev', 'last'];

    return channel['atom:link'].reduce((paginationLinks, link) => {
      if (!link.$ || !paginationRelAttributes.includes(link.$.rel)) {
        return paginationLinks;
      }
      paginationLinks[link.$.rel] = link.$.href;
      return paginationLinks;
    }, {});
  }
}
module.exports = Parser;
+85
View File
@@ -0,0 +1,85 @@
const utils = module.exports = {};
const entities = require('entities');
const xml2js = require('xml2js');
/**
 * Removes markup from a string. A block-level tag sitting between two
 * non-newline characters is first turned into a line break, then every
 * remaining tag is dropped.
 *
 * @param {string} str - text that may contain HTML.
 * @returns {string} the text without tags.
 */
utils.stripHtml = function(str) {
  const blockTagBetweenText = /([^\n])<\/?(h|br|p|ul|ol|li|blockquote|section|table|tr|div)(?:.|\n)*?>([^\n])/gm;
  const anyTag = /<(?:.|\n)*?>/gm;
  const withLineBreaks = str.replace(blockTagBetweenText, '$1\n$3');
  return withLineBreaks.replace(anyTag, '');
}
/**
 * Produces a plain-text snippet: tags stripped, HTML entities decoded,
 * surrounding whitespace trimmed.
 *
 * @param {string} str - HTML content.
 * @returns {string} the plain-text snippet.
 */
utils.getSnippet = function(str) {
  const withoutTags = utils.stripHtml(str);
  const decoded = entities.decodeHTML(withoutTags);
  return decoded.trim();
}
/**
 * Picks the href of the link whose `rel` attribute equals `rel`; when no
 * link matches, falls back to the href of the link at `fallbackIdx`.
 *
 * Link elements without attributes (no `$` map) are skipped instead of
 * raising a TypeError.
 *
 * @param {Array} links - link elements as parsed from XML.
 * @param {string} rel - wanted `rel` value, e.g. 'alternate' or 'self'.
 * @param {number} fallbackIdx - index of the link to use when none matches.
 * @returns {string|undefined} the href, or undefined when none is available.
 */
utils.getLink = function(links, rel, fallbackIdx) {
  if (!links) return;
  for (let i = 0; i < links.length; ++i) {
    const attrs = links[i] && links[i].$;
    if (attrs && attrs.rel === rel) return attrs.href;
  }
  const fallback = links[fallbackIdx];
  if (fallback && fallback.$) return fallback.$.href;
}
/**
 * Returns the textual content of a parsed XML node.
 *
 * - a node with string text under `_` yields that text;
 * - any other object is serialized back to markup wrapped in a `div` root;
 * - anything else (e.g. a plain string) is returned unchanged.
 *
 * @param {*} content - parsed node or string.
 * @returns {*} the content as described above.
 */
utils.getContent = function(content) {
  if (typeof content._ === 'string') {
    return content._;
  }
  if (typeof content !== 'object') {
    return content;
  }
  const builderOptions = {headless: true, explicitRoot: true, rootName: 'div', renderOpts: {pretty: false}};
  return new xml2js.Builder(builderOptions).buildObject(content);
}
/**
 * Copies the listed fields from a parsed XML node onto `dest`.
 *
 * Each entry of `fields` is either a name (copied under the same key) or
 * an array `[from, to, flags]`. Supported flags:
 *  - keepArray: store the whole value array instead of its first element;
 *  - includeSnippet: also store `<to>Snippet`, the HTML-stripped text.
 *
 * @param {Object} xml - parsed XML node to read from.
 * @param {Object} dest - object to write to (modified in place).
 * @param {Array} fields - field descriptors, applied in order.
 */
utils.copyFromXML = function(xml, dest, fields) {
  fields.forEach((field) => {
    const isTuple = Array.isArray(field);
    const source = isTuple ? field[0] : field;
    const target = isTuple ? field[1] : field;
    const flags = isTuple && field.length > 2 ? field[2] : {};
    const { keepArray, includeSnippet } = flags;

    const values = xml[source];
    if (values !== undefined) {
      dest[target] = keepArray ? values : values[0];
    }
    // Unwrap nodes that keep their text under `_`.
    const current = dest[target];
    if (current && typeof current._ === 'string') {
      dest[target] = current._;
    }
    if (includeSnippet && dest[target] && typeof dest[target] === 'string') {
      dest[target + 'Snippet'] = utils.getSnippet(dest[target]);
    }
  });
}
/**
 * Bridges a promise to an optional Node-style callback.
 *
 * Without a callback the promise is returned untouched. With one, the
 * callback is invoked from a timer with `(null, data)` on success or
 * `(err)` on failure, and the derived promise is returned.
 *
 * @param {Function} [callback] - optional `(err, data)` callback.
 * @param {Promise} promise - the promise to bridge.
 * @returns {Promise}
 */
utils.maybePromisify = function(callback, promise) {
  if (!callback) return promise;
  const deliverResult = (data) => setTimeout(() => callback(null, data));
  const deliverError = (err) => setTimeout(() => callback(err));
  return promise.then(deliverResult, deliverError);
}
// Encoding used when the Content-Type header names none, or an unsupported one.
const DEFAULT_ENCODING = 'utf8';
// Captures the value of a charset/encoding parameter. Parameter names are
// matched case-insensitively, an optional opening quote is skipped, and the
// value stops at whitespace, ';' or a quote so that `charset="utf-8"` and
// `charset=utf-8; boundary=x` both yield `utf-8`.
const ENCODING_REGEX = /(encoding|charset)\s*=\s*["']?([^\s;"']+)/i;
// Encoding names accepted as-is.
const SUPPORTED_ENCODINGS = ['ascii', 'utf8', 'utf16le', 'ucs2', 'base64', 'latin1', 'binary', 'hex'];
// Common charset labels mapped onto the supported names above.
const ENCODING_ALIASES = {
  'utf-8': 'utf8',
  'iso-8859-1': 'latin1',
}

/**
 * Derives the stream encoding from a Content-Type header value.
 *
 * @param {string} [contentType] - header value, e.g. 'text/xml; charset=utf-8'.
 * @returns {string} one of SUPPORTED_ENCODINGS; DEFAULT_ENCODING when the
 *   header is missing, names no charset, or names an unsupported one.
 */
utils.getEncodingFromContentType = function(contentType) {
  contentType = contentType || '';
  let match = contentType.match(ENCODING_REGEX);
  let encoding = (match || [])[2] || '';
  encoding = encoding.toLowerCase();
  encoding = ENCODING_ALIASES[encoding] || encoding;
  if (!encoding || SUPPORTED_ENCODINGS.indexOf(encoding) === -1) {
    encoding = DEFAULT_ENCODING;
  }
  return encoding;
}
+50
View File
@@ -0,0 +1,50 @@
{
"name": "rss-parser",
"version": "3.13.0",
"main": "index.js",
"types": "index.d.ts",
"scripts": {
"test": "mocha --reporter-option maxDiffSize=0 --exit",
"build": "./scripts/build.sh"
},
"author": "Bobby Brennan",
"license": "MIT",
"devDependencies": {
"@babel/core": "^7.21.4",
"@babel/preset-env": "^7.21.4",
"@types/xml2js": "^0.4.3",
"babel-core": "^6.26.3",
"babel-loader": "^8.0.4",
"babel-preset-env": "^1.7.0",
"chai": "^3.4.1",
"express": "^4.16.3",
"mocha": "^10.2.0",
"puppeteer": "^5.2.1",
"webpack": "^4.46.0",
"webpack-cli": "^3.3.9"
},
"dependencies": {
"entities": "^2.0.3",
"xml2js": "^0.5.0"
},
"directories": {
"test": "test"
},
"repository": {
"type": "git",
"url": "git+https://github.com/bobby-brennan/rss-parser.git"
},
"bugs": {
"url": "https://github.com/bobby-brennan/rss-parser/issues"
},
"homepage": "https://github.com/bobby-brennan/rss-parser#readme",
"description": "A lightweight RSS parser, for Node and the browser",
"keywords": [
"RSS",
"RSS to JSON",
"RSS reader",
"RSS parser",
"RSS to JS",
"Feed reader"
]
}
+4
View File
@@ -0,0 +1,4 @@
# Stop at the first failing command.
set -e
# Development bundle for the browser.
webpack-cli --mode=development --target=web
# Production bundle written as dist/[name].min.js; the JSON build stats go to dist/stats.json.
webpack-cli --mode=production --target=web --output-filename=dist/[name].min.js --profile --json > dist/stats.json
+29
View File
@@ -0,0 +1,29 @@
// Webpack configuration for the browser distribution of rss-parser.
// NOTE(review): `webpack` is required but not referenced in this file.
var webpack = require("webpack");
module.exports = {
// Single bundle named "rss-parser", built from the package entry point.
entry: {
"rss-parser": "./index.js"
},
// Emitted as dist/[name].js, packaged as a UMD library named `RSSParser`.
output: {
path: __dirname,
filename: "dist/[name].js",
libraryTarget: 'umd',
globalObject: 'this',
library: 'RSSParser'
},
resolve: {
extensions: ['.js']
},
devtool: 'source-map',
// All .js files go through babel-loader with @babel/preset-env.
module: {
rules: [{
test: /\.js$/,
loader: 'babel-loader?presets[]=@babel/preset-env',
}]
},
// xmlbuilder is declared as an external rather than bundled.
externals: {
xmlbuilder:'xmlbuilder'
},
// `fs` is replaced with an empty module in the bundle.
node: {
fs: "empty"
}
}