Initial commit of working RSS Aggregator build

2026-05-12 17:04:02 -03:00
parent ea3a2ca53e
commit 7ac2f6e384
4962 changed files with 1032666 additions and 0 deletions
@@ -0,0 +1,24 @@
+# This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
+
+name: tests
+
+on: [push, pull_request]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        node-version: [14.x, 16.x, 18.x]
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Use Node.js ${{ matrix.node-version }}
+      uses: actions/setup-node@v3
+      with:
+        node-version: ${{ matrix.node-version }}
+    - run: npm ci
+    - run: npm test
@@ -0,0 +1,14 @@
+dist: trusty
+
+language: node_js
+node_js:
+  - "8"
+
+before_script:
+  - npm install -g mocha
+script: npm test
+
+addons:
+  apt:
+    packages:
+      - libnss3
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 Bobby Brennan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,271 @@
+# rss-parser
+
+[![Version][npm-image]][npm-link]
+[![Build Status][build-image]][build-link]
+[![Downloads][downloads-image]][npm-link]
+
+[downloads-image]: https://img.shields.io/npm/dm/rss-parser.svg
+[npm-image]: https://img.shields.io/npm/v/rss-parser.svg
+[npm-link]: https://npmjs.org/package/rss-parser
+[build-image]: https://github.com/rbren/rss-parser/workflows/tests/badge.svg
+[build-link]: https://github.com/rbren/rss-parser/actions
+
+A small library for turning RSS XML feeds into JavaScript objects.
+
+## Installation
+```bash
+npm install --save rss-parser
+```
+
+## Usage
+You can parse RSS from a URL (`parser.parseURL`) or an XML string (`parser.parseString`).
+
+Both callbacks and Promises are supported.
+
+### NodeJS
+Here's an example in NodeJS using Promises with async/await:
+
+```js
+let Parser = require('rss-parser');
+let parser = new Parser();
+
+(async () => {
+
+  let feed = await parser.parseURL('https://www.reddit.com/.rss');
+  console.log(feed.title);
+
+  feed.items.forEach(item => {
+    console.log(item.title + ':' + item.link)
+  });
+
+})();
+```
+
+### TypeScript
+When using TypeScript, you can set a type to control the custom fields:
+
+```typescript
+import Parser from 'rss-parser';
+
+type CustomFeed = {foo: string};
+type CustomItem = {bar: number};
+
+const parser: Parser<CustomFeed, CustomItem> = new Parser({
+  customFields: {
+    feed: ['foo', 'baz'],
+    //            ^ will error because `baz` is not a key of CustomFeed
+    item: ['bar']
+  }
+});
+
+(async () => {
+
+  const feed = await parser.parseURL('https://www.reddit.com/.rss');
+  console.log(feed.title); // feed will have a `foo` property, typed as a string
+
+  feed.items.forEach(item => {
+    console.log(item.title + ':' + item.link) // item will have a `bar` property, typed as a number
+  });
+})();
+```
+
+### Web
+> We recommend using a bundler like [webpack](https://webpack.js.org/), but we also provide
+> pre-built browser distributions in the `dist/` folder. If you use the pre-built distribution,
+> you'll need a [polyfill](https://github.com/taylorhakes/promise-polyfill) for Promise support.
+
+Here's an example in the browser using callbacks:
+
+```html
+<script src="/node_modules/rss-parser/dist/rss-parser.min.js"></script>
+<script>
+
+// Note: some RSS feeds can't be loaded in the browser due to CORS security.
+// To get around this, you can use a proxy.
+const CORS_PROXY = "https://cors-anywhere.herokuapp.com/"
+
+let parser = new RSSParser();
+parser.parseURL(CORS_PROXY + 'https://www.reddit.com/.rss', function(err, feed) {
+  if (err) throw err;
+  console.log(feed.title);
+  feed.items.forEach(function(entry) {
+    console.log(entry.title + ':' + entry.link);
+  })
+})
+
+</script>
+```
+
+### Upgrading from v2 to v3
+A few minor breaking changes were made in v3. Here's what you need to know:
+
+* You need to construct a `new Parser()` before calling `parseString` or `parseURL`
+* `parseFile` is no longer available (for better browser support)
+* `options` are now passed to the Parser constructor
+* `parsed.feed` is now just `feed` (top-level object removed)
+* `feed.entries` is now `feed.items` (to better match RSS XML)
+
+
+## Output
+Check out the full output format in [test/output/reddit.json](test/output/reddit.json)
+
+```yaml
+feedUrl: 'https://www.reddit.com/.rss'
+title: 'reddit: the front page of the internet'
+description: ""
+link: 'https://www.reddit.com/'
+items:
+    - title: 'The water is too deep, so he improvises'
+      link: 'https://www.reddit.com/r/funny/comments/3skxqc/the_water_is_too_deep_so_he_improvises/'
+      pubDate: 'Thu, 12 Nov 2015 21:16:39 +0000'
+      creator: "John Doe"
+      content: '<a href="http://example.com">this is a link</a> &amp; <b>this is bold text</b>'
+      contentSnippet: 'this is a link & this is bold text'
+      guid: 'https://www.reddit.com/r/funny/comments/3skxqc/the_water_is_too_deep_so_he_improvises/'
+      categories:
+          - funny
+      isoDate: '2015-11-12T21:16:39.000Z'
+```
+
+##### Notes:
+* The `contentSnippet` field strips out HTML tags and unescapes HTML entities
+* The `dc:` prefix will be removed from all fields
+* Both `dc:date` and `pubDate` will be available in ISO 8601 format as `isoDate`
+* If `author` is specified, but not `dc:creator`, `creator` will be set to `author` ([see article](http://www.lowter.com/blogs/2008/2/9/rss-dccreator-author))
+* Atom's `updated` becomes `lastBuildDate` for consistency
+
+## XML Options
+
+### Custom Fields
+If your RSS feed contains fields that aren't currently returned, you can access them using the `customFields` option.
+
+```js
+let parser = new Parser({
+  customFields: {
+    feed: ['otherTitle', 'extendedDescription'],
+    item: ['coAuthor','subtitle'],
+  }
+});
+
+parser.parseURL('https://www.reddit.com/.rss', function(err, feed) {
+  console.log(feed.extendedDescription);
+
+  feed.items.forEach(function(entry) {
+    console.log(entry.coAuthor + ':' + entry.subtitle);
+  })
+})
+```
+
+To rename fields, you can pass in an array with two items, in the format `[fromField, toField]`:
+
+```js
+let parser = new Parser({
+  customFields: {
+    item: [
+      ['dc:coAuthor', 'coAuthor'],
+    ]
+  }
+})
+```
+
+To pass additional flags, provide an object as the third array item. Currently there are two such flags:
+
+* `keepArray (false)` - set to `true` to return *all* values for fields that can have multiple entries.
+* `includeSnippet (false)` - set to `true` to add an additional field, `${toField}Snippet`, with HTML stripped out
+
+```js
+let parser = new Parser({
+  customFields: {
+    item: [
+      ['media:content', 'media:content', {keepArray: true}],
+    ]
+  }
+})
+```
+
+### Default RSS version
+If your RSS Feed doesn't contain a `<rss>` tag with a `version` attribute,
+you can pass a `defaultRSS` option for the Parser to use:
+```js
+let parser = new Parser({
+  defaultRSS: 2.0
+});
+```
+
+
+### xml2js passthrough
+`rss-parser` uses [xml2js](https://github.com/Leonidas-from-XIV/node-xml2js)
+to parse XML. You can pass [these options](https://github.com/Leonidas-from-XIV/node-xml2js#options)
+to `new xml2js.Parser()` by specifying `options.xml2js`:
+
+```js
+let parser = new Parser({
+  xml2js: {
+    emptyTag: '--EMPTY--',
+  }
+});
+```
+
+## HTTP Options
+
+### Timeout
+You can set the amount of time (in milliseconds) to wait before the HTTP request times out (default 60 seconds):
+
+```js
+let parser = new Parser({
+  timeout: 1000,
+});
+```
+
+### Headers
+You can pass headers to the HTTP request:
+```js
+let parser = new Parser({
+  headers: {'User-Agent': 'something different'},
+});
+```
+
+### Redirects
+By default, `parseURL` will follow up to five redirects. You can change this
+with `options.maxRedirects`.
+
+```js
+let parser = new Parser({maxRedirects: 100});
+```
+
+### Request passthrough
+`rss-parser` uses [http](https://nodejs.org/docs/latest/api/http.html#http_http_get_url_options_callback)/[https](https://nodejs.org/docs/latest/api/https.html#https_https_get_url_options_callback) module
+to do requests. You can pass [these options](https://nodejs.org/docs/latest/api/https.html#https_https_request_options_callback)
+to `http.get()`/`https.get()` by specifying `options.requestOptions`:
+
+For example, to allow unauthorized certificates:
+```js
+let parser = new Parser({
+  requestOptions: {
+    rejectUnauthorized: false
+  }
+});
+```
+
+## Contributing
+Contributions are welcome! If you are adding a feature or fixing a bug, please be sure to add a [test case](https://github.com/bobby-brennan/rss-parser/tree/master/test/input)
+
+### Running Tests
+The tests run the RSS parser on several sample RSS feeds in `test/input` and output the resulting JSON into `test/output`. If there are any changes to the output files, the tests will fail.
+
+To check if your changes affect the output of any test cases, run
+
+`npm test`
+
+To update the output files with your changes, run
+
+`WRITE_GOLDEN=true npm test`
+
+### Publishing Releases
+```bash
+npm run build
+git commit -a -m "Build distribution"
+npm version minor # or major/patch
+npm publish
+git push --follow-tags
+```
@@ -0,0 +1,21 @@
+{
+  "name": "rss-parser",
+  "description": "",
+  "version": "1.1.0",
+  "main": "dist/rss-parser.js",
+  "authors": [
+    "Bobby Brennan"
+  ],
+  "license": "MIT",
+  "homepage": "https://github.com/bobby-brennan/rss-parser",
+  "moduleType": [
+    "node"
+  ],
+  "ignore": [
+    "**/.*",
+    "node_modules",
+    "bower_components",
+    "test",
+    "tests"
+  ]
+}
@@ -0,0 +1,119 @@
+import { Options } from 'xml2js';
+import { RequestOptions } from 'https';
+
+declare namespace Parser {
+  /**
+   * A custom field to copy from the XML: either a field name, or a tuple of
+   * `[fromField, toField, flags?]`. The optional flags object may set
+   * `keepArray` (return all values of a repeated field) and/or
+   * `includeSnippet` (also emit `${toField}Snippet` with HTML stripped out).
+   */
+  type CustomFieldItem<U> = keyof U | (string | { keepArray?: boolean; includeSnippet?: boolean })[]
+
+  /** Extra fields to copy from the feed itself and from each of its items. */
+  export interface CustomFields<T, U> {
+    readonly feed?: Array<keyof T>;
+    readonly item?: CustomFieldItem<U>[] | CustomFieldItem<U>[][];
+  }
+
+  /** Options accepted by the `Parser` constructor. */
+  export interface ParserOptions<T, U> {
+    /** Options passed through to `new xml2js.Parser()`. */
+    readonly xml2js?: Options;
+    /** Options merged into the `http.get()` / `https.get()` request options. */
+    readonly requestOptions?: RequestOptions;
+    /** Extra HTTP request headers; they override the parser's default headers. */
+    readonly headers?: Record<string, string>;
+    /** RSS version to assume when the `<rss>` tag has no recognized `version` attribute. */
+    readonly defaultRSS?: number;
+    /** Number of redirects `parseURL` follows (default 5). */
+    readonly maxRedirects?: number;
+    readonly customFields?: CustomFields<T, U>;
+    /** HTTP request timeout in milliseconds (default 60000). */
+    readonly timeout?: number;
+  }
+
+  /** Attributes of an item's `<enclosure>` element. */
+  export interface Enclosure {
+    url: string;
+    length?: number;
+    type?: string;
+  }
+
+  /** Fields the parser may set on every feed item. */
+  export interface Item {
+    link?: string;
+    guid?: string;
+    title?: string;
+    pubDate?: string;
+    creator?: string;
+    summary?: string;
+    content?: string;
+    isoDate?: string;
+    categories?: string[];
+    contentSnippet?: string;
+    enclosure?: Enclosure;
+  }
+
+  /** Pagination URLs collected from `atom:link` elements, keyed by their `rel` attribute. */
+  export interface PaginationLinks {
+    self?: string;
+    first?: string;
+    next?: string;
+    last?: string;
+    prev?: string;
+  }
+
+  /** Shape of a parsed feed; `U` describes the custom fields present on each item. */
+  export interface Output<U> {
+    image?: {
+      link?: string;
+      url: string;
+      title?: string;
+    },
+    paginationLinks?: PaginationLinks;
+    link?: string;
+    title?: string;
+    items: (U & Item)[];
+    feedUrl?: string;
+    description?: string;
+    itunes?: {
+      [key: string]: any;
+      image?: string;
+      owner?: {
+        name?: string;
+        email?: string;
+      };
+      author?: string;
+      summary?: string;
+      explicit?: string;
+      categories?: string[];
+      keywords?: string[];
+    };
+  }
+}
+
+/**
+ * Parses RSS and Atom feeds, supplied as an XML string or fetched from a URL,
+ * into plain JavaScript objects.
+ *
+ * `T` describes the custom fields added to the feed object and `U` the custom
+ * fields added to each item.
+ */
+declare class Parser<T = {[key: string]: any}, U = {[key: string]: any}> {
+  /**
+   * @param options - Parser options.
+   */
+  constructor(options?: Parser.ParserOptions<T, U>);
+  /**
+   * Parse XML content to a feed object.
+   *
+   * @param xml - The xml to be parsed.
+   * @param callback - Traditional callback, invoked with `(err, feed)`.
+   *
+   * @returns Promise for the parsed feed (the same value the callback receives).
+   */
+  parseString(
+    xml: string,
+    callback?: (err: Error, feed: T & Parser.Output<U>) => void
+  ): Promise<T & Parser.Output<U>>;
+
+  /**
+   * Fetch a URL and parse its content to a feed object.
+   *
+   * @param feedUrl - The url of the feed to fetch and parse.
+   * @param callback - Traditional callback, invoked with `(err, feed)`.
+   * @param redirectCount - Number of redirects already followed for this request.
+   *   Defaults to 0 and is incremented internally on each redirect; the limit
+   *   itself is configured through the `maxRedirects` option.
+   *
+   * @example
+   * await parseURL('https://www.reddit.com/.rss');
+   * parseURL('https://www.reddit.com/.rss', (err, feed) => { ... });
+   *
+   * @returns Promise for the parsed feed (the same value the callback receives).
+   */
+  parseURL(
+    feedUrl: string,
+    callback?: (err: Error, feed: T & Parser.Output<U>) => void,
+    redirectCount?: number
+  ): Promise<T & Parser.Output<U>>;
+}
+
+export = Parser;
@@ -0,0 +1,4 @@
+'use strict';
+
+module.exports = require('./lib/parser');
+
@@ -0,0 +1,73 @@
+const fields = module.exports = {};
+
+fields.feed = [
+  ['author', 'creator'],
+  ['dc:publisher', 'publisher'],
+  ['dc:creator', 'creator'],
+  ['dc:source', 'source'],
+  ['dc:title', 'title'],
+  ['dc:type', 'type'],
+  'title',
+  'description',
+  'author',
+  'pubDate',
+  'webMaster',
+  'managingEditor',
+  'generator',
+  'link',
+  'language',
+  'copyright',
+  'lastBuildDate',
+  'docs',
+  'generator',
+  'ttl',
+  'rating',
+  'skipHours',
+  'skipDays',
+];
+
+// Item-level fields copied from each RSS item. A plain string is copied under
+// its own name; a [from, to] pair copies `from` into `to`, and an optional
+// third element carries flags. Entries are applied in order, so a later entry
+// with the same destination overrides an earlier one when both are present.
+fields.item = [
+  ['author', 'creator'],
+  ['dc:creator', 'creator'],
+  ['dc:date', 'date'],
+  ['dc:language', 'language'],
+  ['dc:rights', 'rights'],
+  ['dc:source', 'source'],
+  ['dc:title', 'title'],
+  'title',
+  'link',
+  'pubDate',
+  'author',
+  'summary',
+  // Also produces `content:encodedSnippet`, the same text with HTML stripped.
+  ['content:encoded', 'content:encoded', {includeSnippet: true}],
+  'enclosure',
+  // These two are additionally kept under their original prefixed names.
+  'dc:creator',
+  'dc:date',
+  'comments',
+];
+
+var mapItunesField = function(f) {
+  return ['itunes:' + f, f];
+}
+
+fields.podcastFeed = ([
+  'author',
+  'subtitle',
+  'summary',
+  'explicit'
+]).map(mapItunesField);
+
+fields.podcastItem = ([
+  'author',
+  'subtitle',
+  'summary',
+  'explicit',
+  'duration',
+  'image',
+  'episode',
+  'image',
+  'season',
+  'keywords',
+  'episodeType'
+]).map(mapItunesField);
+
@@ -0,0 +1,349 @@
+"use strict";
+const http = require('http');
+const https = require('https');
+const xml2js = require('xml2js');
+const url = require('url');
+
+const fields = require('./fields');
+const utils = require('./utils');
+
+// Headers sent with every feed request; entries in `options.headers` override them.
+const DEFAULT_HEADERS = {
+  'User-Agent': 'rss-parser',
+  'Accept': 'application/rss+xml',
+}
+// Redirect limit used by `parseURL` when `options.maxRedirects` is not provided.
+const DEFAULT_MAX_REDIRECTS = 5;
+// Request timeout in milliseconds used when `options.timeout` is not provided.
+const DEFAULT_TIMEOUT = 60000;
+
+class Parser {
+  constructor(options={}) {
+    options.headers = options.headers || {};
+    options.xml2js = options.xml2js || {};
+    options.customFields = options.customFields || {};
+    options.customFields.item = options.customFields.item || [];
+    options.customFields.feed = options.customFields.feed || [];
+    options.requestOptions = options.requestOptions || {};
+    if (!options.maxRedirects) options.maxRedirects = DEFAULT_MAX_REDIRECTS;
+    if (!options.timeout) options.timeout = DEFAULT_TIMEOUT;
+    this.options = options;
+    this.xmlParser = new xml2js.Parser(this.options.xml2js);
+  }
+
+  parseString(xml, callback) {
+    let prom = new Promise((resolve, reject) => {
+      this.xmlParser.parseString(xml, (err, result) => {
+        if (err) return reject(err);
+        if (!result) {
+          return reject(new Error('Unable to parse XML.'));
+        }
+        let feed = null;
+        if (result.feed) {
+          feed = this.buildAtomFeed(result);
+        } else if (result.rss && result.rss.$ && result.rss.$.version && result.rss.$.version.match(/^2/)) {
+          feed = this.buildRSS2(result);
+        } else if (result['rdf:RDF']) {
+          feed = this.buildRSS1(result);
+        } else if (result.rss && result.rss.$ && result.rss.$.version && result.rss.$.version.match(/0\.9/)) {
+          feed = this.buildRSS0_9(result);
+        } else if (result.rss && this.options.defaultRSS) {
+          switch(this.options.defaultRSS) {
+            case 0.9:
+              feed = this.buildRSS0_9(result);
+              break;
+            case 1:
+              feed = this.buildRSS1(result);
+              break;
+            case 2:
+              feed = this.buildRSS2(result);
+              break;
+            default:
+              return reject(new Error("default RSS version not recognized."))
+          }
+        } else {
+          return reject(new Error("Feed not recognized as RSS 1 or 2."))
+        }
+        resolve(feed);
+      });
+    });
+    prom = utils.maybePromisify(callback, prom);
+    return prom;
+  }
+
+  parseURL(feedUrl, callback, redirectCount=0) {
+    let xml = '';
+    let get = feedUrl.indexOf('https') === 0 ? https.get : http.get;
+    let urlParts = url.parse(feedUrl);
+    let headers = Object.assign({}, DEFAULT_HEADERS, this.options.headers);
+    let timeout = null;
+    let prom = new Promise((resolve, reject) => {
+      const requestOpts = Object.assign({headers}, urlParts, this.options.requestOptions);
+      let req = get(requestOpts, (res) => {
+        if (this.options.maxRedirects && res.statusCode >= 300 && res.statusCode < 400 && res.headers['location']) {
+          if (redirectCount === this.options.maxRedirects) {
+            return reject(new Error("Too many redirects"));
+          } else {
+            const newLocation = url.resolve(feedUrl, res.headers['location']);
+            return this.parseURL(newLocation, null, redirectCount + 1).then(resolve, reject);
+          }
+        } else if (res.statusCode >= 300) {
+          return reject(new Error("Status code " + res.statusCode))
+        }
+        let encoding = utils.getEncodingFromContentType(res.headers['content-type']);
+        res.setEncoding(encoding);
+        res.on('data', (chunk) => {
+          xml += chunk;
+        });
+        res.on('end', () => {
+          return this.parseString(xml).then(resolve, reject);
+        });
+      })
+      req.on('error', reject);
+      timeout = setTimeout(() => {
+        return reject(new Error("Request timed out after " + this.options.timeout + "ms"));
+      }, this.options.timeout);
+    }).then(data => {
+      clearTimeout(timeout);
+      return Promise.resolve(data);
+    }, e => {
+      clearTimeout(timeout);
+      return Promise.reject(e);
+    });
+    prom = utils.maybePromisify(callback, prom);
+    return prom;
+  }
+
+  buildAtomFeed(xmlObj) {
+    let feed = {items: []};
+    utils.copyFromXML(xmlObj.feed, feed, this.options.customFields.feed);
+    if (xmlObj.feed.link) {
+      feed.link = utils.getLink(xmlObj.feed.link, 'alternate', 0);
+      feed.feedUrl = utils.getLink(xmlObj.feed.link, 'self', 1);
+    }
+    if (xmlObj.feed.title) {
+      let title = xmlObj.feed.title[0] || '';
+      if (title._) title = title._
+      if (title) feed.title = title;
+    }
+    if (xmlObj.feed.updated) {
+      feed.lastBuildDate = xmlObj.feed.updated[0];
+    }
+    feed.items = (xmlObj.feed.entry || []).map(entry => this.parseItemAtom(entry));
+    return feed;
+  }
+
+  parseItemAtom(entry) {
+    let item = {};
+    utils.copyFromXML(entry, item, this.options.customFields.item);
+    if (entry.title) {
+      let title = entry.title[0] || '';
+      if (title._) title = title._;
+      if (title) item.title = title;
+    }
+    if (entry.link && entry.link.length) {
+      item.link = utils.getLink(entry.link, 'alternate', 0);
+    }
+    if (entry.published && entry.published.length && entry.published[0].length) item.pubDate = new Date(entry.published[0]).toISOString();
+    if (!item.pubDate && entry.updated && entry.updated.length && entry.updated[0].length) item.pubDate = new Date(entry.updated[0]).toISOString();
+    if (entry.author && entry.author.length && entry.author[0].name && entry.author[0].name.length) item.author = entry.author[0].name[0];
+    if (entry.content && entry.content.length) {
+      item.content = utils.getContent(entry.content[0]);
+      item.contentSnippet = utils.getSnippet(item.content)
+    }
+    if (entry.summary && entry.summary.length) {
+      item.summary = utils.getContent(entry.summary[0]);
+    }
+    if (entry.id) {
+      item.id = entry.id[0];
+    }
+    this.setISODate(item);
+    return item;
+  }
+
+  buildRSS0_9(xmlObj) {
+    var channel = xmlObj.rss.channel[0];
+    var items = channel.item;
+    return this.buildRSS(channel, items);
+  }
+
+  buildRSS1(xmlObj) {
+    xmlObj = xmlObj['rdf:RDF'];
+    let channel = xmlObj.channel[0];
+    let items = xmlObj.item;
+    return this.buildRSS(channel, items);
+  }
+
+  buildRSS2(xmlObj) {
+    let channel = xmlObj.rss.channel[0];
+    let items = channel.item;
+    let feed = this.buildRSS(channel, items);
+    if (xmlObj.rss.$ && xmlObj.rss.$['xmlns:itunes']) {
+      this.decorateItunes(feed, channel);
+    }
+    return feed;
+  }
+
+  buildRSS(channel, items) {
+    items = items || [];
+    let feed = {items: []};
+    let feedFields = fields.feed.concat(this.options.customFields.feed);
+    let itemFields = fields.item.concat(this.options.customFields.item);
+    if (channel['atom:link'] && channel['atom:link'][0] && channel['atom:link'][0].$) {
+      feed.feedUrl = channel['atom:link'][0].$.href;
+    }
+    if (channel.image && channel.image[0] && channel.image[0].url) {
+      feed.image = {};
+      let image = channel.image[0];
+      if (image.link) feed.image.link = image.link[0];
+      if (image.url) feed.image.url = image.url[0];
+      if (image.title) feed.image.title = image.title[0];
+      if (image.width) feed.image.width = image.width[0];
+      if (image.height) feed.image.height = image.height[0];
+    }
+    const paginationLinks = this.generatePaginationLinks(channel);
+    if (Object.keys(paginationLinks).length) {
+      feed.paginationLinks = paginationLinks;
+    }
+    utils.copyFromXML(channel, feed, feedFields);
+    feed.items = items.map(xmlItem => this.parseItemRss(xmlItem, itemFields));
+    return feed;
+  }
+
+  parseItemRss(xmlItem, itemFields) {
+    let item = {};
+    utils.copyFromXML(xmlItem, item, itemFields);
+    if (xmlItem.enclosure) {
+      item.enclosure = xmlItem.enclosure[0].$;
+    }
+    if (xmlItem.description) {
+      item.content = utils.getContent(xmlItem.description[0]);
+      item.contentSnippet = utils.getSnippet(item.content);
+    }
+    if (xmlItem.guid) {
+      item.guid = xmlItem.guid[0];
+      if (item.guid._) item.guid = item.guid._;
+    }
+    if (xmlItem.$ && xmlItem.$['rdf:about']) {
+      item['rdf:about'] = xmlItem.$['rdf:about']
+    }
+    if (xmlItem.category) item.categories = xmlItem.category;
+    this.setISODate(item);
+    return item;
+  }
+
+  /**
+   * Add iTunes specific fields from XML to extracted JSON
+   *
+   * @access public
+   * @param {object} feed extracted
+   * @param {object} channel parsed XML
+   */
+  decorateItunes(feed, channel) {
+    let items = channel.item || [];
+    let categories = [];
+    feed.itunes = {}
+
+    if (channel['itunes:owner']) {
+      let owner = {};
+
+      if(channel['itunes:owner'][0]['itunes:name']) {
+        owner.name = channel['itunes:owner'][0]['itunes:name'][0];
+      }
+      if(channel['itunes:owner'][0]['itunes:email']) {
+        owner.email = channel['itunes:owner'][0]['itunes:email'][0];
+      }
+      feed.itunes.owner = owner;
+    }
+
+    if (channel['itunes:image']) {
+      let image;
+      let hasImageHref = (channel['itunes:image'][0] &&
+        channel['itunes:image'][0].$ &&
+        channel['itunes:image'][0].$.href);
+      image = hasImageHref ? channel['itunes:image'][0].$.href : null;
+      if (image) {
+        feed.itunes.image = image;
+      }
+    }
+
+    if (channel['itunes:category']) {
+      const categoriesWithSubs = channel['itunes:category'].map((category) => {
+        return {
+          name: category && category.$ && category.$.text,
+          subs: category['itunes:category'] ?
+            category['itunes:category']
+              .map((subcategory) => ({
+                name: subcategory && subcategory.$ && subcategory.$.text
+              })) : null,
+        };
+      });
+
+      feed.itunes.categories = categoriesWithSubs.map((category) => category.name);
+      feed.itunes.categoriesWithSubs = categoriesWithSubs;
+    }
+
+    if (channel['itunes:keywords']) {
+      if (channel['itunes:keywords'].length > 1) {
+        feed.itunes.keywords = channel['itunes:keywords'].map(
+          keyword => keyword && keyword.$ && keyword.$.text
+        );
+      } else {
+        let keywords = channel['itunes:keywords'][0];
+        if (keywords && typeof keywords._ === 'string') {
+          keywords = keywords._;
+        }
+
+        if (keywords && keywords.$ && keywords.$.text) {
+          feed.itunes.keywords = keywords.$.text.split(',')
+        } else if (typeof keywords === "string") {
+          feed.itunes.keywords = keywords.split(',');
+        }
+      }
+    }
+
+    utils.copyFromXML(channel, feed.itunes, fields.podcastFeed);
+    items.forEach((item, index) => {
+      let entry = feed.items[index];
+      entry.itunes = {};
+      utils.copyFromXML(item, entry.itunes, fields.podcastItem);
+      let image = item['itunes:image'];
+      if (image && image[0] && image[0].$ && image[0].$.href) {
+        entry.itunes.image = image[0].$.href;
+      }
+    });
+  }
+
+  setISODate(item) {
+    let date = item.pubDate || item.date;
+    if (date) {
+      try {
+        item.isoDate = new Date(date.trim()).toISOString();
+      } catch (e) {
+        // Ignore bad date format
+      }
+    }
+  }
+
+  /**
+   * Generates a pagination object where the rel attribute is the key and href attribute is the value
+   *  { self: 'self-url', first: 'first-url', ...  }
+   *
+   * @access private
+   * @param {Object} channel parsed XML
+   * @returns {Object}
+   */
+  generatePaginationLinks(channel) {
+    if (!channel['atom:link']) {
+      return {};
+    }
+    const paginationRelAttributes = ['self', 'first', 'next', 'prev', 'last'];
+
+    return channel['atom:link'].reduce((paginationLinks, link) => {
+      if (!link.$ || !paginationRelAttributes.includes(link.$.rel)) {
+        return paginationLinks;
+      }
+      paginationLinks[link.$.rel] = link.$.href;
+      return paginationLinks;
+    }, {});
+  }
+}
+
+module.exports = Parser;
@@ -0,0 +1,85 @@
+const utils = module.exports = {};
+const entities = require('entities');
+const xml2js = require('xml2js');
+
+utils.stripHtml = function(str) {
+  str = str.replace(/([^\n])<\/?(h|br|p|ul|ol|li|blockquote|section|table|tr|div)(?:.|\n)*?>([^\n])/gm, '$1\n$3')
+  str = str.replace(/<(?:.|\n)*?>/gm, '');
+  return str;
+}
+
+utils.getSnippet = function(str) {
+  return entities.decodeHTML(utils.stripHtml(str)).trim();
+}
+
+utils.getLink = function(links, rel, fallbackIdx) {
+  if (!links) return;
+  for (let i = 0; i < links.length; ++i) {
+    if (links[i].$.rel === rel) return links[i].$.href;
+  }
+  if (links[fallbackIdx]) return links[fallbackIdx].$.href;
+}
+
+utils.getContent = function(content) {
+  if (typeof content._ === 'string') {
+    return content._;
+  } else if (typeof content === 'object') {
+    let builder = new xml2js.Builder({headless: true, explicitRoot: true, rootName: 'div', renderOpts: {pretty: false}});
+    return builder.buildObject(content);
+  } else {
+    return content;
+  }
+}
+
+utils.copyFromXML = function(xml, dest, fields) {
+  fields.forEach(function(f) {
+    let from = f;
+    let to = f;
+    let options = {};
+    if (Array.isArray(f)) {
+      from = f[0];
+      to = f[1];
+      if (f.length > 2) {
+        options = f[2];
+      }
+    }
+    const { keepArray, includeSnippet } = options;
+    if (xml[from] !== undefined){
+      dest[to] = keepArray ? xml[from] : xml[from][0];
+    }
+    if (dest[to] && typeof dest[to]._ === 'string') {
+      dest[to]=dest[to]._;
+    }
+    if (includeSnippet && dest[to] && typeof dest[to] === 'string') {
+      dest[to + 'Snippet'] = utils.getSnippet(dest[to]);
+    }
+  })
+}
+
+utils.maybePromisify = function(callback, promise) {
+  if (!callback) return promise;
+  return promise.then(
+    data => setTimeout(() => callback(null, data)),
+    err => setTimeout(() => callback(err))
+  );
+}
+
+const DEFAULT_ENCODING = 'utf8';
+const ENCODING_REGEX = /(encoding|charset)\s*=\s*(\S+)/;
+const SUPPORTED_ENCODINGS = ['ascii', 'utf8', 'utf16le', 'ucs2', 'base64', 'latin1', 'binary', 'hex'];
+const ENCODING_ALIASES = {
+  'utf-8': 'utf8',
+  'iso-8859-1': 'latin1',
+}
+
+utils.getEncodingFromContentType = function(contentType) {
+  contentType = contentType || '';
+  let match = contentType.match(ENCODING_REGEX);
+  let encoding = (match || [])[2] || '';
+  encoding = encoding.toLowerCase();
+  encoding = ENCODING_ALIASES[encoding] || encoding;
+  if (!encoding || SUPPORTED_ENCODINGS.indexOf(encoding) === -1) {
+    encoding = DEFAULT_ENCODING;
+  }
+  return encoding;
+}
@@ -0,0 +1,50 @@
+{
+  "name": "rss-parser",
+  "version": "3.13.0",
+  "main": "index.js",
+  "types": "index.d.ts",
+  "scripts": {
+    "test": "mocha --reporter-option maxDiffSize=0 --exit",
+    "build": "./scripts/build.sh"
+  },
+  "author": "Bobby Brennan",
+  "license": "MIT",
+  "devDependencies": {
+    "@babel/core": "^7.21.4",
+    "@babel/preset-env": "^7.21.4",
+    "@types/xml2js": "^0.4.3",
+    "babel-core": "^6.26.3",
+    "babel-loader": "^8.0.4",
+    "babel-preset-env": "^1.7.0",
+    "chai": "^3.4.1",
+    "express": "^4.16.3",
+    "mocha": "^10.2.0",
+    "puppeteer": "^5.2.1",
+    "webpack": "^4.46.0",
+    "webpack-cli": "^3.3.9"
+  },
+  "dependencies": {
+    "entities": "^2.0.3",
+    "xml2js": "^0.5.0"
+  },
+  "directories": {
+    "test": "test"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/bobby-brennan/rss-parser.git"
+  },
+  "bugs": {
+    "url": "https://github.com/bobby-brennan/rss-parser/issues"
+  },
+  "homepage": "https://github.com/bobby-brennan/rss-parser#readme",
+  "description": "A lightweight RSS parser, for Node and the browser",
+  "keywords": [
+    "RSS",
+    "RSS to JSON",
+    "RSS reader",
+    "RSS parser",
+    "RSS to JS",
+    "Feed reader"
+  ]
+}
@@ -0,0 +1,4 @@
+set -e
+webpack-cli --mode=development --target=web
+webpack-cli --mode=production --target=web --output-filename=dist/[name].min.js --profile --json > dist/stats.json
+
@@ -0,0 +1,29 @@
+// Webpack configuration used to bundle the library for the web.
+// NOTE(review): `webpack` is required here but not referenced below.
+var webpack = require("webpack");
+module.exports = {
+  // One bundle, named "rss-parser", built from the package entry point.
+  entry: {
+    "rss-parser": "./index.js"
+  },
+  // Emitted as dist/<name>.js in UMD form; when loaded without a module system
+  // the library is exposed as `RSSParser` on the global object (`this`).
+  output: {
+    path: __dirname,
+    filename: "dist/[name].js",
+    libraryTarget: 'umd',
+    globalObject: 'this',
+    library: 'RSSParser'
+  },
+  resolve: {
+    extensions: ['.js']
+  },
+  devtool: 'source-map',
+  module: {
+    // Every .js file goes through babel-loader with @babel/preset-env.
+    rules: [{
+      test: /\.js$/,
+      loader: 'babel-loader?presets[]=@babel/preset-env',
+    }]
+  },
+  // `xmlbuilder` is left out of the bundle and resolved by the consumer.
+  externals: {
+    xmlbuilder:'xmlbuilder'
+  },
+  // Replace Node's `fs` module with an empty module in the bundle.
+  node: {
+    fs: "empty"
+  }
+}