Initial commit of working RSS Aggregator build
This commit is contained in:
+24
@@ -0,0 +1,24 @@
|
||||
# This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node
|
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
|
||||
|
||||
name: tests
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
node-version: [14.x, 16.x, 18.x]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Use Node.js ${{ matrix.node-version }}
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
- run: npm ci
|
||||
- run: npm test
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
dist: trusty
|
||||
|
||||
language: node_js
|
||||
node_js:
|
||||
- "8"
|
||||
|
||||
before_script:
|
||||
- npm install -g mocha
|
||||
script: npm test
|
||||
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- libnss3
|
||||
+21
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2016 Bobby Brennan
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
+271
@@ -0,0 +1,271 @@
|
||||
# rss-parser
|
||||
|
||||
[![Version][npm-image]][npm-link]
|
||||
[![Build Status][build-image]][build-link]
|
||||
[![Downloads][downloads-image]][npm-link]
|
||||
|
||||
[downloads-image]: https://img.shields.io/npm/dm/rss-parser.svg
|
||||
[npm-image]: https://img.shields.io/npm/v/rss-parser.svg
|
||||
[npm-link]: https://npmjs.org/package/rss-parser
|
||||
[build-image]: https://github.com/rbren/rss-parser/workflows/tests/badge.svg
|
||||
[build-link]: https://github.com/rbren/rss-parser/actions
|
||||
|
||||
A small library for turning RSS XML feeds into JavaScript objects.
|
||||
|
||||
## Installation
|
||||
```bash
|
||||
npm install --save rss-parser
|
||||
```
|
||||
|
||||
## Usage
|
||||
You can parse RSS from a URL (`parser.parseURL`) or an XML string (`parser.parseString`).
|
||||
|
||||
Both callbacks and Promises are supported.
|
||||
|
||||
### NodeJS
|
||||
Here's an example in NodeJS using Promises with async/await:
|
||||
|
||||
```js
|
||||
let Parser = require('rss-parser');
|
||||
let parser = new Parser();
|
||||
|
||||
(async () => {
|
||||
|
||||
let feed = await parser.parseURL('https://www.reddit.com/.rss');
|
||||
console.log(feed.title);
|
||||
|
||||
feed.items.forEach(item => {
|
||||
console.log(item.title + ':' + item.link)
|
||||
});
|
||||
|
||||
})();
|
||||
```
|
||||
|
||||
### TypeScript
|
||||
When using TypeScript, you can set a type to control the custom fields:
|
||||
|
||||
```typescript
|
||||
import Parser from 'rss-parser';
|
||||
|
||||
type CustomFeed = {foo: string};
|
||||
type CustomItem = {bar: number};
|
||||
|
||||
const parser: Parser<CustomFeed, CustomItem> = new Parser({
|
||||
customFields: {
|
||||
feed: ['foo', 'baz'],
|
||||
// ^ will error because `baz` is not a key of CustomFeed
|
||||
item: ['bar']
|
||||
}
|
||||
});
|
||||
|
||||
(async () => {
|
||||
|
||||
const feed = await parser.parseURL('https://www.reddit.com/.rss');
|
||||
console.log(feed.title); // feed will have a `foo` property, typed as a string
|
||||
|
||||
feed.items.forEach(item => {
|
||||
console.log(item.title + ':' + item.link) // item will have a `bar` property, typed as a number
|
||||
});
|
||||
})();
|
||||
```
|
||||
|
||||
### Web
|
||||
> We recommend using a bundler like [webpack](https://webpack.js.org/), but we also provide
|
||||
> pre-built browser distributions in the `dist/` folder. If you use the pre-built distribution,
|
||||
> you'll need a [polyfill](https://github.com/taylorhakes/promise-polyfill) for Promise support.
|
||||
|
||||
Here's an example in the browser using callbacks:
|
||||
|
||||
```html
|
||||
<script src="/node_modules/rss-parser/dist/rss-parser.min.js"></script>
|
||||
<script>
|
||||
|
||||
// Note: some RSS feeds can't be loaded in the browser due to CORS security.
|
||||
// To get around this, you can use a proxy.
|
||||
const CORS_PROXY = "https://cors-anywhere.herokuapp.com/"
|
||||
|
||||
let parser = new RSSParser();
|
||||
parser.parseURL(CORS_PROXY + 'https://www.reddit.com/.rss', function(err, feed) {
|
||||
if (err) throw err;
|
||||
console.log(feed.title);
|
||||
feed.items.forEach(function(entry) {
|
||||
console.log(entry.title + ':' + entry.link);
|
||||
})
|
||||
})
|
||||
|
||||
</script>
|
||||
```
|
||||
|
||||
### Upgrading from v2 to v3
|
||||
A few minor breaking changes were made in v3. Here's what you need to know:
|
||||
|
||||
* You need to construct a `new Parser()` before calling `parseString` or `parseURL`
|
||||
* `parseFile` is no longer available (for better browser support)
|
||||
* `options` are now passed to the Parser constructor
|
||||
* `parsed.feed` is now just `feed` (top-level object removed)
|
||||
* `feed.entries` is now `feed.items` (to better match RSS XML)
|
||||
|
||||
|
||||
## Output
|
||||
Check out the full output format in [test/output/reddit.json](test/output/reddit.json)
|
||||
|
||||
```yaml
|
||||
feedUrl: 'https://www.reddit.com/.rss'
|
||||
title: 'reddit: the front page of the internet'
|
||||
description: ""
|
||||
link: 'https://www.reddit.com/'
|
||||
items:
|
||||
- title: 'The water is too deep, so he improvises'
|
||||
link: 'https://www.reddit.com/r/funny/comments/3skxqc/the_water_is_too_deep_so_he_improvises/'
|
||||
pubDate: 'Thu, 12 Nov 2015 21:16:39 +0000'
|
||||
creator: "John Doe"
|
||||
content: '<a href="http://example.com">this is a link</a> & <b>this is bold text</b>'
|
||||
contentSnippet: 'this is a link & this is bold text'
|
||||
guid: 'https://www.reddit.com/r/funny/comments/3skxqc/the_water_is_too_deep_so_he_improvises/'
|
||||
categories:
|
||||
- funny
|
||||
isoDate: '2015-11-12T21:16:39.000Z'
|
||||
```
|
||||
|
||||
##### Notes:
|
||||
* The `contentSnippet` field strips out HTML tags and unescapes HTML entities
|
||||
* The `dc:` prefix will be removed from all fields
|
||||
* Both `dc:date` and `pubDate` will be available in ISO 8601 format as `isoDate`
|
||||
* If `author` is specified, but not `dc:creator`, `creator` will be set to `author` ([see article](http://www.lowter.com/blogs/2008/2/9/rss-dccreator-author))
|
||||
* Atom's `updated` becomes `lastBuildDate` for consistency
|
||||
|
||||
## XML Options
|
||||
|
||||
### Custom Fields
|
||||
If your RSS feed contains fields that aren't currently returned, you can access them using the `customFields` option.
|
||||
|
||||
```js
|
||||
let parser = new Parser({
|
||||
customFields: {
|
||||
feed: ['otherTitle', 'extendedDescription'],
|
||||
item: ['coAuthor','subtitle'],
|
||||
}
|
||||
});
|
||||
|
||||
parser.parseURL('https://www.reddit.com/.rss', function(err, feed) {
|
||||
console.log(feed.extendedDescription);
|
||||
|
||||
feed.items.forEach(function(entry) {
|
||||
console.log(entry.coAuthor + ':' + entry.subtitle);
|
||||
})
|
||||
})
|
||||
```
|
||||
|
||||
To rename fields, you can pass in an array with two items, in the format `[fromField, toField]`:
|
||||
|
||||
```js
|
||||
let parser = new Parser({
|
||||
customFields: {
|
||||
item: [
|
||||
['dc:coAuthor', 'coAuthor'],
|
||||
]
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
To pass additional flags, provide an object as the third array item. The following flags are currently supported:
|
||||
|
||||
* `keepArray (false)` - set to `true` to return *all* values for fields that can have multiple entries.
|
||||
* `includeSnippet (false)` - set to `true` to add an additional field, `${toField}Snippet`, with HTML stripped out
|
||||
|
||||
```js
|
||||
let parser = new Parser({
|
||||
customFields: {
|
||||
item: [
|
||||
['media:content', 'media:content', {keepArray: true}],
|
||||
]
|
||||
}
|
||||
})
|
||||
```
|
||||
|
||||
### Default RSS version
|
||||
If your RSS Feed doesn't contain a `<rss>` tag with a `version` attribute,
|
||||
you can pass a `defaultRSS` option for the Parser to use:
|
||||
```js
|
||||
let parser = new Parser({
|
||||
defaultRSS: 2.0
|
||||
});
|
||||
```
|
||||
|
||||
|
||||
### xml2js passthrough
|
||||
`rss-parser` uses [xml2js](https://github.com/Leonidas-from-XIV/node-xml2js)
|
||||
to parse XML. You can pass [these options](https://github.com/Leonidas-from-XIV/node-xml2js#options)
|
||||
to `new xml2js.Parser()` by specifying `options.xml2js`:
|
||||
|
||||
```js
|
||||
let parser = new Parser({
|
||||
xml2js: {
|
||||
emptyTag: '--EMPTY--',
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
## HTTP Options
|
||||
|
||||
### Timeout
|
||||
You can set the amount of time (in milliseconds) to wait before the HTTP request times out (default 60 seconds):
|
||||
|
||||
```js
|
||||
let parser = new Parser({
|
||||
timeout: 1000,
|
||||
});
|
||||
```
|
||||
|
||||
### Headers
|
||||
You can pass headers to the HTTP request:
|
||||
```js
|
||||
let parser = new Parser({
|
||||
headers: {'User-Agent': 'something different'},
|
||||
});
|
||||
```
|
||||
|
||||
### Redirects
|
||||
By default, `parseURL` will follow up to five redirects. You can change this
|
||||
with `options.maxRedirects`.
|
||||
|
||||
```js
|
||||
let parser = new Parser({maxRedirects: 100});
|
||||
```
|
||||
|
||||
### Request passthrough
|
||||
`rss-parser` uses [http](https://nodejs.org/docs/latest/api/http.html#http_http_get_url_options_callback)/[https](https://nodejs.org/docs/latest/api/https.html#https_https_get_url_options_callback) module
|
||||
to do requests. You can pass [these options](https://nodejs.org/docs/latest/api/https.html#https_https_request_options_callback)
|
||||
to `http.get()`/`https.get()` by specifying `options.requestOptions`:
|
||||
|
||||
For example, to allow unauthorized (e.g. self-signed) certificates:
|
||||
```js
|
||||
let parser = new Parser({
|
||||
requestOptions: {
|
||||
rejectUnauthorized: false
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
## Contributing
|
||||
Contributions are welcome! If you are adding a feature or fixing a bug, please be sure to add a [test case](https://github.com/bobby-brennan/rss-parser/tree/master/test/input).
|
||||
|
||||
### Running Tests
|
||||
The tests run the RSS parser on several sample RSS feeds in `test/input` and output the resulting JSON into `test/output`. If there are any changes to the output files, the tests will fail.
|
||||
|
||||
To check if your changes affect the output of any test cases, run
|
||||
|
||||
`npm test`
|
||||
|
||||
To update the output files with your changes, run
|
||||
|
||||
`WRITE_GOLDEN=true npm test`
|
||||
|
||||
### Publishing Releases
|
||||
```bash
|
||||
npm run build
|
||||
git commit -a -m "Build distribution"
|
||||
npm version minor # or major/patch
|
||||
npm publish
|
||||
git push --follow-tags
|
||||
```
|
||||
+21
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "rss-parser",
|
||||
"description": "",
|
||||
"version": "1.1.0",
|
||||
"main": "dist/rss-parser.js",
|
||||
"authors": [
|
||||
"Bobby Brennan"
|
||||
],
|
||||
"license": "MIT",
|
||||
"homepage": "https://github.com/bobby-brennan/rss-parser",
|
||||
"moduleType": [
|
||||
"node"
|
||||
],
|
||||
"ignore": [
|
||||
"**/.*",
|
||||
"node_modules",
|
||||
"bower_components",
|
||||
"test",
|
||||
"tests"
|
||||
]
|
||||
}
|
||||
+11002
File diff suppressed because one or more lines are too long
+1
File diff suppressed because one or more lines are too long
+11
File diff suppressed because one or more lines are too long
+1
File diff suppressed because one or more lines are too long
+119
@@ -0,0 +1,119 @@
|
||||
import { Options } from 'xml2js';
|
||||
import { RequestOptions } from 'https';
|
||||
|
||||
declare namespace Parser {
  /**
   * A custom field to copy from the XML. Either the field name itself, or a
   * tuple `[fromField, toField, flags?]` where `flags` may set:
   *  - `keepArray`: return all values for fields that can have multiple entries
   *  - `includeSnippet`: also add `${toField}Snippet` with HTML stripped out
   */
  type CustomFieldItem<U> = keyof U | (string | { keepArray?: boolean; includeSnippet?: boolean })[]

  /** Extra feed-level and item-level fields to extract. */
  export interface CustomFields<T, U> {
    readonly feed?: Array<keyof T>;
    readonly item?: CustomFieldItem<U>[] | CustomFieldItem<U>[][];
  }

  /** Options accepted by the `Parser` constructor. */
  export interface ParserOptions<T, U> {
    /** Passed through to `new xml2js.Parser()`. */
    readonly xml2js?: Options;
    /** Passed through to `http.get()` / `https.get()`. */
    readonly requestOptions?: RequestOptions;
    /** Extra HTTP headers, merged over the parser's default headers. */
    readonly headers?: Record<string, string>;
    /** RSS version (0.9, 1 or 2) to assume when `<rss>` has no recognized `version` attribute. */
    readonly defaultRSS?: number;
    /** Maximum number of HTTP redirects `parseURL` follows. */
    readonly maxRedirects?: number;
    readonly customFields?: CustomFields<T, U>;
    /** Time in milliseconds to wait before the HTTP request times out. */
    readonly timeout?: number;
  }

  export interface Enclosure {
    url: string;
    length?: number;
    type?: string;
  }

  export interface Item {
    link?: string;
    guid?: string;
    title?: string;
    pubDate?: string;
    creator?: string;
    summary?: string;
    content?: string;
    /** `pubDate` / `dc:date` in ISO 8601 format, when it could be parsed. */
    isoDate?: string;
    categories?: string[];
    /** `content` with HTML tags stripped and HTML entities unescaped. */
    contentSnippet?: string;
    enclosure?: Enclosure;
  }

  /** Pagination URLs keyed by the `rel` attribute of the channel's `atom:link` elements. */
  export interface PaginationLinks {
    self?: string;
    first?: string;
    next?: string;
    last?: string;
    prev?: string;
  }

  export interface Output<U> {
    image?: {
      link?: string;
      url: string;
      title?: string;
      /** Copied verbatim from the channel's `<image><width>` element. */
      width?: string;
      /** Copied verbatim from the channel's `<image><height>` element. */
      height?: string;
    },
    paginationLinks?: PaginationLinks;
    link?: string;
    title?: string;
    items: (U & Item)[];
    feedUrl?: string;
    description?: string;
    itunes?: {
      [key: string]: any;
      image?: string;
      owner?: {
        name?: string;
        email?: string;
      };
      author?: string;
      summary?: string;
      explicit?: string;
      categories?: string[];
      keywords?: string[];
    };
  }
}
|
||||
|
||||
/**
 * Parses RSS and Atom feeds, given either as a URL or as an XML string,
 * into plain JavaScript objects.
 *
 * @typeParam T - Shape of the custom fields added to the feed object.
 * @typeParam U - Shape of the custom fields added to each item.
 */
declare class Parser<T = {[key: string]: any}, U = {[key: string]: any}> {
  /**
   * @param options - Parser options.
   */
  constructor(options?: Parser.ParserOptions<T, U>);
  /**
   * Parse XML content to JSON.
   *
   * @param xml - The xml to be parsed.
   * @param callback - Traditional callback.
   *
   * @returns Promise that has the same Output as the callback.
   */
  parseString(
    xml: string,
    callback?: (err: Error, feed: T & Parser.Output<U>) => void
  ): Promise<T & Parser.Output<U>>;

  /**
   * Parse URL content to JSON.
   *
   * @param feedUrl - The url that needs to be parsed to JSON.
   * @param callback - Traditional callback.
   * @param redirectCount - Number of redirects already followed for this
   *   request; defaults to 0. The maximum is configured with
   *   `options.maxRedirects`.
   *
   * @example
   * await parseURL('https://www.reddit.com/.rss');
   * parseURL('https://www.reddit.com/.rss', (err, feed) => { ... });
   *
   * @returns Promise that has the same Output as the callback.
   */
  parseURL(
    feedUrl: string,
    callback?: (err: Error, feed: T & Parser.Output<U>) => void,
    redirectCount?: number
  ): Promise<T & Parser.Output<U>>;
}
|
||||
|
||||
export = Parser;
|
||||
+4
@@ -0,0 +1,4 @@
|
||||
'use strict';
|
||||
|
||||
module.exports = require('./lib/parser');
|
||||
|
||||
+73
@@ -0,0 +1,73 @@
|
||||
/**
 * Lists of XML fields the parser copies onto feeds and items.
 *
 * Each entry is either a field name (copied under the same name) or a tuple
 * `[fromField, toField, options?]`. Entries are applied in order, so a later
 * entry that writes to the same destination overrides an earlier one.
 */
const fields = module.exports = {};

// Channel-level fields for RSS feeds.
fields.feed = [
  ['author', 'creator'],
  ['dc:publisher', 'publisher'],
  ['dc:creator', 'creator'],
  ['dc:source', 'source'],
  ['dc:title', 'title'],
  ['dc:type', 'type'],
  'title',
  'description',
  'author',
  'pubDate',
  'webMaster',
  'managingEditor',
  'generator',
  'link',
  'language',
  'copyright',
  'lastBuildDate',
  'docs',
  'ttl',
  'rating',
  'skipHours',
  'skipDays',
];

// Item-level fields for RSS feeds.
fields.item = [
  ['author', 'creator'],
  ['dc:creator', 'creator'],
  ['dc:date', 'date'],
  ['dc:language', 'language'],
  ['dc:rights', 'rights'],
  ['dc:source', 'source'],
  ['dc:title', 'title'],
  'title',
  'link',
  'pubDate',
  'author',
  'summary',
  ['content:encoded', 'content:encoded', {includeSnippet: true}],
  'enclosure',
  'dc:creator',
  'dc:date',
  'comments',
];

/**
 * Build the rename tuple that copies `itunes:<name>` to `<name>`.
 *
 * @param {string} f field name without the `itunes:` prefix
 * @returns {string[]} `['itunes:' + f, f]`
 */
const mapItunesField = function(f) {
  return ['itunes:' + f, f];
};

// iTunes fields copied from the channel.
fields.podcastFeed = ([
  'author',
  'subtitle',
  'summary',
  'explicit'
]).map(mapItunesField);

// iTunes fields copied from each item.
fields.podcastItem = ([
  'author',
  'subtitle',
  'summary',
  'explicit',
  'duration',
  'image',
  'episode',
  'season',
  'keywords',
  'episodeType'
]).map(mapItunesField);
|
||||
|
||||
+349
@@ -0,0 +1,349 @@
|
||||
"use strict";
|
||||
const http = require('http');
|
||||
const https = require('https');
|
||||
const xml2js = require('xml2js');
|
||||
const url = require('url');
|
||||
|
||||
const fields = require('./fields');
|
||||
const utils = require('./utils');
|
||||
|
||||
const DEFAULT_HEADERS = {
|
||||
'User-Agent': 'rss-parser',
|
||||
'Accept': 'application/rss+xml',
|
||||
}
|
||||
const DEFAULT_MAX_REDIRECTS = 5;
|
||||
const DEFAULT_TIMEOUT = 60000;
|
||||
|
||||
class Parser {
|
||||
constructor(options={}) {
|
||||
options.headers = options.headers || {};
|
||||
options.xml2js = options.xml2js || {};
|
||||
options.customFields = options.customFields || {};
|
||||
options.customFields.item = options.customFields.item || [];
|
||||
options.customFields.feed = options.customFields.feed || [];
|
||||
options.requestOptions = options.requestOptions || {};
|
||||
if (!options.maxRedirects) options.maxRedirects = DEFAULT_MAX_REDIRECTS;
|
||||
if (!options.timeout) options.timeout = DEFAULT_TIMEOUT;
|
||||
this.options = options;
|
||||
this.xmlParser = new xml2js.Parser(this.options.xml2js);
|
||||
}
|
||||
|
||||
parseString(xml, callback) {
|
||||
let prom = new Promise((resolve, reject) => {
|
||||
this.xmlParser.parseString(xml, (err, result) => {
|
||||
if (err) return reject(err);
|
||||
if (!result) {
|
||||
return reject(new Error('Unable to parse XML.'));
|
||||
}
|
||||
let feed = null;
|
||||
if (result.feed) {
|
||||
feed = this.buildAtomFeed(result);
|
||||
} else if (result.rss && result.rss.$ && result.rss.$.version && result.rss.$.version.match(/^2/)) {
|
||||
feed = this.buildRSS2(result);
|
||||
} else if (result['rdf:RDF']) {
|
||||
feed = this.buildRSS1(result);
|
||||
} else if (result.rss && result.rss.$ && result.rss.$.version && result.rss.$.version.match(/0\.9/)) {
|
||||
feed = this.buildRSS0_9(result);
|
||||
} else if (result.rss && this.options.defaultRSS) {
|
||||
switch(this.options.defaultRSS) {
|
||||
case 0.9:
|
||||
feed = this.buildRSS0_9(result);
|
||||
break;
|
||||
case 1:
|
||||
feed = this.buildRSS1(result);
|
||||
break;
|
||||
case 2:
|
||||
feed = this.buildRSS2(result);
|
||||
break;
|
||||
default:
|
||||
return reject(new Error("default RSS version not recognized."))
|
||||
}
|
||||
} else {
|
||||
return reject(new Error("Feed not recognized as RSS 1 or 2."))
|
||||
}
|
||||
resolve(feed);
|
||||
});
|
||||
});
|
||||
prom = utils.maybePromisify(callback, prom);
|
||||
return prom;
|
||||
}
|
||||
|
||||
parseURL(feedUrl, callback, redirectCount=0) {
|
||||
let xml = '';
|
||||
let get = feedUrl.indexOf('https') === 0 ? https.get : http.get;
|
||||
let urlParts = url.parse(feedUrl);
|
||||
let headers = Object.assign({}, DEFAULT_HEADERS, this.options.headers);
|
||||
let timeout = null;
|
||||
let prom = new Promise((resolve, reject) => {
|
||||
const requestOpts = Object.assign({headers}, urlParts, this.options.requestOptions);
|
||||
let req = get(requestOpts, (res) => {
|
||||
if (this.options.maxRedirects && res.statusCode >= 300 && res.statusCode < 400 && res.headers['location']) {
|
||||
if (redirectCount === this.options.maxRedirects) {
|
||||
return reject(new Error("Too many redirects"));
|
||||
} else {
|
||||
const newLocation = url.resolve(feedUrl, res.headers['location']);
|
||||
return this.parseURL(newLocation, null, redirectCount + 1).then(resolve, reject);
|
||||
}
|
||||
} else if (res.statusCode >= 300) {
|
||||
return reject(new Error("Status code " + res.statusCode))
|
||||
}
|
||||
let encoding = utils.getEncodingFromContentType(res.headers['content-type']);
|
||||
res.setEncoding(encoding);
|
||||
res.on('data', (chunk) => {
|
||||
xml += chunk;
|
||||
});
|
||||
res.on('end', () => {
|
||||
return this.parseString(xml).then(resolve, reject);
|
||||
});
|
||||
})
|
||||
req.on('error', reject);
|
||||
timeout = setTimeout(() => {
|
||||
return reject(new Error("Request timed out after " + this.options.timeout + "ms"));
|
||||
}, this.options.timeout);
|
||||
}).then(data => {
|
||||
clearTimeout(timeout);
|
||||
return Promise.resolve(data);
|
||||
}, e => {
|
||||
clearTimeout(timeout);
|
||||
return Promise.reject(e);
|
||||
});
|
||||
prom = utils.maybePromisify(callback, prom);
|
||||
return prom;
|
||||
}
|
||||
|
||||
buildAtomFeed(xmlObj) {
|
||||
let feed = {items: []};
|
||||
utils.copyFromXML(xmlObj.feed, feed, this.options.customFields.feed);
|
||||
if (xmlObj.feed.link) {
|
||||
feed.link = utils.getLink(xmlObj.feed.link, 'alternate', 0);
|
||||
feed.feedUrl = utils.getLink(xmlObj.feed.link, 'self', 1);
|
||||
}
|
||||
if (xmlObj.feed.title) {
|
||||
let title = xmlObj.feed.title[0] || '';
|
||||
if (title._) title = title._
|
||||
if (title) feed.title = title;
|
||||
}
|
||||
if (xmlObj.feed.updated) {
|
||||
feed.lastBuildDate = xmlObj.feed.updated[0];
|
||||
}
|
||||
feed.items = (xmlObj.feed.entry || []).map(entry => this.parseItemAtom(entry));
|
||||
return feed;
|
||||
}
|
||||
|
||||
parseItemAtom(entry) {
|
||||
let item = {};
|
||||
utils.copyFromXML(entry, item, this.options.customFields.item);
|
||||
if (entry.title) {
|
||||
let title = entry.title[0] || '';
|
||||
if (title._) title = title._;
|
||||
if (title) item.title = title;
|
||||
}
|
||||
if (entry.link && entry.link.length) {
|
||||
item.link = utils.getLink(entry.link, 'alternate', 0);
|
||||
}
|
||||
if (entry.published && entry.published.length && entry.published[0].length) item.pubDate = new Date(entry.published[0]).toISOString();
|
||||
if (!item.pubDate && entry.updated && entry.updated.length && entry.updated[0].length) item.pubDate = new Date(entry.updated[0]).toISOString();
|
||||
if (entry.author && entry.author.length && entry.author[0].name && entry.author[0].name.length) item.author = entry.author[0].name[0];
|
||||
if (entry.content && entry.content.length) {
|
||||
item.content = utils.getContent(entry.content[0]);
|
||||
item.contentSnippet = utils.getSnippet(item.content)
|
||||
}
|
||||
if (entry.summary && entry.summary.length) {
|
||||
item.summary = utils.getContent(entry.summary[0]);
|
||||
}
|
||||
if (entry.id) {
|
||||
item.id = entry.id[0];
|
||||
}
|
||||
this.setISODate(item);
|
||||
return item;
|
||||
}
|
||||
|
||||
buildRSS0_9(xmlObj) {
|
||||
var channel = xmlObj.rss.channel[0];
|
||||
var items = channel.item;
|
||||
return this.buildRSS(channel, items);
|
||||
}
|
||||
|
||||
buildRSS1(xmlObj) {
|
||||
xmlObj = xmlObj['rdf:RDF'];
|
||||
let channel = xmlObj.channel[0];
|
||||
let items = xmlObj.item;
|
||||
return this.buildRSS(channel, items);
|
||||
}
|
||||
|
||||
buildRSS2(xmlObj) {
|
||||
let channel = xmlObj.rss.channel[0];
|
||||
let items = channel.item;
|
||||
let feed = this.buildRSS(channel, items);
|
||||
if (xmlObj.rss.$ && xmlObj.rss.$['xmlns:itunes']) {
|
||||
this.decorateItunes(feed, channel);
|
||||
}
|
||||
return feed;
|
||||
}
|
||||
|
||||
buildRSS(channel, items) {
|
||||
items = items || [];
|
||||
let feed = {items: []};
|
||||
let feedFields = fields.feed.concat(this.options.customFields.feed);
|
||||
let itemFields = fields.item.concat(this.options.customFields.item);
|
||||
if (channel['atom:link'] && channel['atom:link'][0] && channel['atom:link'][0].$) {
|
||||
feed.feedUrl = channel['atom:link'][0].$.href;
|
||||
}
|
||||
if (channel.image && channel.image[0] && channel.image[0].url) {
|
||||
feed.image = {};
|
||||
let image = channel.image[0];
|
||||
if (image.link) feed.image.link = image.link[0];
|
||||
if (image.url) feed.image.url = image.url[0];
|
||||
if (image.title) feed.image.title = image.title[0];
|
||||
if (image.width) feed.image.width = image.width[0];
|
||||
if (image.height) feed.image.height = image.height[0];
|
||||
}
|
||||
const paginationLinks = this.generatePaginationLinks(channel);
|
||||
if (Object.keys(paginationLinks).length) {
|
||||
feed.paginationLinks = paginationLinks;
|
||||
}
|
||||
utils.copyFromXML(channel, feed, feedFields);
|
||||
feed.items = items.map(xmlItem => this.parseItemRss(xmlItem, itemFields));
|
||||
return feed;
|
||||
}
|
||||
|
||||
parseItemRss(xmlItem, itemFields) {
|
||||
let item = {};
|
||||
utils.copyFromXML(xmlItem, item, itemFields);
|
||||
if (xmlItem.enclosure) {
|
||||
item.enclosure = xmlItem.enclosure[0].$;
|
||||
}
|
||||
if (xmlItem.description) {
|
||||
item.content = utils.getContent(xmlItem.description[0]);
|
||||
item.contentSnippet = utils.getSnippet(item.content);
|
||||
}
|
||||
if (xmlItem.guid) {
|
||||
item.guid = xmlItem.guid[0];
|
||||
if (item.guid._) item.guid = item.guid._;
|
||||
}
|
||||
if (xmlItem.$ && xmlItem.$['rdf:about']) {
|
||||
item['rdf:about'] = xmlItem.$['rdf:about']
|
||||
}
|
||||
if (xmlItem.category) item.categories = xmlItem.category;
|
||||
this.setISODate(item);
|
||||
return item;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add iTunes specific fields from XML to extracted JSON
|
||||
*
|
||||
* @access public
|
||||
* @param {object} feed extracted
|
||||
* @param {object} channel parsed XML
|
||||
*/
|
||||
decorateItunes(feed, channel) {
|
||||
let items = channel.item || [];
|
||||
let categories = [];
|
||||
feed.itunes = {}
|
||||
|
||||
if (channel['itunes:owner']) {
|
||||
let owner = {};
|
||||
|
||||
if(channel['itunes:owner'][0]['itunes:name']) {
|
||||
owner.name = channel['itunes:owner'][0]['itunes:name'][0];
|
||||
}
|
||||
if(channel['itunes:owner'][0]['itunes:email']) {
|
||||
owner.email = channel['itunes:owner'][0]['itunes:email'][0];
|
||||
}
|
||||
feed.itunes.owner = owner;
|
||||
}
|
||||
|
||||
if (channel['itunes:image']) {
|
||||
let image;
|
||||
let hasImageHref = (channel['itunes:image'][0] &&
|
||||
channel['itunes:image'][0].$ &&
|
||||
channel['itunes:image'][0].$.href);
|
||||
image = hasImageHref ? channel['itunes:image'][0].$.href : null;
|
||||
if (image) {
|
||||
feed.itunes.image = image;
|
||||
}
|
||||
}
|
||||
|
||||
if (channel['itunes:category']) {
|
||||
const categoriesWithSubs = channel['itunes:category'].map((category) => {
|
||||
return {
|
||||
name: category && category.$ && category.$.text,
|
||||
subs: category['itunes:category'] ?
|
||||
category['itunes:category']
|
||||
.map((subcategory) => ({
|
||||
name: subcategory && subcategory.$ && subcategory.$.text
|
||||
})) : null,
|
||||
};
|
||||
});
|
||||
|
||||
feed.itunes.categories = categoriesWithSubs.map((category) => category.name);
|
||||
feed.itunes.categoriesWithSubs = categoriesWithSubs;
|
||||
}
|
||||
|
||||
if (channel['itunes:keywords']) {
|
||||
if (channel['itunes:keywords'].length > 1) {
|
||||
feed.itunes.keywords = channel['itunes:keywords'].map(
|
||||
keyword => keyword && keyword.$ && keyword.$.text
|
||||
);
|
||||
} else {
|
||||
let keywords = channel['itunes:keywords'][0];
|
||||
if (keywords && typeof keywords._ === 'string') {
|
||||
keywords = keywords._;
|
||||
}
|
||||
|
||||
if (keywords && keywords.$ && keywords.$.text) {
|
||||
feed.itunes.keywords = keywords.$.text.split(',')
|
||||
} else if (typeof keywords === "string") {
|
||||
feed.itunes.keywords = keywords.split(',');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
utils.copyFromXML(channel, feed.itunes, fields.podcastFeed);
|
||||
items.forEach((item, index) => {
|
||||
let entry = feed.items[index];
|
||||
entry.itunes = {};
|
||||
utils.copyFromXML(item, entry.itunes, fields.podcastItem);
|
||||
let image = item['itunes:image'];
|
||||
if (image && image[0] && image[0].$ && image[0].$.href) {
|
||||
entry.itunes.image = image[0].$.href;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
setISODate(item) {
|
||||
let date = item.pubDate || item.date;
|
||||
if (date) {
|
||||
try {
|
||||
item.isoDate = new Date(date.trim()).toISOString();
|
||||
} catch (e) {
|
||||
// Ignore bad date format
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a pagination object where the rel attribute is the key and href attribute is the value
|
||||
* { self: 'self-url', first: 'first-url', ... }
|
||||
*
|
||||
* @access private
|
||||
* @param {Object} channel parsed XML
|
||||
* @returns {Object}
|
||||
*/
|
||||
generatePaginationLinks(channel) {
|
||||
if (!channel['atom:link']) {
|
||||
return {};
|
||||
}
|
||||
const paginationRelAttributes = ['self', 'first', 'next', 'prev', 'last'];
|
||||
|
||||
return channel['atom:link'].reduce((paginationLinks, link) => {
|
||||
if (!link.$ || !paginationRelAttributes.includes(link.$.rel)) {
|
||||
return paginationLinks;
|
||||
}
|
||||
paginationLinks[link.$.rel] = link.$.href;
|
||||
return paginationLinks;
|
||||
}, {});
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = Parser;
|
||||
+85
@@ -0,0 +1,85 @@
|
||||
const utils = module.exports = {};
|
||||
const entities = require('entities');
|
||||
const xml2js = require('xml2js');
|
||||
|
||||
/**
 * Remove HTML markup from a string.
 *
 * Tags matched by the first pattern that sit between two non-newline
 * characters are replaced by a newline; every remaining tag is then removed.
 *
 * @param {string} str text that may contain HTML
 * @returns {string} the text without tags
 */
utils.stripHtml = function(str) {
  const blockTagBetweenText = /([^\n])<\/?(h|br|p|ul|ol|li|blockquote|section|table|tr|div)(?:.|\n)*?>([^\n])/gm;
  const anyTag = /<(?:.|\n)*?>/gm;
  const withLineBreaks = str.replace(blockTagBetweenText, '$1\n$3');
  return withLineBreaks.replace(anyTag, '');
}
|
||||
|
||||
/**
 * Produce a plain-text snippet: strip the HTML tags, decode HTML entities,
 * then trim surrounding whitespace.
 *
 * @param {string} str HTML content
 * @returns {string} plain-text snippet
 */
utils.getSnippet = function(str) {
  const withoutTags = utils.stripHtml(str);
  const decoded = entities.decodeHTML(withoutTags);
  return decoded.trim();
}
|
||||
|
||||
/**
 * Pick the href of a link element.
 *
 * Returns the href of the first link whose `rel` attribute equals `rel`;
 * otherwise the href of the link at `fallbackIdx`. Links without attributes
 * (for example a text-only `<link>` element) are skipped instead of throwing.
 *
 * @param {Array} links parsed link elements; attributes are read from `$`
 * @param {string} rel wanted `rel` attribute value
 * @param {number} fallbackIdx index of the link to use when no `rel` matches
 * @returns {string|undefined} the href, or undefined when none is available
 */
utils.getLink = function(links, rel, fallbackIdx) {
  if (!links) return;
  for (let i = 0; i < links.length; ++i) {
    const link = links[i];
    if (link && link.$ && link.$.rel === rel) return link.$.href;
  }
  const fallback = links[fallbackIdx];
  if (fallback && fallback.$) return fallback.$.href;
}
|
||||
|
||||
/**
 * Extract the textual content of a parsed XML node.
 *
 *  - null/undefined yields an empty string instead of throwing
 *  - a node with a string `_` property yields that text
 *  - any other object is serialized back to XML inside a `div` root
 *  - anything else (e.g. a plain string) is returned unchanged
 *
 * @param {*} content parsed node or string
 * @returns {*} the content as described above
 */
utils.getContent = function(content) {
  if (content === null || content === undefined) {
    return '';
  }
  if (typeof content._ === 'string') {
    return content._;
  }
  if (typeof content === 'object') {
    const builder = new xml2js.Builder({headless: true, explicitRoot: true, rootName: 'div', renderOpts: {pretty: false}});
    return builder.buildObject(content);
  }
  return content;
}
|
||||
|
||||
/**
 * Copy a list of fields from a parsed XML node onto a destination object.
 *
 * Each field is either a name (copied under the same key) or a tuple
 * `[from, to, options]`. By default the first value of `xml[from]` is
 * copied; `options.keepArray` copies the whole value. A copied node with a
 * string `_` property is replaced by that text. With
 * `options.includeSnippet`, a string result also gets a `<to>Snippet`
 * companion built with `utils.getSnippet`.
 *
 * @param {object} xml parsed XML node to read from
 * @param {object} dest object to write to (modified in place)
 * @param {Array} fields field names or tuples
 */
utils.copyFromXML = function(xml, dest, fields) {
  fields.forEach((field) => {
    const isTuple = Array.isArray(field);
    const source = isTuple ? field[0] : field;
    const target = isTuple ? field[1] : field;
    const flags = (isTuple && field.length > 2) ? field[2] : {};
    const keepArray = flags.keepArray;
    const includeSnippet = flags.includeSnippet;

    const value = xml[source];
    if (value !== undefined) {
      dest[target] = keepArray ? value : value[0];
    }

    // Unwrap nodes that carry their text under `_`.
    const copied = dest[target];
    if (copied && typeof copied._ === 'string') {
      dest[target] = copied._;
    }

    const finalValue = dest[target];
    if (includeSnippet && finalValue && typeof finalValue === 'string') {
      dest[target + 'Snippet'] = utils.getSnippet(finalValue);
    }
  })
}
|
||||
|
||||
/**
 * Bridge a promise to an optional Node-style callback.
 *
 * Without a callback the promise is returned untouched. With a callback, the
 * callback is invoked from a timer as `callback(null, data)` or
 * `callback(err)`. The returned promise resolves to the data on success, and
 * to undefined on failure, since the error has been handed to the callback.
 *
 * @param {function} [callback] optional `(err, data)` callback
 * @param {Promise} promise promise to observe
 * @returns {Promise} `promise` itself, or a promise that never rejects
 */
utils.maybePromisify = function(callback, promise) {
  if (!callback) return promise;
  return promise.then(
    data => {
      // Deferred via a timer so an exception thrown by the callback cannot
      // turn into a rejection of this promise chain.
      setTimeout(() => callback(null, data));
      return data;
    },
    err => {
      setTimeout(() => callback(err));
    }
  );
}
|
||||
|
||||
const DEFAULT_ENCODING = 'utf8';
// Captures the value of a `charset=` / `encoding=` parameter, case-insensitively,
// without surrounding quotes and without a trailing `;` or `,`.
const ENCODING_REGEX = /(?:encoding|charset)\s*=\s*["']?([^\s"';,]+)/i;
const SUPPORTED_ENCODINGS = ['ascii', 'utf8', 'utf16le', 'ucs2', 'base64', 'latin1', 'binary', 'hex'];
// Common charset labels mapped to the names used in SUPPORTED_ENCODINGS.
const ENCODING_ALIASES = {
  'utf-8': 'utf8',
  'iso-8859-1': 'latin1',
  'utf-16le': 'utf16le',
  'ucs-2': 'ucs2',
  'us-ascii': 'ascii',
}

/**
 * Work out which encoding to decode a response with from its Content-Type header.
 *
 * @param {string} [contentType] value of the Content-Type header
 * @returns {string} one of SUPPORTED_ENCODINGS; DEFAULT_ENCODING when the
 *   header is missing, has no charset, or names an unsupported one
 */
utils.getEncodingFromContentType = function(contentType) {
  contentType = contentType || '';
  const match = contentType.match(ENCODING_REGEX);
  let encoding = (match || [])[1] || '';
  encoding = encoding.toLowerCase();
  encoding = ENCODING_ALIASES[encoding] || encoding;
  if (!encoding || SUPPORTED_ENCODINGS.indexOf(encoding) === -1) {
    encoding = DEFAULT_ENCODING;
  }
  return encoding;
}
|
||||
+50
@@ -0,0 +1,50 @@
|
||||
{
|
||||
"name": "rss-parser",
|
||||
"version": "3.13.0",
|
||||
"main": "index.js",
|
||||
"types": "index.d.ts",
|
||||
"scripts": {
|
||||
"test": "mocha --reporter-option maxDiffSize=0 --exit",
|
||||
"build": "./scripts/build.sh"
|
||||
},
|
||||
"author": "Bobby Brennan",
|
||||
"license": "MIT",
|
||||
"devDependencies": {
|
||||
"@babel/core": "^7.21.4",
|
||||
"@babel/preset-env": "^7.21.4",
|
||||
"@types/xml2js": "^0.4.3",
|
||||
"babel-core": "^6.26.3",
|
||||
"babel-loader": "^8.0.4",
|
||||
"babel-preset-env": "^1.7.0",
|
||||
"chai": "^3.4.1",
|
||||
"express": "^4.16.3",
|
||||
"mocha": "^10.2.0",
|
||||
"puppeteer": "^5.2.1",
|
||||
"webpack": "^4.46.0",
|
||||
"webpack-cli": "^3.3.9"
|
||||
},
|
||||
"dependencies": {
|
||||
"entities": "^2.0.3",
|
||||
"xml2js": "^0.5.0"
|
||||
},
|
||||
"directories": {
|
||||
"test": "test"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/bobby-brennan/rss-parser.git"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/bobby-brennan/rss-parser/issues"
|
||||
},
|
||||
"homepage": "https://github.com/bobby-brennan/rss-parser#readme",
|
||||
"description": "A lightweight RSS parser, for Node and the browser",
|
||||
"keywords": [
|
||||
"RSS",
|
||||
"RSS to JSON",
|
||||
"RSS reader",
|
||||
"RSS parser",
|
||||
"RSS to JS",
|
||||
"Feed reader"
|
||||
]
|
||||
}
|
||||
+4
@@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env bash
# Build the browser distributions of rss-parser with webpack.
# Stop on the first failing command, on unset variables and on pipeline failures.
set -euo pipefail

# Development (unminified) bundle, using the output settings of the webpack config.
webpack-cli --mode=development --target=web

# Production (minified) bundle. `[name]` is a webpack placeholder, quoted so
# the shell never treats it as a glob pattern. The JSON build statistics
# printed by webpack are captured in dist/stats.json.
webpack-cli --mode=production --target=web '--output-filename=dist/[name].min.js' --profile --json > dist/stats.json
|
||||
|
||||
+29
@@ -0,0 +1,29 @@
|
||||
// Webpack configuration: bundles ./index.js into a UMD library named `RSSParser`.
var webpack = require("webpack");
|
||||
module.exports = {
|
||||
// The entry key ("rss-parser") is what `[name]` expands to in output.filename.
entry: {
|
||||
"rss-parser": "./index.js"
|
||||
},
|
||||
output: {
|
||||
// Output files are written relative to the directory containing this config.
path: __dirname,
|
||||
filename: "dist/[name].js",
|
||||
libraryTarget: 'umd',
|
||||
// NOTE(review): presumably set so the UMD wrapper also works where `window` is undefined - confirm.
globalObject: 'this',
|
||||
library: 'RSSParser'
|
||||
},
|
||||
resolve: {
|
||||
extensions: ['.js']
|
||||
},
|
||||
devtool: 'source-map',
|
||||
module: {
|
||||
rules: [{
|
||||
// Every .js file goes through babel-loader with @babel/preset-env.
test: /\.js$/,
|
||||
loader: 'babel-loader?presets[]=@babel/preset-env',
|
||||
}]
|
||||
},
|
||||
// `xmlbuilder` is declared external, so it is not included in the bundle.
externals: {
|
||||
xmlbuilder:'xmlbuilder'
|
||||
},
|
||||
// NOTE(review): with webpack 4 this should substitute an empty module for `fs` in the browser build - confirm.
node: {
|
||||
fs: "empty"
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user