miniflux/reader/parser/parser.go

59 lines
1.5 KiB
Go
Raw Normal View History

// Copyright 2018 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
package parser // import "miniflux.app/reader/parser"
import (
"strings"
"miniflux.app/errors"
"miniflux.app/logger"
"miniflux.app/model"
"miniflux.app/reader/atom"
"miniflux.app/reader/json"
"miniflux.app/reader/rdf"
"miniflux.app/reader/rss"
)
// ParseFeed sanitizes the raw feed payload, detects which syndication format
// it uses and hands it to the matching parser to build a normalized feed.
//
// Characters outside the XML character range are removed before detection,
// whatever the format turns out to be. A localized error is returned when the
// detected format is not one of Atom, RSS, JSON or RDF.
func ParseFeed(data string) (*model.Feed, *errors.LocalizedError) {
	sanitized := stripInvalidXMLCharacters(data)
	source := strings.NewReader(sanitized)

	switch format := DetectFeedFormat(sanitized); format {
	case FormatAtom:
		return atom.Parse(source)
	case FormatRSS:
		return rss.Parse(source)
	case FormatJSON:
		return json.Parse(source)
	case FormatRDF:
		return rdf.Parse(source)
	}

	// None of the known formats matched.
	return nil, errors.NewLocalizedError("Unsupported feed format")
}
// stripInvalidXMLCharacters returns a copy of input with every rune rejected
// by isInCharacterRange removed. Each removed rune is reported through the
// debug logger.
func stripInvalidXMLCharacters(input string) string {
	keepOrDrop := func(char rune) rune {
		if !isInCharacterRange(char) {
			logger.Debug("Strip invalid XML characters: %U", char)
			// A negative result tells strings.Map to drop the rune.
			return -1
		}
		return char
	}

	return strings.Map(keepOrDrop, input)
}
// isInCharacterRange reports whether the given rune is in the XML Character
// Range, per the Char production of http://www.xml.com/axml/testaxml.htm,
// Section 2.2 Characters:
//
//	Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
//
// The gap between 0xD7FF and 0xE000 is the UTF-16 surrogate block, which is
// not allowed in XML documents; 0xFFFE and 0xFFFF are excluded as well.
func isInCharacterRange(r rune) (inrange bool) {
	return r == 0x09 || // tab
		r == 0x0A || // line feed
		r == 0x0D || // carriage return
		r >= 0x20 && r <= 0xD7FF || // upper bound stops right before the surrogates
		r >= 0xE000 && r <= 0xFFFD ||
		r >= 0x10000 && r <= 0x10FFFF
}