2018-10-14 20:46:41 +02:00
|
|
|
// Copyright 2018 Frédéric Guillot. All rights reserved.
|
|
|
|
// Use of this source code is governed by the Apache 2.0
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package parser // import "miniflux.app/reader/parser"
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/xml"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"miniflux.app/reader/encoding"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Identifiers for the feed formats recognised by DetectFeedFormat.
const (
	// FormatAtom identifies an Atom feed.
	FormatAtom = "atom"
	// FormatJSON identifies a JSON feed.
	FormatJSON = "json"
	// FormatRDF identifies an RDF feed.
	FormatRDF = "rdf"
	// FormatRSS identifies an RSS feed.
	FormatRSS = "rss"
	// FormatUnknown is reported when no known format could be detected.
	FormatUnknown = "unknown"
)
|
|
|
|
|
|
|
|
// DetectFeedFormat tries to guess the feed format from input data.
|
|
|
|
func DetectFeedFormat(data string) string {
|
|
|
|
if strings.HasPrefix(strings.TrimSpace(data), "{") {
|
|
|
|
return FormatJSON
|
|
|
|
}
|
|
|
|
|
|
|
|
decoder := xml.NewDecoder(strings.NewReader(data))
|
2019-03-02 16:38:02 +01:00
|
|
|
decoder.Entity = xml.HTMLEntity
|
2018-10-14 20:46:41 +02:00
|
|
|
decoder.CharsetReader = encoding.CharsetReader
|
|
|
|
|
|
|
|
for {
|
|
|
|
token, _ := decoder.Token()
|
|
|
|
if token == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
if element, ok := token.(xml.StartElement); ok {
|
|
|
|
switch element.Name.Local {
|
|
|
|
case "rss":
|
|
|
|
return FormatRSS
|
|
|
|
case "feed":
|
|
|
|
return FormatAtom
|
|
|
|
case "RDF":
|
|
|
|
return FormatRDF
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return FormatUnknown
|
|
|
|
}
|