From a074773e6c5d3b2066094cbac0502094aa364713 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 6 Mar 2024 14:57:21 +0100 Subject: [PATCH] Use an io.ReadSeeker instead of an io.Reader to parse feeds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will allow us to make use of func (*Reader) Seek, instead of re-creating a new reader. It's a large commit for a small change, but anything to simplify the reader/buffer/ReadAll/… mess is a step in the right direction I think, and it should enable more follow-up simplifications. --- internal/reader/rdf/parser.go | 2 +- internal/reader/rdf/parser_test.go | 36 +++++----- internal/reader/rss/parser.go | 2 +- internal/reader/rss/parser_test.go | 104 ++++++++++++++--------------- internal/reader/xml/decoder.go | 5 +- 5 files changed, 75 insertions(+), 74 deletions(-) diff --git a/internal/reader/rdf/parser.go b/internal/reader/rdf/parser.go index feb069b5..695fb5ce 100644 --- a/internal/reader/rdf/parser.go +++ b/internal/reader/rdf/parser.go @@ -12,7 +12,7 @@ import ( ) // Parse returns a normalized feed struct from a RDF feed. 
-func Parse(baseURL string, data io.Reader) (*model.Feed, error) { +func Parse(baseURL string, data io.ReadSeeker) (*model.Feed, error) { feed := new(rdfFeed) if err := xml.NewXMLDecoder(data).Decode(feed); err != nil { return nil, fmt.Errorf("rdf: unable to parse feed: %w", err) diff --git a/internal/reader/rdf/parser_test.go b/internal/reader/rdf/parser_test.go index 12b4f784..146c6c95 100644 --- a/internal/reader/rdf/parser_test.go +++ b/internal/reader/rdf/parser_test.go @@ -75,7 +75,7 @@ func TestParseRDFSample(t *testing.T) { ` - feed, err := Parse("http://xml.com/pub/rdf.xml", bytes.NewBufferString(data)) + feed, err := Parse("http://xml.com/pub/rdf.xml", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -186,7 +186,7 @@ func TestParseRDFSampleWithDublinCore(t *testing.T) { ` - feed, err := Parse("http://meerkat.oreillynet.com/feed.rdf", bytes.NewBufferString(data)) + feed, err := Parse("http://meerkat.oreillynet.com/feed.rdf", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -253,7 +253,7 @@ func TestParseItemWithOnlyFeedAuthor(t *testing.T) { ` - feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewBufferString(data)) + feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -278,7 +278,7 @@ func TestParseItemRelativeURL(t *testing.T) { ` - feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewBufferString(data)) + feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -307,7 +307,7 @@ func TestParseItemWithoutLink(t *testing.T) { ` - feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewBufferString(data)) + feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -338,7 +338,7 @@ func TestParseItemWithDublicCoreDate(t *testing.T) { ` - feed, err := Parse("http://example.org", bytes.NewBufferString(data)) + feed, err := 
Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -366,7 +366,7 @@ func TestParseItemWithEncodedHTMLInDCCreatorField(t *testing.T) { ` - feed, err := Parse("http://example.org", bytes.NewBufferString(data)) + feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -392,7 +392,7 @@ func TestParseItemWithoutDate(t *testing.T) { ` - feed, err := Parse("http://example.org", bytes.NewBufferString(data)) + feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -419,7 +419,7 @@ func TestParseItemWithEncodedHTMLTitle(t *testing.T) { ` - feed, err := Parse("http://example.org", bytes.NewBufferString(data)) + feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -431,7 +431,7 @@ func TestParseItemWithEncodedHTMLTitle(t *testing.T) { func TestParseInvalidXml(t *testing.T) { data := `garbage` - _, err := Parse("http://example.org", bytes.NewBufferString(data)) + _, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err == nil { t.Fatal("Parse should returns an error") } @@ -446,7 +446,7 @@ func TestParseFeedWithHTMLEntity(t *testing.T) { ` - feed, err := Parse("http://example.org", bytes.NewBufferString(data)) + feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -465,7 +465,7 @@ func TestParseFeedWithInvalidCharacterEntity(t *testing.T) { ` - feed, err := Parse("http://example.org", bytes.NewBufferString(data)) + feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -521,7 +521,7 @@ func TestParseFeedWithURLWrappedInSpaces(t *testing.T) { ` - feed, err := Parse("http://biorxiv.org", bytes.NewBufferString(data)) + feed, err := Parse("http://biorxiv.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -556,7 +556,7 @@ func 
TestParseRDFWithContentEncoded(t *testing.T) { ` - feed, err := Parse("http://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -589,7 +589,7 @@ func TestParseRDFWithEncodedHTMLDescription(t *testing.T) { ` - feed, err := Parse("http://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -623,7 +623,7 @@ func TestParseRDFItemWithDuplicateTitleElement(t *testing.T) { ` - feed, err := Parse("http://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -656,7 +656,7 @@ func TestParseRDFItemWithDublinCoreTitleElement(t *testing.T) { ` - feed, err := Parse("http://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -688,7 +688,7 @@ func TestParseRDFItemWitEmptyTitleElement(t *testing.T) { ` - feed, err := Parse("http://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } diff --git a/internal/reader/rss/parser.go b/internal/reader/rss/parser.go index 3828e800..a8390dc6 100644 --- a/internal/reader/rss/parser.go +++ b/internal/reader/rss/parser.go @@ -12,7 +12,7 @@ import ( ) // Parse returns a normalized feed struct from a RSS feed. 
-func Parse(baseURL string, data io.Reader) (*model.Feed, error) { +func Parse(baseURL string, data io.ReadSeeker) (*model.Feed, error) { feed := new(rssFeed) if err := xml.NewXMLDecoder(data).Decode(feed); err != nil { return nil, fmt.Errorf("rss: unable to parse feed: %w", err) diff --git a/internal/reader/rss/parser_test.go b/internal/reader/rss/parser_test.go index 56486060..b3a46719 100644 --- a/internal/reader/rss/parser_test.go +++ b/internal/reader/rss/parser_test.go @@ -58,7 +58,7 @@ func TestParseRss2Sample(t *testing.T) { ` - feed, err := Parse("http://liftoff.msfc.nasa.gov/rss.xml", bytes.NewBufferString(data)) + feed, err := Parse("http://liftoff.msfc.nasa.gov/rss.xml", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -117,7 +117,7 @@ func TestParseFeedWithoutTitle(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -138,7 +138,7 @@ func TestParseEntryWithoutTitleAndDescription(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -162,7 +162,7 @@ func TestParseEntryWithoutTitleButWithDescription(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -185,7 +185,7 @@ func TestParseEntryWithMediaTitle(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -207,7 +207,7 @@ func TestParseEntryWithDCTitleOnly(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", 
bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -228,7 +228,7 @@ func TestParseEntryWithoutLink(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -256,7 +256,7 @@ func TestParseEntryWithOnlyGuidPermalink(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -282,7 +282,7 @@ func TestParseEntryWithAtomLink(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -305,7 +305,7 @@ func TestParseEntryWithMultipleAtomLinks(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -325,7 +325,7 @@ func TestParseFeedURLWithAtomLink(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -353,7 +353,7 @@ func TestParseFeedWithWebmaster(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -380,7 +380,7 @@ func TestParseFeedWithManagingEditor(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -407,7 +407,7 @@ func TestParseEntryWithAuthorAndInnerHTML(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + 
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -436,7 +436,7 @@ func TestParseEntryWithAuthorAndCDATA(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -467,7 +467,7 @@ func TestParseEntryWithNonStandardAtomAuthor(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -496,7 +496,7 @@ func TestParseEntryWithAtomAuthorEmail(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -525,7 +525,7 @@ func TestParseEntryWithAtomAuthor(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -551,7 +551,7 @@ func TestParseEntryWithDublinCoreAuthor(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -577,7 +577,7 @@ func TestParseEntryWithItunesAuthor(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -603,7 +603,7 @@ func TestParseFeedWithItunesAuthor(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -632,7 +632,7 @@ func TestParseFeedWithItunesOwner(t *testing.T) { ` - feed, err := 
Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -660,7 +660,7 @@ func TestParseFeedWithItunesOwnerEmail(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -686,7 +686,7 @@ func TestParseEntryWithGooglePlayAuthor(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -712,7 +712,7 @@ func TestParseFeedWithGooglePlayAuthor(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -740,7 +740,7 @@ func TestParseEntryWithDublinCoreDate(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -768,7 +768,7 @@ func TestParseEntryWithContentEncoded(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -792,7 +792,7 @@ func TestParseEntryWithFeedBurnerLink(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -818,7 +818,7 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -848,7 +848,7 @@ func 
TestParseEntryWithEnclosures(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -896,7 +896,7 @@ func TestParseEntryWithEmptyEnclosureURL(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -933,7 +933,7 @@ func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -974,7 +974,7 @@ func TestParseEntryWithRelativeURL(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1000,7 +1000,7 @@ func TestParseEntryWithCommentsURL(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1025,7 +1025,7 @@ func TestParseEntryWithInvalidCommentsURL(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1037,7 +1037,7 @@ func TestParseEntryWithInvalidCommentsURL(t *testing.T) { func TestParseInvalidXml(t *testing.T) { data := `garbage` - _, err := Parse("https://example.org/", bytes.NewBufferString(data)) + _, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err == nil { t.Error("Parse should returns an error") } @@ -1052,7 +1052,7 @@ func TestParseFeedTitleWithHTMLEntity(t *testing.T) { ` - feed, err := Parse("https://example.org/", 
bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1071,7 +1071,7 @@ func TestParseFeedTitleWithUnicodeEntityAndCdata(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1094,7 +1094,7 @@ func TestParseItemTitleWithHTMLEntity(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1117,7 +1117,7 @@ func TestParseItemTitleWithNumericCharacterReference(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1140,7 +1140,7 @@ func TestParseItemTitleWithDoubleEncodedEntities(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1159,7 +1159,7 @@ func TestParseFeedLinkWithInvalidCharacterEntity(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1193,7 +1193,7 @@ func TestParseEntryWithMediaGroup(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1251,7 +1251,7 @@ func TestParseEntryWithMediaContent(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ 
-1302,7 +1302,7 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1354,7 +1354,7 @@ func TestEntryDescriptionFromItunesSummary(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1385,7 +1385,7 @@ func TestEntryDescriptionFromItunesSubtitle(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1419,7 +1419,7 @@ func TestEntryDescriptionFromGooglePlayDescription(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1451,7 +1451,7 @@ func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1485,7 +1485,7 @@ func TestParseEntryWithCategoryAndCDATA(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1515,7 +1515,7 @@ func TestParseFeedWithTTLField(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } @@ -1539,7 +1539,7 @@ func TestParseFeedWithIncorrectTTLValue(t *testing.T) { ` - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + feed, err 
:= Parse("https://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } diff --git a/internal/reader/xml/decoder.go b/internal/reader/xml/decoder.go index 3b46cf78..76f55cd1 100644 --- a/internal/reader/xml/decoder.go +++ b/internal/reader/xml/decoder.go @@ -14,13 +14,14 @@ import ( ) // NewXMLDecoder returns a XML decoder that filters illegal characters. -func NewXMLDecoder(data io.Reader) *xml.Decoder { +func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder { var decoder *xml.Decoder buffer, _ := io.ReadAll(data) enc := procInst("encoding", string(buffer)) if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") { // filter invalid chars later within decoder.CharsetReader - decoder = xml.NewDecoder(bytes.NewReader(buffer)) + data.Seek(0, io.SeekStart) + decoder = xml.NewDecoder(data) } else { // filter invalid chars now, since decoder.CharsetReader not called for utf-8 content filteredBytes := bytes.Map(filterValidXMLChar, buffer)