Update date parser to parse more invalid date formats

This commit is contained in:
Frédéric Guillot 2023-11-01 19:26:16 +00:00
parent 500c60b807
commit e3eaaea15a
7 changed files with 15 additions and 7 deletions

View file

@ -126,7 +126,7 @@ func (a *atom03Entry) entryDate() time.Time {
if dateText != "" { if dateText != "" {
result, err := date.Parse(dateText) result, err := date.Parse(dateText)
if err != nil { if err != nil {
slog.Warn("Unable to parse date from Atom 0.3 feed", slog.Debug("Unable to parse date from Atom 0.3 feed",
slog.String("date", dateText), slog.String("date", dateText),
slog.String("id", a.ID), slog.String("id", a.ID),
slog.Any("error", err), slog.Any("error", err),

View file

@ -144,7 +144,7 @@ func (a *atom10Entry) entryDate() time.Time {
if dateText != "" { if dateText != "" {
result, err := date.Parse(dateText) result, err := date.Parse(dateText)
if err != nil { if err != nil {
slog.Warn("Unable to parse date from Atom 0.3 feed", slog.Debug("Unable to parse date from Atom 0.3 feed",
slog.String("date", dateText), slog.String("date", dateText),
slog.String("id", a.ID), slog.String("id", a.ID),
slog.Any("error", err), slog.Any("error", err),

View file

@ -219,6 +219,10 @@ var dateFormats = []string{
"Mon, 2rd Jan 2006 15:04:05 MST", "Mon, 2rd Jan 2006 15:04:05 MST",
"Mon, 2nd Jan 2006 15:04:05 MST", "Mon, 2nd Jan 2006 15:04:05 MST",
"Mon, 2st Jan 2006 15:04:05 MST", "Mon, 2st Jan 2006 15:04:05 MST",
"Mon, Jan 02 2006 03:04:05 PM",
"Monday, January 2, 2006 - 15:04",
"01/02/06 15:04:05",
"02.01.06",
} }
var invalidTimezoneReplacer = strings.NewReplacer( var invalidTimezoneReplacer = strings.NewReplacer(
@ -309,6 +313,7 @@ var invalidLocalizedDateReplacer = strings.NewReplacer(
// Parse parses a given date string using a large // Parse parses a given date string using a large
// list of commonly found feed date formats. // list of commonly found feed date formats.
func Parse(rawInput string) (t time.Time, err error) { func Parse(rawInput string) (t time.Time, err error) {
rawInput = strings.TrimSpace(rawInput)
timestamp, err := strconv.ParseInt(rawInput, 10, 64) timestamp, err := strconv.ParseInt(rawInput, 10, 64)
if err == nil { if err == nil {
return time.Unix(timestamp, 0), nil return time.Unix(timestamp, 0), nil
@ -316,7 +321,6 @@ func Parse(rawInput string) (t time.Time, err error) {
processedInput := invalidLocalizedDateReplacer.Replace(rawInput) processedInput := invalidLocalizedDateReplacer.Replace(rawInput)
processedInput = invalidTimezoneReplacer.Replace(processedInput) processedInput = invalidTimezoneReplacer.Replace(processedInput)
processedInput = strings.TrimSpace(processedInput)
if processedInput == "" { if processedInput == "" {
return t, errors.New(`date parser: empty value`) return t, errors.New(`date parser: empty value`)
} }

View file

@ -214,11 +214,15 @@ func TestParseWeirdDateFormat(t *testing.T) {
"Jun 23, 2023 19:00 GMT", "Jun 23, 2023 19:00 GMT",
"09/15/2014 4:20 pm PST", "09/15/2014 4:20 pm PST",
"Fri, 23rd Jun 2023 09:32:20 GMT", "Fri, 23rd Jun 2023 09:32:20 GMT",
"Sat, Oct 28 2023 08:28:28 PM",
"Monday, October 6, 2023 - 16:29\n",
"10/30/23 21:55:58",
"30.10.23",
} }
for _, date := range dates { for _, date := range dates {
if _, err := Parse(date); err != nil { if _, err := Parse(date); err != nil {
t.Errorf(`Unable to parse date: %q`, date) t.Errorf(`Unable to parse date: %q (%v)`, date, err)
} }
} }
} }

View file

@ -110,7 +110,7 @@ func (j *jsonItem) GetDate() time.Time {
if value != "" { if value != "" {
d, err := date.Parse(value) d, err := date.Parse(value)
if err != nil { if err != nil {
slog.Warn("Unable to parse date from JSON feed", slog.Debug("Unable to parse date from JSON feed",
slog.String("date", value), slog.String("date", value),
slog.String("url", j.URL), slog.String("url", j.URL),
slog.Any("error", err), slog.Any("error", err),

View file

@ -100,7 +100,7 @@ func (r *rdfItem) entryDate() time.Time {
if r.DublinCoreDate != "" { if r.DublinCoreDate != "" {
result, err := date.Parse(r.DublinCoreDate) result, err := date.Parse(r.DublinCoreDate)
if err != nil { if err != nil {
slog.Warn("Unable to parse date from RDF feed", slog.Debug("Unable to parse date from RDF feed",
slog.String("date", r.DublinCoreDate), slog.String("date", r.DublinCoreDate),
slog.String("link", r.Link), slog.String("link", r.Link),
slog.Any("error", err), slog.Any("error", err),

View file

@ -235,7 +235,7 @@ func (r *rssItem) entryDate() time.Time {
if value != "" { if value != "" {
result, err := date.Parse(value) result, err := date.Parse(value)
if err != nil { if err != nil {
slog.Warn("Unable to parse date from RSS feed", slog.Debug("Unable to parse date from RSS feed",
slog.String("date", value), slog.String("date", value),
slog.String("guid", r.GUID.Data), slog.String("guid", r.GUID.Data),
slog.Any("error", err), slog.Any("error", err),