Update date parser to parse more invalid date formats
This commit is contained in:
parent
500c60b807
commit
e3eaaea15a
7 changed files with 15 additions and 7 deletions
|
@ -126,7 +126,7 @@ func (a *atom03Entry) entryDate() time.Time {
|
||||||
if dateText != "" {
|
if dateText != "" {
|
||||||
result, err := date.Parse(dateText)
|
result, err := date.Parse(dateText)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("Unable to parse date from Atom 0.3 feed",
|
slog.Debug("Unable to parse date from Atom 0.3 feed",
|
||||||
slog.String("date", dateText),
|
slog.String("date", dateText),
|
||||||
slog.String("id", a.ID),
|
slog.String("id", a.ID),
|
||||||
slog.Any("error", err),
|
slog.Any("error", err),
|
||||||
|
|
|
@ -144,7 +144,7 @@ func (a *atom10Entry) entryDate() time.Time {
|
||||||
if dateText != "" {
|
if dateText != "" {
|
||||||
result, err := date.Parse(dateText)
|
result, err := date.Parse(dateText)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("Unable to parse date from Atom 0.3 feed",
|
slog.Debug("Unable to parse date from Atom 0.3 feed",
|
||||||
slog.String("date", dateText),
|
slog.String("date", dateText),
|
||||||
slog.String("id", a.ID),
|
slog.String("id", a.ID),
|
||||||
slog.Any("error", err),
|
slog.Any("error", err),
|
||||||
|
|
|
@ -219,6 +219,10 @@ var dateFormats = []string{
|
||||||
"Mon, 2rd Jan 2006 15:04:05 MST",
|
"Mon, 2rd Jan 2006 15:04:05 MST",
|
||||||
"Mon, 2nd Jan 2006 15:04:05 MST",
|
"Mon, 2nd Jan 2006 15:04:05 MST",
|
||||||
"Mon, 2st Jan 2006 15:04:05 MST",
|
"Mon, 2st Jan 2006 15:04:05 MST",
|
||||||
|
"Mon, Jan 02 2006 03:04:05 PM",
|
||||||
|
"Monday, January 2, 2006 - 15:04",
|
||||||
|
"01/02/06 15:04:05",
|
||||||
|
"02.01.06",
|
||||||
}
|
}
|
||||||
|
|
||||||
var invalidTimezoneReplacer = strings.NewReplacer(
|
var invalidTimezoneReplacer = strings.NewReplacer(
|
||||||
|
@ -309,6 +313,7 @@ var invalidLocalizedDateReplacer = strings.NewReplacer(
|
||||||
// Parse parses a given date string using a large
|
// Parse parses a given date string using a large
|
||||||
// list of commonly found feed date formats.
|
// list of commonly found feed date formats.
|
||||||
func Parse(rawInput string) (t time.Time, err error) {
|
func Parse(rawInput string) (t time.Time, err error) {
|
||||||
|
rawInput = strings.TrimSpace(rawInput)
|
||||||
timestamp, err := strconv.ParseInt(rawInput, 10, 64)
|
timestamp, err := strconv.ParseInt(rawInput, 10, 64)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return time.Unix(timestamp, 0), nil
|
return time.Unix(timestamp, 0), nil
|
||||||
|
@ -316,7 +321,6 @@ func Parse(rawInput string) (t time.Time, err error) {
|
||||||
|
|
||||||
processedInput := invalidLocalizedDateReplacer.Replace(rawInput)
|
processedInput := invalidLocalizedDateReplacer.Replace(rawInput)
|
||||||
processedInput = invalidTimezoneReplacer.Replace(processedInput)
|
processedInput = invalidTimezoneReplacer.Replace(processedInput)
|
||||||
processedInput = strings.TrimSpace(processedInput)
|
|
||||||
if processedInput == "" {
|
if processedInput == "" {
|
||||||
return t, errors.New(`date parser: empty value`)
|
return t, errors.New(`date parser: empty value`)
|
||||||
}
|
}
|
||||||
|
|
|
@ -214,11 +214,15 @@ func TestParseWeirdDateFormat(t *testing.T) {
|
||||||
"Jun 23, 2023 19:00 GMT",
|
"Jun 23, 2023 19:00 GMT",
|
||||||
"09/15/2014 4:20 pm PST",
|
"09/15/2014 4:20 pm PST",
|
||||||
"Fri, 23rd Jun 2023 09:32:20 GMT",
|
"Fri, 23rd Jun 2023 09:32:20 GMT",
|
||||||
|
"Sat, Oct 28 2023 08:28:28 PM",
|
||||||
|
"Monday, October 6, 2023 - 16:29\n",
|
||||||
|
"10/30/23 21:55:58",
|
||||||
|
"30.10.23",
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, date := range dates {
|
for _, date := range dates {
|
||||||
if _, err := Parse(date); err != nil {
|
if _, err := Parse(date); err != nil {
|
||||||
t.Errorf(`Unable to parse date: %q`, date)
|
t.Errorf(`Unable to parse date: %q (%v)`, date, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -110,7 +110,7 @@ func (j *jsonItem) GetDate() time.Time {
|
||||||
if value != "" {
|
if value != "" {
|
||||||
d, err := date.Parse(value)
|
d, err := date.Parse(value)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("Unable to parse date from JSON feed",
|
slog.Debug("Unable to parse date from JSON feed",
|
||||||
slog.String("date", value),
|
slog.String("date", value),
|
||||||
slog.String("url", j.URL),
|
slog.String("url", j.URL),
|
||||||
slog.Any("error", err),
|
slog.Any("error", err),
|
||||||
|
|
|
@ -100,7 +100,7 @@ func (r *rdfItem) entryDate() time.Time {
|
||||||
if r.DublinCoreDate != "" {
|
if r.DublinCoreDate != "" {
|
||||||
result, err := date.Parse(r.DublinCoreDate)
|
result, err := date.Parse(r.DublinCoreDate)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("Unable to parse date from RDF feed",
|
slog.Debug("Unable to parse date from RDF feed",
|
||||||
slog.String("date", r.DublinCoreDate),
|
slog.String("date", r.DublinCoreDate),
|
||||||
slog.String("link", r.Link),
|
slog.String("link", r.Link),
|
||||||
slog.Any("error", err),
|
slog.Any("error", err),
|
||||||
|
|
|
@ -235,7 +235,7 @@ func (r *rssItem) entryDate() time.Time {
|
||||||
if value != "" {
|
if value != "" {
|
||||||
result, err := date.Parse(value)
|
result, err := date.Parse(value)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Warn("Unable to parse date from RSS feed",
|
slog.Debug("Unable to parse date from RSS feed",
|
||||||
slog.String("date", value),
|
slog.String("date", value),
|
||||||
slog.String("guid", r.GUID.Data),
|
slog.String("guid", r.GUID.Data),
|
||||||
slog.Any("error", err),
|
slog.Any("error", err),
|
||||||
|
|
Loading…
Reference in a new issue