Update date parser to parse more invalid date formats
This commit is contained in:
parent
500c60b807
commit
e3eaaea15a
7 changed files with 15 additions and 7 deletions
|
@ -126,7 +126,7 @@ func (a *atom03Entry) entryDate() time.Time {
|
|||
if dateText != "" {
|
||||
result, err := date.Parse(dateText)
|
||||
if err != nil {
|
||||
slog.Warn("Unable to parse date from Atom 0.3 feed",
|
||||
slog.Debug("Unable to parse date from Atom 0.3 feed",
|
||||
slog.String("date", dateText),
|
||||
slog.String("id", a.ID),
|
||||
slog.Any("error", err),
|
||||
|
|
|
@ -144,7 +144,7 @@ func (a *atom10Entry) entryDate() time.Time {
|
|||
if dateText != "" {
|
||||
result, err := date.Parse(dateText)
|
||||
if err != nil {
|
||||
slog.Warn("Unable to parse date from Atom 0.3 feed",
|
||||
slog.Debug("Unable to parse date from Atom 0.3 feed",
|
||||
slog.String("date", dateText),
|
||||
slog.String("id", a.ID),
|
||||
slog.Any("error", err),
|
||||
|
|
|
@ -219,6 +219,10 @@ var dateFormats = []string{
|
|||
"Mon, 2rd Jan 2006 15:04:05 MST",
|
||||
"Mon, 2nd Jan 2006 15:04:05 MST",
|
||||
"Mon, 2st Jan 2006 15:04:05 MST",
|
||||
"Mon, Jan 02 2006 03:04:05 PM",
|
||||
"Monday, January 2, 2006 - 15:04",
|
||||
"01/02/06 15:04:05",
|
||||
"02.01.06",
|
||||
}
|
||||
|
||||
var invalidTimezoneReplacer = strings.NewReplacer(
|
||||
|
@ -309,6 +313,7 @@ var invalidLocalizedDateReplacer = strings.NewReplacer(
|
|||
// Parse parses a given date string using a large
|
||||
// list of commonly found feed date formats.
|
||||
func Parse(rawInput string) (t time.Time, err error) {
|
||||
rawInput = strings.TrimSpace(rawInput)
|
||||
timestamp, err := strconv.ParseInt(rawInput, 10, 64)
|
||||
if err == nil {
|
||||
return time.Unix(timestamp, 0), nil
|
||||
|
@ -316,7 +321,6 @@ func Parse(rawInput string) (t time.Time, err error) {
|
|||
|
||||
processedInput := invalidLocalizedDateReplacer.Replace(rawInput)
|
||||
processedInput = invalidTimezoneReplacer.Replace(processedInput)
|
||||
processedInput = strings.TrimSpace(processedInput)
|
||||
if processedInput == "" {
|
||||
return t, errors.New(`date parser: empty value`)
|
||||
}
|
||||
|
|
|
@ -214,11 +214,15 @@ func TestParseWeirdDateFormat(t *testing.T) {
|
|||
"Jun 23, 2023 19:00 GMT",
|
||||
"09/15/2014 4:20 pm PST",
|
||||
"Fri, 23rd Jun 2023 09:32:20 GMT",
|
||||
"Sat, Oct 28 2023 08:28:28 PM",
|
||||
"Monday, October 6, 2023 - 16:29\n",
|
||||
"10/30/23 21:55:58",
|
||||
"30.10.23",
|
||||
}
|
||||
|
||||
for _, date := range dates {
|
||||
if _, err := Parse(date); err != nil {
|
||||
t.Errorf(`Unable to parse date: %q`, date)
|
||||
t.Errorf(`Unable to parse date: %q (%v)`, date, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -110,7 +110,7 @@ func (j *jsonItem) GetDate() time.Time {
|
|||
if value != "" {
|
||||
d, err := date.Parse(value)
|
||||
if err != nil {
|
||||
slog.Warn("Unable to parse date from JSON feed",
|
||||
slog.Debug("Unable to parse date from JSON feed",
|
||||
slog.String("date", value),
|
||||
slog.String("url", j.URL),
|
||||
slog.Any("error", err),
|
||||
|
|
|
@ -100,7 +100,7 @@ func (r *rdfItem) entryDate() time.Time {
|
|||
if r.DublinCoreDate != "" {
|
||||
result, err := date.Parse(r.DublinCoreDate)
|
||||
if err != nil {
|
||||
slog.Warn("Unable to parse date from RDF feed",
|
||||
slog.Debug("Unable to parse date from RDF feed",
|
||||
slog.String("date", r.DublinCoreDate),
|
||||
slog.String("link", r.Link),
|
||||
slog.Any("error", err),
|
||||
|
|
|
@ -235,7 +235,7 @@ func (r *rssItem) entryDate() time.Time {
|
|||
if value != "" {
|
||||
result, err := date.Parse(value)
|
||||
if err != nil {
|
||||
slog.Warn("Unable to parse date from RSS feed",
|
||||
slog.Debug("Unable to parse date from RSS feed",
|
||||
slog.String("date", value),
|
||||
slog.String("guid", r.GUID.Data),
|
||||
slog.Any("error", err),
|
||||
|
|
Loading…
Reference in a new issue