Keep other table rows and columns

This commit is contained in:
Jake Walker 2023-04-01 10:02:58 +01:00 committed by Frédéric Guillot
parent 49d2596fc6
commit 8b6dd3e599
2 changed files with 19 additions and 18 deletions

View file

@@ -342,25 +342,26 @@ func removeTables(entryContent string) string {
return entryContent
}
var table *goquery.Selection
selectors := []string{"table", "tbody", "thead", "td", "th", "td"}
for {
table = doc.Find("table").First()
var loopElement *goquery.Selection
if table.Length() == 0 {
break
for _, selector := range selectors {
for {
loopElement = doc.Find(selector).First()
if loopElement.Length() == 0 {
break
}
innerHtml, err := loopElement.Html()
if err != nil {
break
}
loopElement.Parent().AppendHtml(innerHtml)
loopElement.Remove()
}
td := table.Find("td").First()
if td.Length() == 0 {
break
}
tdHtml, _ := td.Html()
table.Parent().AppendHtml(tdHtml)
table.Remove()
}
output, _ := doc.Find("body").First().Html()

View file

@@ -327,8 +327,8 @@ func TestRewriteBase64DecodeArgs(t *testing.T) {
}
func TestRewriteRemoveTables(t *testing.T) {
content := `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td>Hello World!</td></tr></tbody></table></td></tr></tbody></table>`
expected := `<p>Test</p>Hello World!`
content := `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td><p>Hello World!</p></td><td><p>Test</p></td></tr></tbody></table></td></tr></tbody></table>`
expected := `<p>Test</p><p>Hello World!</p><p>Test</p>`
output := Rewriter("https://example.org/article", content, `remove_tables`)
if expected != output {