2017-11-20 06:10:04 +01:00
|
|
|
package goquery_test
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"log"
|
2018-07-07 06:18:14 +02:00
|
|
|
"net/http"
|
|
|
|
"os"
|
|
|
|
"strings"
|
2017-11-20 06:10:04 +01:00
|
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
)
|
|
|
|
|
|
|
|
// This example scrapes the reviews shown on the home page of metalsucks.net.
|
|
|
|
func Example() {
|
2018-07-07 06:18:14 +02:00
|
|
|
// Request the HTML page.
|
|
|
|
res, err := http.Get("http://metalsucks.net")
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
defer res.Body.Close()
|
|
|
|
if res.StatusCode != 200 {
|
|
|
|
log.Fatalf("status code error: %d %s", res.StatusCode, res.Status)
|
|
|
|
}
|
|
|
|
|
2017-11-20 06:10:04 +01:00
|
|
|
// Load the HTML document
|
2018-07-07 06:18:14 +02:00
|
|
|
doc, err := goquery.NewDocumentFromReader(res.Body)
|
2017-11-20 06:10:04 +01:00
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Find the review items
|
|
|
|
doc.Find(".sidebar-reviews article .content-block").Each(func(i int, s *goquery.Selection) {
|
|
|
|
// For each item found, get the band and title
|
|
|
|
band := s.Find("a").Text()
|
|
|
|
title := s.Find("i").Text()
|
|
|
|
fmt.Printf("Review %d: %s - %s\n", i, band, title)
|
|
|
|
})
|
|
|
|
// To see the output of the Example while running the test suite (go test), simply
|
|
|
|
// remove the leading "x" before Output on the next line. This will cause the
|
|
|
|
// example to fail (all the "real" tests should pass).
|
|
|
|
|
|
|
|
// xOutput: voluntarily fail the Example output.
|
|
|
|
}
|
2018-07-07 06:18:14 +02:00
|
|
|
|
|
|
|
// This example shows how to use NewDocumentFromReader from a file.
|
|
|
|
func ExampleNewDocumentFromReader_file() {
|
|
|
|
// create from a file
|
|
|
|
f, err := os.Open("some/file.html")
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
doc, err := goquery.NewDocumentFromReader(f)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
// use the goquery document...
|
|
|
|
_ = doc.Find("h1")
|
|
|
|
}
|
|
|
|
|
|
|
|
// This example shows how to use NewDocumentFromReader from a string.
|
|
|
|
func ExampleNewDocumentFromReader_string() {
|
|
|
|
// create from a string
|
|
|
|
data := `
|
|
|
|
<html>
|
|
|
|
<head>
|
|
|
|
<title>My document</title>
|
|
|
|
</head>
|
|
|
|
<body>
|
|
|
|
<h1>Header</h1>
|
|
|
|
</body>
|
|
|
|
</html>`
|
|
|
|
|
|
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
header := doc.Find("h1").Text()
|
|
|
|
fmt.Println(header)
|
|
|
|
|
|
|
|
// Output: Header
|
|
|
|
}
|