package main import ( "fmt" "log" "os" "strings" "golang.org/x/net/html" ) func main() { fi, err := os.Open(os.Args[1]) if err != nil { log.Fatal(err) } doc, err := html.Parse(fi) if err != nil { log.Fatal(err) } content := findContent(doc) fmt.Println(extractText(content)) } func findContent(n *html.Node) *html.Node { for _, attr := range n.Attr { if attr.Key == "id" && attr.Val == "content" { return n } } for c := n.FirstChild; c != nil; c = c.NextSibling { if found := findContent(c); found != nil { return found } } return nil } func extractText(n *html.Node) string { text := "" if n.Type == html.TextNode { data := strings.TrimSpace(n.Data) if data != "" { text += data + "\n" } } for c := n.FirstChild; c != nil; c = c.NextSibling { extr := strings.TrimSpace(extractText(c)) if extr != "" { text += extr + "\n" } } return text }