1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- package main
- import (
- "fmt"
- "log"
- "os"
- "strings"
- "golang.org/x/net/html"
- )
- func main() {
- fi, err := os.Open(os.Args[1])
- if err != nil {
- log.Fatal(err)
- }
- doc, err := html.Parse(fi)
- if err != nil {
- log.Fatal(err)
- }
- content := findContent(doc)
- fmt.Println(extractText(content))
- }
- func findContent(n *html.Node) *html.Node {
- for _, attr := range n.Attr {
- if attr.Key == "id" && attr.Val == "content" {
- return n
- }
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- if found := findContent(c); found != nil {
- return found
- }
- }
- return nil
- }
- func extractText(n *html.Node) string {
- text := ""
- if n.Type == html.TextNode {
- data := strings.TrimSpace(n.Data)
- if data != "" {
- text += data + "\n"
- }
- }
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- extr := strings.TrimSpace(extractText(c))
- if extr != "" {
- text += extr + "\n"
- }
- }
- return text
- }
|