// Package siteFavicon discovers favicon URLs by fetching a web page and
// inspecting the <link> elements in its HTML.
package siteFavicon

import (
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"regexp"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// httpSchemePattern matches strings that begin with "http://" or "https://".
// It is compiled once here instead of on every IsHTTPURL call.
var httpSchemePattern = regexp.MustCompile(`^(http://|https://)`)

// IsHTTPURL reports whether url begins with the literal prefix "http://" or
// "https://". The match is case-sensitive and checks only the prefix; it does
// not validate the rest of the string.
func IsHTTPURL(url string) bool {
	return httpSchemePattern.MatchString(url)
}

// GetOneFaviconURL fetches the page at urlStr and returns the first favicon
// candidate found in it as an absolute URL.
//
// Candidates that already start with http:// or https:// are returned
// unchanged. Any other href (root-relative "/f.ico", path-relative
// "img/f.ico", protocol-relative "//cdn.example.com/f.ico", ...) is resolved
// against urlStr following the usual relative-reference rules.
//
// The boolean result is false, and the string empty, when the page cannot be
// fetched or parsed, when urlStr itself cannot be parsed, or when no usable
// favicon link is present. Fetch errors are printed to standard output.
func GetOneFaviconURL(urlStr string) (string, bool) {
	iconURLs, err := getFaviconURL(urlStr)
	if err != nil {
		fmt.Println("Error:", err)
		return "", false
	}

	base, err := url.Parse(urlStr)
	if err != nil {
		fmt.Println("Error:", err)
		return "", false
	}

	for _, href := range iconURLs {
		href = strings.TrimSpace(href)

		// Already an absolute http(s) address: hand it back untouched.
		if IsHTTPURL(href) {
			return href, true
		}

		ref, err := url.Parse(href)
		if err != nil {
			// Unparseable href: try the next candidate instead of giving up.
			continue
		}
		// ResolveReference handles "//host/path", "/path" and "path" correctly,
		// which plain string concatenation onto scheme://host/ does not.
		return base.ResolveReference(ref).String(), true
	}
	return "", false
}

// getFaviconURL downloads the page at url and returns the href of every
// <link> element whose rel attribute contains "icon" (compared
// case-insensitively) and whose href is non-empty, in document order. The
// hrefs are returned exactly as written in the page; they may be relative.
//
// It returns an error when the request fails, when the response status is not
// 200 OK, when the body cannot be parsed as HTML, or when no matching link is
// found. The returned slice is nil whenever the error is non-nil.
//
// NOTE(review): http.Get uses http.DefaultClient, which sets no timeout.
func getFaviconURL(url string) ([]string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, errors.New("HTTP request failed with status code " + strconv.Itoa(resp.StatusCode))
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}

	// Walk every <link> tag and keep those whose rel mentions "icon".
	// Link types are case-insensitive in HTML, so normalize before matching.
	var icons []string
	doc.Find("link").Each(func(_ int, s *goquery.Selection) {
		rel, _ := s.Attr("rel")   // a missing attribute yields "", which never matches
		href, _ := s.Attr("href") // a missing attribute yields "", which is filtered below
		if href != "" && strings.Contains(strings.ToLower(rel), "icon") {
			icons = append(icons, href)
		}
	})

	if len(icons) == 0 {
		return nil, errors.New("favicon not found on the page")
	}
	return icons, nil
}