30 Star 70 Fork 15

andeyalee / surfer

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
surfer.go 3.09 KB
一键复制 编辑 原始数据 按行查看 历史
// Copyright 2015 henrylee2cn Author. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Package surfer is a high level concurrency http client.
//
// It provides `surf` and `phantom` download engines, closely simulates browser behavior, supports simulated login, and so on.
//
// Features:
// - Both surf and phantomjs engines are supported
// - Support random User-Agent
// - Support cache cookie
// - Support http/https
//
// Usage:
// package main
//
// import (
// "github.com/henrylee2cn/surfer"
// "io/ioutil"
// "log"
// )
//
// func main() {
// // Use surf engine
// resp, err := surfer.Download(&surfer.Request{
// Url: "http://github.com/henrylee2cn/surfer",
// })
// if err != nil {
// log.Fatal(err)
// }
// b, err := ioutil.ReadAll(resp.Body)
// log.Println(string(b), err)
//
// // Use phantomjs engine
// resp, err = surfer.Download(&surfer.Request{
// Url: "http://github.com/henrylee2cn",
// DownloaderID: 1,
// })
// if err != nil {
// log.Fatal(err)
// }
// b, err = ioutil.ReadAll(resp.Body)
// log.Println(string(b), err)
// resp.Body.Close()
// surfer.DestroyJsFiles()
// }
package surfer
import (
	"errors"
	"fmt"
	"net/http"
	"net/http/cookiejar"
	"sync"
)
// Package-level state shared by Download, SetPhantomJsFilePath and
// DestroyJsFiles.
var (
	// cookieJar is passed to both engines when they are constructed.
	// The error returned by cookiejar.New is discarded.
	cookieJar, _ = cookiejar.New(nil)

	// phantomjsFile is the phantomjs executable path passed to NewPhantom.
	phantomjsFile = "./phantomjs"

	// tempJsDir is the temporary js directory passed to NewPhantom.
	tempJsDir = "./tmp"

	// surf is the surf engine; once_surf guards its one-time construction.
	surf      Surfer
	once_surf sync.Once

	// phantom is the phantomjs engine; once_phantom guards its one-time
	// construction.
	phantom      Surfer
	once_phantom sync.Once
)
// Download performs req using the engine selected by req.DownloaderID.
//
// SurfID dispatches to the surf engine and PhomtomJsID to the phantomjs
// engine. Each engine is constructed on first use, exactly once, and receives
// the package-level cookie jar. The phantomjs engine is built from the values
// of phantomjsFile and tempJsDir at the time of that first use.
//
// A nil req or an unrecognized DownloaderID returns a nil response together
// with a non-nil error, so callers never receive (nil, nil) and can safely
// dereference resp after checking err.
func Download(req *Request) (resp *http.Response, err error) {
	if req == nil {
		return nil, errors.New("surfer: nil request")
	}
	switch req.DownloaderID {
	case SurfID:
		once_surf.Do(func() { surf = New(cookieJar) })
		return surf.Download(req)
	case PhomtomJsID:
		once_phantom.Do(func() { phantom = NewPhantom(phantomjsFile, tempJsDir, cookieJar) })
		return phantom.Download(req)
	default:
		// Previously this fell through and returned (nil, nil), which made
		// the caller's resp.Body access panic.
		return nil, fmt.Errorf("surfer: unknown downloader id %v", req.DownloaderID)
	}
}
// SetPhantomJsFilePath sets the location of the phantomjs executable.
//
// The value is stored in the package-level phantomjsFile variable, which
// Download reads when it constructs the phantomjs engine for the first time.
func SetPhantomJsFilePath(filePath string) {
	phantomjsFile = filePath
}
// DestroyJsFiles destroys the temporary js files of the phantomjs engine.
//
// It does nothing when the package-level phantom engine is not a *Phantom,
// which includes the case where it has not been created yet.
func DestroyJsFiles() {
	engine, isPhantom := phantom.(*Phantom)
	if !isPhantom {
		return
	}
	engine.DestroyJsFiles()
}
// Surfer represents the core of an HTTP web browser for a crawler.
//
// Request kinds and the parameters noted for each:
//
//	GET                   url string, header http.Header, cookies []*http.Cookie
//	HEAD                  url string, header http.Header, cookies []*http.Cookie
//	POST   PostForm       url, referer string, values url.Values, header http.Header, cookies []*http.Cookie
//	POST-M PostMultipart  url, referer string, values url.Values, header http.Header, cookies []*http.Cookie
type Surfer interface {
	// Download executes req and returns the resulting HTTP response, or an
	// error.
	Download(req *Request) (*http.Response, error)
}
Go
1
https://gitee.com/andeyalee/surfer.git
git@gitee.com:andeyalee/surfer.git
andeyalee
surfer
surfer
master

搜索帮助