browser
-- import "github.com/headzoo/surf/browser"
Package browser contains the primary browser implementation.
Usage
var InitialAssetsSliceSize = 20
InitialAssetsSliceSize is the initial size when allocating a slice of page assets. Increasing this size may lead to a very small performance increase when downloading assets from a page with a lot of assets.
func DownloadAsset
func DownloadAsset(asset Downloadable, out io.Writer) (int64, error)
DownloadAsset copies a remote file to the given writer.
func DownloadAssetAsync
func DownloadAssetAsync(asset Downloadable, out io.Writer, c AsyncDownloadChannel)
DownloadAssetAsync downloads an asset asynchronously and notifies the given channel when the download is complete.
type Asset
// Asset holds the properties shared by every page asset: an optional ID,
// the asset URL, and the kind of asset.
type Asset struct {
// ID is the value of the id attribute if available.
ID string
// URL is the asset URL.
URL *url.URL
// Type describes the type of asset.
Type AssetType
}
Asset implements Assetable.
func (*Asset) AssetType
func (at *Asset) AssetType() AssetType
AssetType returns the asset type.
func (*Asset) Id
func (at *Asset) Id() string
Id returns the asset ID or an empty string when not available.
func (*Asset) Url
func (at *Asset) Url() *url.URL
Url returns the asset URL.
type AssetType
type AssetType uint16
AssetType describes a type of page asset, such as an image or stylesheet.
// Known AssetType values. They are numbered from zero with iota, in the
// order declared.
const (
// LinkAsset describes a *Link asset.
LinkAsset AssetType = iota
// ImageAsset describes an *Image asset.
ImageAsset
// StylesheetAsset describes a *Stylesheet asset.
StylesheetAsset
// ScriptAsset describes a *Script asset.
ScriptAsset
)
type Assetable
type Assetable interface {
// Url returns the asset URL.
Url() *url.URL
// Id returns the asset ID or an empty string when not available.
Id() string
// AssetType returns the type of the asset.
AssetType() AssetType
}
Assetable represents a page asset, such as an image or stylesheet.
type AsyncDownloadChannel
type AsyncDownloadChannel chan *AsyncDownloadResult
AsyncDownloadChannel is a channel upon which the results of an async download are passed.
type AsyncDownloadResult
type AsyncDownloadResult struct {
// Asset is the Downloadable asset that was downloaded.
Asset Downloadable
// Writer is the io.Writer the asset data was written to.
Writer io.Writer
// Size is the number of bytes written to the io.Writer.
Size int64
// Error contains any error that occurred during the download or nil.
Error error
}
AsyncDownloadResult has the results of an asynchronous download.
type Attribute
type Attribute int
Attribute represents a Browser capability.
const (
// SendReferer instructs a Browser to send the Referer header.
SendReferer Attribute = iota
// MetaRefreshHandling instructs a Browser to handle the refresh meta tag.
MetaRefreshHandling
// FollowRedirects instructs a Browser to follow Location headers.
FollowRedirects
)
type AttributeMap
type AttributeMap map[Attribute]bool
AttributeMap represents a map of Attribute values.
type Browsable
type Browsable interface {
// SetUserAgent sets the user agent.
SetUserAgent(ua string)
// SetAttribute sets a browser instruction attribute.
SetAttribute(a Attribute, v bool)
// SetAttributes is used to set all the browser attributes.
SetAttributes(a AttributeMap)
// SetState sets the browser state.
SetState(sj *jar.State)
// SetBookmarksJar sets the bookmarks jar the browser uses.
SetBookmarksJar(bj jar.BookmarksJar)
// SetCookieJar is used to set the cookie jar the browser uses.
SetCookieJar(cj http.CookieJar)
// SetHistoryJar is used to set the history jar the browser uses.
SetHistoryJar(hj jar.History)
// SetHeadersJar sets the headers the browser sends with each request.
SetHeadersJar(h http.Header)
// SetTimeout sets the timeout for requests.
SetTimeout(t time.Duration)
// SetTransport sets the http library transport mechanism for each request.
SetTransport(rt http.RoundTripper)
// AddRequestHeader adds a header the browser sends with each request.
AddRequestHeader(name, value string)
// Open requests the given URL using the GET method.
Open(url string) error
// Head requests the given URL using the HEAD method.
Head(url string) error
// OpenForm appends the data values to the given URL and sends a GET request.
OpenForm(url string, data url.Values) error
// OpenBookmark calls Open() with the URL for the bookmark with the given name.
OpenBookmark(name string) error
// Post requests the given URL using the POST method.
Post(url string, contentType string, body io.Reader) error
// PostForm requests the given URL using the POST method with the given data.
PostForm(url string, data url.Values) error
// PostMultipart requests the given URL using the POST method with the given data using multipart/form-data format.
PostMultipart(u string, fields url.Values, files FileSet) error
// Back loads the previously requested page.
Back() bool
// Reload duplicates the last successful request.
Reload() error
// Bookmark saves the page URL in the bookmarks with the given name.
Bookmark(name string) error
// Click clicks on the page element matched by the given expression.
Click(expr string) error
// Form returns the form in the current page that matches the given expr.
Form(expr string) (Submittable, error)
// Forms returns a slice of every form in the page.
Forms() []Submittable
// Links returns a slice of every link found in the page.
Links() []*Link
// Images returns a slice of every image found in the page.
Images() []*Image
// Stylesheets returns a slice of every stylesheet linked to the document.
Stylesheets() []*Stylesheet
// Scripts returns a slice of every script linked to the document.
Scripts() []*Script
// SiteCookies returns the cookies for the current site.
SiteCookies() []*http.Cookie
// ResolveUrl returns an absolute URL for a possibly relative URL.
ResolveUrl(u *url.URL) *url.URL
// ResolveStringUrl works just like ResolveUrl, but the argument and return value are strings.
ResolveStringUrl(u string) (string, error)
// Download writes the contents of the document to the given writer.
Download(o io.Writer) (int64, error)
// Url returns the page URL.
Url() *url.URL
// StatusCode returns the response status code.
StatusCode() int
// Title returns the page title.
Title() string
// ResponseHeaders returns the page headers.
ResponseHeaders() http.Header
// Body returns the page body as a string of html.
Body() string
// Dom returns the inner *goquery.Selection.
Dom() *goquery.Selection
// Find returns the dom selections matching the given expression.
Find(expr string) *goquery.Selection
}
Browsable represents an HTTP web browser.
type Browser
type Browser struct {
}
Browser is the default Browsable implementation.
func (*Browser) AddRequestHeader
func (bow *Browser) AddRequestHeader(name, value string)
AddRequestHeader sets a header the browser sends with each request.
func (*Browser) Back
func (bow *Browser) Back() bool
Back loads the previously requested page.
Returns a boolean value indicating whether a previous page existed, and was successfully loaded.
func (*Browser) Body
func (bow *Browser) Body() string
Body returns the page body as a string of html.
func (*Browser) Bookmark
func (bow *Browser) Bookmark(name string) error
Bookmark saves the page URL in the bookmarks with the given name.
func (*Browser) Click
func (bow *Browser) Click(expr string) error
Click clicks on the page element matched by the given expression.
Currently this is only useful for clicking on links, which will cause the browser to load the page pointed at by the link. Future versions of Surf may support JavaScript, in which case clicking on elements will fire the click event.
func (*Browser) DelRequestHeader
func (bow *Browser) DelRequestHeader(name string)
DelRequestHeader deletes a header so the browser will not send it with future requests.
func (*Browser) Dom
func (bow *Browser) Dom() *goquery.Selection
Dom returns the inner *goquery.Selection.
func (*Browser) Download
func (bow *Browser) Download(o io.Writer) (int64, error)
Download writes the contents of the document to the given writer.
func (*Browser) Find
func (bow *Browser) Find(expr string) *goquery.Selection
Find returns the dom selections matching the given expression.
func (*Browser) Form
func (bow *Browser) Form(expr string) (Submittable, error)
Form returns the form in the current page that matches the given expr.
func (*Browser) Forms
func (bow *Browser) Forms() []Submittable
Forms returns an array of every form in the page.
func (*Browser) Head
func (bow *Browser) Head(u string) error
Head requests the given URL using the HEAD method.
func (*Browser) Images
func (bow *Browser) Images() []*Image
Images returns an array of every image found in the page.
func (*Browser) Links
func (bow *Browser) Links() []*Link
Links returns an array of every link found in the page.
func (*Browser) Open
func (bow *Browser) Open(u string) error
Open requests the given URL using the GET method.
func (*Browser) OpenBookmark
func (bow *Browser) OpenBookmark(name string) error
OpenBookmark calls Open() with the URL for the bookmark with the given name.
func (*Browser) OpenForm
func (bow *Browser) OpenForm(u string, data url.Values) error
OpenForm appends the data values to the given URL and sends a GET request.
func (*Browser) Post
func (bow *Browser) Post(u string, contentType string, body io.Reader) error
Post requests the given URL using the POST method.
func (*Browser) PostForm
func (bow *Browser) PostForm(u string, data url.Values) error
PostForm requests the given URL using the POST method with the given data.
func (*Browser) PostMultipart
func (bow *Browser) PostMultipart(u string, fields url.Values, files FileSet) error
PostMultipart requests the given URL using the POST method with the given data using multipart/form-data format.
func (*Browser) Reload
func (bow *Browser) Reload() error
Reload duplicates the last successful request.
func (*Browser) ResolveStringUrl
func (bow *Browser) ResolveStringUrl(u string) (string, error)
ResolveStringUrl works just like ResolveUrl, but the argument and return value are strings.
func (*Browser) ResolveUrl
func (bow *Browser) ResolveUrl(u *url.URL) *url.URL
ResolveUrl returns an absolute URL for a possibly relative URL.
func (*Browser) ResponseHeaders
func (bow *Browser) ResponseHeaders() http.Header
ResponseHeaders returns the page headers.
func (*Browser) Scripts
func (bow *Browser) Scripts() []*Script
Scripts returns an array of every script linked to the document.
func (*Browser) SetAttribute
func (bow *Browser) SetAttribute(a Attribute, v bool)
SetAttribute sets a browser instruction attribute.
func (*Browser) SetAttributes
func (bow *Browser) SetAttributes(a AttributeMap)
SetAttributes is used to set all the browser attributes.
func (*Browser) SetBookmarksJar
func (bow *Browser) SetBookmarksJar(bj jar.BookmarksJar)
SetBookmarksJar sets the bookmarks jar the browser uses.
func (*Browser) SetCookieJar
func (bow *Browser) SetCookieJar(cj http.CookieJar)
SetCookieJar is used to set the cookie jar the browser uses.
func (*Browser) SetHeadersJar
func (bow *Browser) SetHeadersJar(h http.Header)
SetHeadersJar sets the headers the browser sends with each request.
func (*Browser) SetHistoryJar
func (bow *Browser) SetHistoryJar(hj jar.History)
SetHistoryJar is used to set the history jar the browser uses.
func (*Browser) SetState
func (bow *Browser) SetState(sj *jar.State)
SetState sets the browser state.
func (*Browser) SetTimeout
func (bow *Browser) SetTimeout(t time.Duration)
SetTimeout sets the timeout for requests.
func (*Browser) SetTransport
func (bow *Browser) SetTransport(rt http.RoundTripper)
SetTransport sets the http library transport mechanism for each request.
func (*Browser) SetUserAgent
func (bow *Browser) SetUserAgent(userAgent string)
SetUserAgent sets the user agent.
func (*Browser) SiteCookies
func (bow *Browser) SiteCookies() []*http.Cookie
SiteCookies returns the cookies for the current site.
func (*Browser) StatusCode
func (bow *Browser) StatusCode() int
StatusCode returns the response status code.
func (*Browser) Stylesheets
func (bow *Browser) Stylesheets() []*Stylesheet
Stylesheets returns an array of every stylesheet linked to the document.
func (*Browser) Title
func (bow *Browser) Title() string
Title returns the page title.
func (*Browser) Url
func (bow *Browser) Url() *url.URL
Url returns the page URL.
type Downloadable
type Downloadable interface {
// Assetable supplies the Url, Id and AssetType methods.
Assetable
// Download writes the contents of the element to the given writer.
//
// Returns the number of bytes written.
Download(out io.Writer) (int64, error)
// DownloadAsync downloads the contents of the element asynchronously.
//
// An instance of AsyncDownloadResult will be sent down the given channel
// when the download is complete.
DownloadAsync(out io.Writer, ch AsyncDownloadChannel)
}
Downloadable represents an asset that may be downloaded.
type DownloadableAsset
type DownloadableAsset struct {
// Asset supplies the ID, URL and asset type of the downloadable asset.
Asset
}
DownloadableAsset is an asset that may be downloaded.
func (*DownloadableAsset) Download
func (at *DownloadableAsset) Download(out io.Writer) (int64, error)
Download writes the asset to the given io.Writer type.
func (*DownloadableAsset) DownloadAsync
func (at *DownloadableAsset) DownloadAsync(out io.Writer, ch AsyncDownloadChannel)
DownloadAsync downloads the asset asynchronously.
type File
type File struct {
}
File represents an input of type file. It includes the file name and an io.Reader.
type FileSet
type FileSet map[string]*File
FileSet represents a map of files used to post multipart form data.
type Form
type Form struct {
}
Form is the default form element.
func NewForm
func NewForm(bow Browsable, s *goquery.Selection) *Form
NewForm creates and returns a *Form type.
func (*Form) Action
func (f *Form) Action() string
Action returns the form action URL. The URL will always be absolute.
func (*Form) Click
func (f *Form) Click(button string) error
Click submits the form by clicking the button with the given name.
func (*Form) ClickByValue
func (f *Form) ClickByValue(name, value string) error
ClickByValue submits the form by clicking the button with the given name and value.
func (*Form) Dom
func (f *Form) Dom() *goquery.Selection
Dom returns the inner *goquery.Selection.
func (*Form) File
func (f *Form) File(name string, fileName string, data io.Reader) error
File sets the value of a form input of type file. It returns an ElementNotFound error if the field does not exist.
func (*Form) Input
func (f *Form) Input(name, value string) error
Input sets the value of a form field. It returns an ElementNotFound error if the field does not exist.
func (*Form) Method
func (f *Form) Method() string
Method returns the form method, eg "GET" or "POST".
func (*Form) Set
func (f *Form) Set(name, value string) error
Set will set the value of a form field if it exists, or create and set it if it does not.
func (*Form) SetFile
func (f *Form) SetFile(name string, fileName string, data io.Reader)
SetFile sets the value of a form input of type file. It adds the field to the form if necessary.
func (*Form) Submit
func (f *Form) Submit() error
Submit submits the form. Clicks the first button in the form, or submits the form without using any button when the form does not contain any buttons.
type Image
type Image struct {
// DownloadableAsset supplies the image ID, URL and asset type, plus the
// Download and DownloadAsync methods.
DownloadableAsset
// Alt is the value of the image alt attribute if available.
Alt string
// Title is the value of the image title attribute if available.
Title string
}
Image stores the properties of an image.
func NewImageAsset
func NewImageAsset(url *url.URL, id, alt, title string) *Image
NewImageAsset creates and returns a new *Image type.
type Link
type Link struct {
// Asset supplies the link ID, URL and asset type. Link embeds Asset rather
// than DownloadableAsset, so it has no Download methods.
Asset
// Text is the text appearing between the opening and closing anchor tag.
Text string
}
Link stores the properties of a page link.
func NewLinkAsset
func NewLinkAsset(u *url.URL, id, text string) *Link
NewLinkAsset creates and returns a new *Link type.
type Script
type Script struct {
// DownloadableAsset supplies the script ID, URL and asset type, plus the
// Download and DownloadAsync methods.
DownloadableAsset
// Type is the value of the type attribute. Defaults to "text/javascript" when not specified.
//
// This field shadows the Type field promoted from the embedded Asset; use
// the AssetType method to read the asset type.
Type string
}
Script stores the properties of a linked script.
func NewScriptAsset
func NewScriptAsset(url *url.URL, id, typ string) *Script
NewScriptAsset creates and returns a new *Script type.
type Stylesheet
type Stylesheet struct {
// DownloadableAsset supplies the stylesheet ID, URL and asset type, plus
// the Download and DownloadAsync methods.
DownloadableAsset
// Media is the value of the media attribute. Defaults to "all" when not specified.
Media string
// Type is the value of the type attribute. Defaults to "text/css" when not specified.
//
// This field shadows the Type field promoted from the embedded Asset; use
// the AssetType method to read the asset type.
Type string
}
Stylesheet stores the properties of a linked stylesheet.
func NewStylesheetAsset
func NewStylesheetAsset(url *url.URL, id, media, typ string) *Stylesheet
NewStylesheetAsset creates and returns a new *Stylesheet type.
type Submittable
type Submittable interface {
// Method returns the form method, eg "GET" or "POST".
Method() string
// Action returns the form action URL.
Action() string
// Input sets the value of a form field. It returns an error if the field
// does not exist.
Input(name, value string) error
// Set sets the value of a form field if it exists, or creates and sets it
// if it does not.
Set(name, value string) error
// File sets the value of a form input of type file. It returns an error if
// the field does not exist.
File(name string, fileName string, data io.Reader) error
// SetFile sets the value of a form input of type file, adding the field to
// the form if necessary.
SetFile(name string, fileName string, data io.Reader)
// Click submits the form by clicking the button with the given name.
Click(button string) error
// ClickByValue submits the form by clicking the button with the given name
// and value.
ClickByValue(name, value string) error
// Submit submits the form.
Submit() error
// Dom returns the inner *goquery.Selection.
Dom() *goquery.Selection
}
Submittable represents an element that may be submitted, such as a form.