From 7f040a3618749adf0b485c8c68f01764c8194a55 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Tue, 26 Nov 2019 13:26:22 +1100 Subject: [PATCH 01/16] add test for simpletracker --- tests/chromedp_test.go | 13 ++++++++++++- tests/simpletracker_test.go | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 tests/simpletracker_test.go diff --git a/tests/chromedp_test.go b/tests/chromedp_test.go index bf28ff3..6eb2f8e 100644 --- a/tests/chromedp_test.go +++ b/tests/chromedp_test.go @@ -7,7 +7,7 @@ import ( "github.com/xiahongze/pricetracker/trackers" ) -func TestChromedp(t *testing.T) { +func TestColes(t *testing.T) { url := "https://shop.coles.com.au/a/a-nsw-metro-westmead/product/goldn-canola-canola-oil" xpath := `//span/strong[@class="product-price"]` price, ok := trackers.ChromeTracker(&url, &xpath) @@ -17,3 +17,14 @@ func TestChromedp(t *testing.T) { } log.Printf("price: %s", price) } + +func TestChemist(t *testing.T) { + url := "https://www.chemistwarehouse.com.au/buy/1062/beconase-hayfever-nasal-spray-200-doses" + xpath := `//div[@class="product__price"]` + price, ok := trackers.ChromeTracker(&url, &xpath) + if !ok { + t.Errorf("can't fetch price from %s with %s", url, xpath) + return + } + log.Printf("price: %s", price) +} diff --git a/tests/simpletracker_test.go b/tests/simpletracker_test.go new file mode 100644 index 0000000..b850506 --- /dev/null +++ b/tests/simpletracker_test.go @@ -0,0 +1,19 @@ +package main + +import ( + "log" + "testing" + + "github.com/xiahongze/pricetracker/trackers" +) + +func TestChemistSimple(t *testing.T) { + url := "https://www.chemistwarehouse.com.au/buy/1062/beconase-hayfever-nasal-spray-200-doses" + xpath := `//div[@class="product__price"]` + price, ok := trackers.SimpleTracker(&url, &xpath) + if !ok { + t.Errorf("can't fetch price from %s with %s", url, xpath) + return + } + log.Printf("price: %s", price) +} From a2906b1bc2df7928f7b52c6d14d0419136f6c9e1 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Fri, 29 Nov 2019 10:54:11 +1100 Subject: [PATCH 02/16] options omittable --- gutils/converters.go | 2 +- models/requests.go | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/gutils/converters.go b/gutils/converters.go index 8be97e4..538d27f 100644 --- a/gutils/converters.go +++ b/gutils/converters.go @@ -9,7 +9,7 @@ import ( // ConvReq2Ent converts CreateRequest to Entity in datastore func ConvReq2Ent(req *models.CreateRequest) models.Entity { return models.Entity{ - Options: *req.Options, + Options: req.Options, URL: req.URL, Name: req.Name, XPATH: req.XPATH, diff --git a/models/requests.go b/models/requests.go index 31997f1..178ed33 100644 --- a/models/requests.go +++ b/models/requests.go @@ -15,11 +15,11 @@ type ( // CreateRequest defines the contract to add an entry CreateRequest struct { - URL string `json:"url"` - XPATH string `json:"xpath"` - Name string `json:"name"` - ExpectedPrice string `json:"expectedPrice"` - Options *Options `json:"options"` + URL string `json:"url"` + XPATH string `json:"xpath"` + Name string `json:"name"` + ExpectedPrice string `json:"expectedPrice"` + Options Options `json:"options,omitempty"` } // UpdateRequest defines the contract to update an entry @@ -28,7 +28,7 @@ type ( XPATH string `json:"xpath"` Name string `json:"name"` Key *datastore.Key `json:"key"` - Options *Options `json:"options"` + Options *Options `json:"options,omitempty"` } // ReadOrDelRequest defines the contract to read/delete an entry From 889d540d6fbb5b778db538d3538d1964c7790884 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Fri, 29 Nov 2019 11:38:36 +1100 Subject: [PATCH 03/16] support richer xpath with antchfx/htmlquery --- go.mod | 8 ++--- go.sum | 15 ++++------ tests/simpletracker_test.go | 2 +- trackers/chrome_tracker.go | 1 + trackers/simple_tracker.go | 60 +++++++------------------------------ 5 files changed, 23 insertions(+), 63 deletions(-) diff --git a/go.mod b/go.mod index 743db39..279ae2d 100644 --- a/go.mod +++ b/go.mod @@ -4,24 +4,24 @@ go 1.12 require ( cloud.google.com/go v0.30.0 + github.com/antchfx/htmlquery v1.2.0 + github.com/antchfx/xpath v1.1.2 // indirect github.com/chromedp/cdproto v0.0.0-20191114225735-6626966fbae4 github.com/chromedp/chromedp v0.5.1 + github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 // indirect github.com/google/go-cmp v0.3.1 // indirect github.com/googleapis/gax-go v2.0.0+incompatible // indirect - github.com/kr/pretty v0.1.0 // indirect github.com/labstack/echo v0.0.0-20180911044237-1abaa3049251 github.com/labstack/gommon v0.2.7 // indirect github.com/mattn/go-isatty v0.0.4 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect go.opencensus.io v0.17.0 // indirect golang.org/x/crypto v0.0.0-20181012144002-a92615f3c490 // indirect - golang.org/x/net v0.0.0-20181011144130-49bb7cea24b1 + golang.org/x/net v0.0.0-20181011144130-49bb7cea24b1 // indirect golang.org/x/oauth2 v0.0.0-20181003184128-c57b0facaced // indirect golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e // indirect google.golang.org/api v0.0.0-20181012000736-72df7e5ac770 google.golang.org/appengine v1.2.0 // indirect google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f // indirect google.golang.org/grpc v1.15.0 // indirect - gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect - gopkg.in/xmlpath.v2 v2.0.0-20150820204837-860cbeca3ebc ) diff --git a/go.sum b/go.sum index 41f0df0..edd6878 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,10 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT cloud.google.com/go v0.30.0 h1:xKvyLgk56d0nksWq49J0UyGEeUIicTl4+UBiX1NPX9g= cloud.google.com/go v0.30.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= +github.com/antchfx/htmlquery v1.2.0 h1:oKShnsGlnOHX6t4uj5OHgLKkABcJoqnXpqnscoi9Lpw= +github.com/antchfx/htmlquery v1.2.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8= +github.com/antchfx/xpath v1.1.2 h1:YziPrtM0gEJBnhdUGxYcIVYXZ8FXbtbovxOi+UW/yWQ= +github.com/antchfx/xpath v1.1.2/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/chromedp/cdproto v0.0.0-20191009033829-c22f49c9ff0a/go.mod h1:PfAWWKJqjlGFYJEidUM6aVIWPr0EpobeyVWEEmplX7g= github.com/chromedp/cdproto v0.0.0-20191114225735-6626966fbae4 h1:QD3KxSJ59L2lxG6MXBjNHxiQO2RmxTQ3XcK+wO44WOg= @@ -21,6 +25,8 @@ github.com/gobwas/ws v1.0.2 h1:CoAavW/wd/kulfZmSIBt6p24n4j7tHgNVCjsfHVNUbo= github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9 h1:uHTyIjqVhYRhLbJ8nIiOJHkEZZ+5YoOsAbD3sk82NiE= +github.com/golang/groupcache v0.0.0-20191027212112-611e8accdfc9/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= @@ -32,11 +38,6 @@ github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/knq/sysutil v0.0.0-20191005231841-15668db23d08 h1:V0an7KRw92wmJysvFvtqtKMAPmvS5O0jtB0nYo6t+gs= github.com/knq/sysutil v0.0.0-20191005231841-15668db23d08/go.mod h1:dFWs1zEqDjFtnBXsd1vPOZaLsESovai349994nHx3e0= -github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/labstack/echo v0.0.0-20180911044237-1abaa3049251 h1:4q++nZ4OEtmbHazhA/7i3T9B+CBWtnHpuMMcW55ZjRk= github.com/labstack/echo v0.0.0-20180911044237-1abaa3049251/go.mod h1:rWD2DNQgFb1IY9lVYZVLWn2Ko4dyHZ/LpHORyBLP3hI= github.com/labstack/gommon v0.0.0-20180312174116-6fe1405d73ec/go.mod h1:/tj9csK2iPSBvn+3NLM9e52usepMtrd5ilFYA+wQNJ4= @@ -102,8 +103,4 @@ google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoA google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.15.0 h1:Az/KuahOM4NAidTEuJCv/RonAA7rYsTPkqXVjr+8OOw= google.golang.org/grpc v1.15.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/xmlpath.v2 v2.0.0-20150820204837-860cbeca3ebc h1:LMEBgNcZUqXaP7evD1PZcL6EcDVa2QOFuI+cqM3+AJM= -gopkg.in/xmlpath.v2 v2.0.0-20150820204837-860cbeca3ebc/go.mod h1:N8UOSI6/c2yOpa/XDz3KVUiegocTziPiqNkeNTMiG1k= honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/tests/simpletracker_test.go b/tests/simpletracker_test.go index b850506..5395eb7 100644 --- a/tests/simpletracker_test.go +++ b/tests/simpletracker_test.go @@ -9,7 +9,7 @@ import ( func TestChemistSimple(t *testing.T) { url := "https://www.chemistwarehouse.com.au/buy/1062/beconase-hayfever-nasal-spray-200-doses" - xpath := `//div[@class="product__price"]` + xpath := `//span[@class="product__price"] | //div[@class="product__price"]` price, ok := trackers.SimpleTracker(&url, &xpath) if !ok { t.Errorf("can't fetch price from %s with %s", url, xpath) diff --git a/trackers/chrome_tracker.go b/trackers/chrome_tracker.go index fc1ce05..7909347 100644 --- a/trackers/chrome_tracker.go +++ b/trackers/chrome_tracker.go @@ -77,6 +77,7 @@ func ChromeTracker(url, xpath *string) (string, bool) { ctx, cancel = chromedp.NewContext(ctx) defer cancel() + log.Printf("INFO: loading %s", *url) var res string err := chromedp.Run(ctx, diff --git a/trackers/simple_tracker.go b/trackers/simple_tracker.go index ad22178..e0f3fbd 100644 --- a/trackers/simple_tracker.go +++ b/trackers/simple_tracker.go @@ -1,15 +1,10 @@ package trackers import ( - "bytes" - "io" - "io/ioutil" + "fmt" "log" - "net/http" - "strings" - "golang.org/x/net/html" - "gopkg.in/xmlpath.v2" + "github.com/antchfx/htmlquery" ) // SimpleTracker accepts url and xpath to extract content @@ -19,55 +14,22 @@ func SimpleTracker(url, xpath *string) (content string, ok bool) { if !ok { log.Println(content) } - log.Println("INFO: Found", content, "from", *url) + log.Printf("INFO: Found innerText=%s", content) }() - xpExec, err := xmlpath.Compile(*xpath) + log.Printf("INFO: loading %s", *url) + doc, err := htmlquery.LoadURL(*url) if err != nil { - content = "ERROR: failed to compile xpath %s" + *xpath ok = false - return + content = fmt.Sprintf("WARN: failed to load html with error %v", err) } - - resp, getErr := http.Get(*url) - if getErr != nil { - content = "ERROR: failed to fetch the website" + elem := htmlquery.FindOne(doc, *xpath) + if elem == nil { ok = false - return - } - - body, _ := ioutil.ReadAll(resp.Body) - - // create closure - extractHelper := func(reader io.Reader) { - xmlRoot, xmlErr := xmlpath.ParseHTML(reader) - if xmlErr != nil { - content = "ERROR: parse xml error: " + xmlErr.Error() - ok = false - return - } - content, ok = xpExec.String(xmlRoot) - content = strings.TrimSpace(content) - if !ok { - content = "value not found" - return - } - } - - // step 1. read directly from body - extractHelper(bytes.NewReader(body)) - - // step 2. try clean up HTML and do it again - if !ok { - root, err := html.Parse(bytes.NewReader(body)) - if err != nil { - content = "ERROR: parse html" + err.Error() - return - } - var b bytes.Buffer - html.Render(&b, root) - extractHelper(bytes.NewReader(b.Bytes())) + content = fmt.Sprintf("WARN: failed to find element with `%s`", *xpath) } + ok = true + content = htmlquery.InnerText(elem) return } From ef56874065c2d2721faee32043ef9735fd5c0a23 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Fri, 29 Nov 2019 16:33:14 +1100 Subject: [PATCH 04/16] add travis and update readme --- .travis.yml | 8 ++++++++ README.md | 1 + 2 files changed, 9 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..d799bac --- /dev/null +++ b/.travis.yml @@ -0,0 +1,8 @@ +language: go +go: + - 1.12.x + - 1.13.x + +script: + - go build + - GOARCH=arm GOARM=7 go build \ No newline at end of file diff --git a/README.md b/README.md index 9dd0049..6e4f026 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Price-Tracker +[![Build Status](https://travis-ci.org/xiahongze/pricetracker.svg?branch=master)](https://travis-ci.org/xiahongze/pricetracker) ## Introduction From 8452707aa7307260cd02dcfdd2838af852409d70 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Fri, 29 Nov 2019 17:35:50 +1100 Subject: [PATCH 05/16] enable gomodule --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index d799bac..c9039ac 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,5 +4,6 @@ go: - 1.13.x script: + - export GO111MODULE=on - go build - GOARCH=arm GOARM=7 go build \ No newline at end of file From 188d2bfa8e5959e8f9c6a3963aee8609eb2e1c93 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Fri, 29 Nov 2019 21:11:34 +1100 Subject: [PATCH 06/16] fixed bug in simple tracker --- trackers/simple_tracker.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/trackers/simple_tracker.go b/trackers/simple_tracker.go index e0f3fbd..86dbb6d 100644 --- a/trackers/simple_tracker.go +++ b/trackers/simple_tracker.go @@ -13,6 +13,7 @@ func SimpleTracker(url, xpath *string) (content string, ok bool) { defer func() { if !ok { log.Println(content) + return } log.Printf("INFO: Found innerText=%s", content) }() @@ -22,11 +23,13 @@ func SimpleTracker(url, xpath *string) (content string, ok bool) { if err != nil { ok = false content = fmt.Sprintf("WARN: failed to load html with error %v", err) + return } elem := htmlquery.FindOne(doc, *xpath) if elem == nil { ok = false content = fmt.Sprintf("WARN: failed to find element with `%s`", *xpath) + return } ok = true content = htmlquery.InnerText(elem) From 96ad16195f7df0030dd316dd08d0891d56f91c75 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Sat, 30 Nov 2019 12:50:33 +1100 Subject: [PATCH 07/16] bugfix in tasks --- gutils/tasks.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gutils/tasks.go b/gutils/tasks.go index 0bc7674..212fc0c 100644 --- a/gutils/tasks.go +++ b/gutils/tasks.go @@ -19,10 +19,10 @@ func processEntity(ent *models.Entity, pushClient *pushover.Client) { // save the entity before returning defer func() { ctx, cancel := context.WithTimeout(context.Background(), time.Duration(CancelWaitTime)) + defer cancel() if err := ent.Save(ctx, EntityType, DsClient, true); err != nil { log.Printf("ERROR: failed to save entity [%s] with %v", ent.Name, err) } - cancel() }() msg := pushover.Message{ @@ -43,7 +43,7 @@ func processEntity(ent *models.Entity, pushClient *pushover.Client) { msg.Title = fmt.Sprintf("[%s] Alert: failed to fetch price because`%s`!", ent.Name, content) pushClient.Send(&msg) // do not check again after 30 minutes - ent.NextCheck.Add(time.Minute * 30) + ent.NextCheck = ent.NextCheck.Add(time.Minute * 30) return } if ent.History == nil { @@ -59,7 +59,7 @@ func processEntity(ent *models.Entity, pushClient *pushover.Client) { msg.Title = fmt.Sprintf("[%s] Alert: failed to convert price `%s`!", ent.Name, content) pushClient.Send(&msg) // do not check again after 30 minutes - ent.NextCheck.Add(time.Minute * 30) + ent.NextCheck = ent.NextCheck.Add(time.Minute * 30) return } From 5224af12fc013fbb6f5fc9ac2f19571fbc371d0a Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Sat, 30 Nov 2019 13:08:19 +1100 Subject: [PATCH 08/16] Tracker returns error instead of ok, tests fixed --- tests/chromedp_test.go | 14 +++++++------- tests/simpletracker_test.go | 6 +++--- trackers/chrome_tracker.go | 14 +++++--------- trackers/simple_tracker.go | 16 +++++----------- trackers/tracker.go | 2 +- 5 files changed, 21 insertions(+), 31 deletions(-) diff --git a/tests/chromedp_test.go b/tests/chromedp_test.go index 6eb2f8e..542df8b 100644 --- a/tests/chromedp_test.go +++ b/tests/chromedp_test.go @@ -10,9 +10,9 @@ import ( func TestColes(t *testing.T) { url := "https://shop.coles.com.au/a/a-nsw-metro-westmead/product/goldn-canola-canola-oil" xpath := `//span/strong[@class="product-price"]` - price, ok := trackers.ChromeTracker(&url, &xpath) - if !ok { - t.Errorf("can't fetch price from %s with %s", url, xpath) + price, err := trackers.ChromeTracker(&url, &xpath) + if err != nil { + t.Errorf("can't fetch price from %s with %s error: %v", url, xpath, err) return } log.Printf("price: %s", price) @@ -20,10 +20,10 @@ func TestColes(t *testing.T) { func TestChemist(t *testing.T) { url := "https://www.chemistwarehouse.com.au/buy/1062/beconase-hayfever-nasal-spray-200-doses" - xpath := `//div[@class="product__price"]` - price, ok := trackers.ChromeTracker(&url, &xpath) - if !ok { - t.Errorf("can't fetch price from %s with %s", url, xpath) + xpath := `//span[@class="product__price"] | //div[@class="product__price"]` + price, err := trackers.ChromeTracker(&url, &xpath) + if err != nil { + t.Errorf("can't fetch price from %s with %s error: %v", url, xpath, err) return } log.Printf("price: %s", price) diff --git a/tests/simpletracker_test.go b/tests/simpletracker_test.go index 5395eb7..fd1f8e8 100644 --- a/tests/simpletracker_test.go +++ b/tests/simpletracker_test.go @@ -10,9 +10,9 @@ import ( func TestChemistSimple(t *testing.T) { url := "https://www.chemistwarehouse.com.au/buy/1062/beconase-hayfever-nasal-spray-200-doses" xpath := `//span[@class="product__price"] | //div[@class="product__price"]` - price, ok := trackers.SimpleTracker(&url, &xpath) - if !ok { - t.Errorf("can't fetch price from %s with %s", url, xpath) + price, err := trackers.SimpleTracker(&url, &xpath) + if err != nil { + t.Errorf("can't fetch price from %s with %s error: %v", url, xpath, err) return } log.Printf("price: %s", price) diff --git a/trackers/chrome_tracker.go b/trackers/chrome_tracker.go index 7909347..e3a5b02 100644 --- a/trackers/chrome_tracker.go +++ b/trackers/chrome_tracker.go @@ -68,8 +68,8 @@ func init() { } // ChromeTracker uses headless chrome to fetch content from given url and xpath -// and returns content/error message, ok -func ChromeTracker(url, xpath *string) (string, bool) { +// and returns content, error +func ChromeTracker(url, xpath *string) (res string, err error) { ctx, cancel := context.WithTimeout(context.Background(), chromeTimeout) defer cancel() ctx, cancel = chromedp.NewExecAllocator(ctx, chromeOpts...) @@ -78,17 +78,13 @@ func ChromeTracker(url, xpath *string) (string, bool) { defer cancel() log.Printf("INFO: loading %s", *url) - var res string - err := chromedp.Run(ctx, + err = chromedp.Run(ctx, hide, chromedp.Navigate(*url), chromedp.Text(*xpath, &res, chromedp.NodeVisible, chromedp.BySearch), ) + res = strings.TrimSpace(res) - if err != nil { - log.Printf("WARN: failed to fetch with chromedp with %v", err) - } - - return strings.TrimSpace(res), true + return } diff --git a/trackers/simple_tracker.go b/trackers/simple_tracker.go index 86dbb6d..e5cdd69 100644 --- a/trackers/simple_tracker.go +++ b/trackers/simple_tracker.go @@ -8,30 +8,24 @@ import ( ) // SimpleTracker accepts url and xpath to extract content -// and returns content/error message, ok -func SimpleTracker(url, xpath *string) (content string, ok bool) { +// and returns content, error message +func SimpleTracker(url, xpath *string) (content string, err error) { defer func() { - if !ok { - log.Println(content) - return + if err == nil { + log.Printf("INFO: Found innerText=%s", content) } - log.Printf("INFO: Found innerText=%s", content) }() log.Printf("INFO: loading %s", *url) doc, err := htmlquery.LoadURL(*url) if err != nil { - ok = false - content = fmt.Sprintf("WARN: failed to load html with error %v", err) return } elem := htmlquery.FindOne(doc, *xpath) if elem == nil { - ok = false - content = fmt.Sprintf("WARN: failed to find element with `%s`", *xpath) + err = fmt.Errorf("WARN: failed to find element with `%s`", *xpath) return } - ok = true content = htmlquery.InnerText(elem) return diff --git a/trackers/tracker.go b/trackers/tracker.go index df55615..faa7da3 100644 --- a/trackers/tracker.go +++ b/trackers/tracker.go @@ -1,4 +1,4 @@ package trackers // Tracker is the type future implementation should follow -type Tracker func(url, xpath *string) (string, bool) +type Tracker func(url, xpath *string) (string, error) From deefd0e2c373f75bf3fc3d74fc9fe7efbb5ff92a Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Sat, 30 Nov 2019 13:23:07 +1100 Subject: [PATCH 09/16] fixed tasks --- gutils/tasks.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/gutils/tasks.go b/gutils/tasks.go index 212fc0c..d67f158 100644 --- a/gutils/tasks.go +++ b/gutils/tasks.go @@ -15,9 +15,16 @@ import ( var priceRegex, _ = regexp.Compile("\\d+\\.?\\d{0,}") -func processEntity(ent *models.Entity, pushClient *pushover.Client) { +func processEntity(ent *models.Entity, pushClient *pushover.Client) (err error) { // save the entity before returning defer func() { + if err != nil { + log.Printf("ERROR: %v", err) + key, _ := ent.K.MarshalJSON() + log.Printf("INFO: URL: %s\tXPATH: %s\tKey: %s", ent.URL, ent.XPATH, key) + // do not check again after 30 minutes + ent.NextCheck = ent.NextCheck.Add(time.Minute * 30) + } ctx, cancel := context.WithTimeout(context.Background(), time.Duration(CancelWaitTime)) defer cancel() if err := ent.Save(ctx, EntityType, DsClient, true); err != nil { @@ -35,19 +42,14 @@ func processEntity(ent *models.Entity, pushClient *pushover.Client) { tracker = trackers.ChromeTracker } - content, ok := tracker(&ent.URL, &ent.XPATH) - if !ok { - log.Println("ERROR: failed to fetch price.", content) - key, _ := ent.K.MarshalJSON() - log.Printf("URL: %s\nXPATH: %s\nKey: %s", ent.URL, ent.XPATH, key) - msg.Title = fmt.Sprintf("[%s] Alert: failed to fetch price because`%s`!", ent.Name, content) + content, err := tracker(&ent.URL, &ent.XPATH) + if err != nil { + msg.Title = fmt.Sprintf("[%s] Alert: failed to fetch price `%v`!", ent.Name, err) pushClient.Send(&msg) - // do not check again after 30 minutes - ent.NextCheck = ent.NextCheck.Add(time.Minute * 30) return } if ent.History == nil { - log.Println("WARN: zero price history.", ent) + log.Println("WARN: zero price history") ent.History = []models.DataPoint{{Price: content, Timestamp: time.Now()}} return } @@ -55,11 +57,8 @@ func processEntity(ent *models.Entity, pushClient *pushover.Client) { last := ent.History[len(ent.History)-1] thisP, err := strconv.ParseFloat(priceRegex.FindString(content), 32) if err != nil { - log.Println("ERROR: failed to convert price", err, "this price:", content) msg.Title = fmt.Sprintf("[%s] Alert: failed to convert price `%s`!", ent.Name, content) pushClient.Send(&msg) - // do not check again after 30 minutes - ent.NextCheck = ent.NextCheck.Add(time.Minute * 30) return } @@ -79,6 +78,7 @@ func processEntity(ent *models.Entity, pushClient *pushover.Client) { msg.Title = fmt.Sprintf("[%s] Alert: price drops to %s!", ent.Name, content) pushClient.Send(&msg) } + return } // Refresh refreshes prices from datastore From 65efec6a3172ec1c683ecac1bfab6c9a18dd6876 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Sat, 30 Nov 2019 15:17:42 +1100 Subject: [PATCH 10/16] fixed create handler --- handlers/create.go | 26 +++++++++++++------------- models/requests.go | 14 ++++++++------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/handlers/create.go b/handlers/create.go index 87c8774..c09ea22 100644 --- a/handlers/create.go +++ b/handlers/create.go @@ -16,31 +16,31 @@ import ( // MakeCreate creates create handler request func MakeCreate(client *pushover.Client) echo.HandlerFunc { return func(c echo.Context) error { + var ( + content string + err error + useChrome = true + ) + req := &models.CreateRequest{} - if err := c.Bind(req); err != nil { + if err = c.Bind(req); err != nil { return c.String(http.StatusBadRequest, err.Error()) } - if msg, ok := req.Validate(); !ok { - return c.String(http.StatusBadRequest, msg) + if err = req.Validate(); err != nil { + return c.String(http.StatusBadRequest, err.Error()) } - var ( - content string - ok bool - useChrome = true - ) - if req.Options.UseChrome == nil || !*req.Options.UseChrome { - content, ok = trackers.SimpleTracker(&req.URL, &req.XPATH) + content, err = trackers.SimpleTracker(&req.URL, &req.XPATH) } - if !ok { + if err != nil { req.Options.UseChrome = &useChrome log.Println("INFO: Resorting to Chrome") } if req.Options.UseChrome != nil && *req.Options.UseChrome { - if content, ok = trackers.ChromeTracker(&req.URL, &req.XPATH); !ok { - return c.String(http.StatusBadRequest, content) + if content, err = trackers.ChromeTracker(&req.URL, &req.XPATH); err != nil { + return c.String(http.StatusBadRequest, err.Error()) } } diff --git a/models/requests.go b/models/requests.go index 178ed33..f115f83 100644 --- a/models/requests.go +++ b/models/requests.go @@ -2,6 +2,8 @@ package models import "cloud.google.com/go/datastore" +import "fmt" + type ( // Options is the options for an entry Options struct { @@ -38,21 +40,21 @@ type ( ) // Validate validates -func (r *CreateRequest) Validate() (string, bool) { +func (r *CreateRequest) Validate() error { if r.URL == "" { - return "url is not set", false + return fmt.Errorf("url is not set") } if r.XPATH == "" { - return "xpath is not set", false + return fmt.Errorf("xpath is not set") } if r.Name == "" { - return "name is not set", false + return fmt.Errorf("name is not set") } if r.ExpectedPrice == "" { - return "expectedPrice is not set", false + return fmt.Errorf("ExpectedPrice is not set") } r.Options.setDefault() - return "", true + return nil } func (o *Options) setDefault() { From f3243104fe13bf5f038bf9ae77662abb780d0d4b Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Sat, 30 Nov 2019 15:20:34 +1100 Subject: [PATCH 11/16] Validate now returns error --- models/requests.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/models/requests.go b/models/requests.go index f115f83..7770362 100644 --- a/models/requests.go +++ b/models/requests.go @@ -73,20 +73,20 @@ func (o *Options) setDefault() { } // Validate validates -func (r *ReadOrDelRequest) Validate() (string, bool) { +func (r *ReadOrDelRequest) Validate() error { if r.Key == nil { - return "key is not given", false + return fmt.Errorf("key is not given") } - return "", true + return nil } // Validate validates -func (r *UpdateRequest) Validate() (string, bool) { +func (r *UpdateRequest) Validate() error { if r.Key == nil { - return "key is not given", false + return fmt.Errorf("key is not given") } if r.Options == nil { - return "options is not given", false + return fmt.Errorf("options is not given") } - return "", true + return nil } From a893b8db159d4b75d26b7a5fb945bce1c5296bb5 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Sun, 1 Dec 2019 09:38:45 +1100 Subject: [PATCH 12/16] update msg before send out --- gutils/tasks.go | 1 + 1 file changed, 1 insertion(+) diff --git a/gutils/tasks.go b/gutils/tasks.go index d67f158..c7ba489 100644 --- a/gutils/tasks.go +++ b/gutils/tasks.go @@ -69,6 +69,7 @@ func processEntity(ent *models.Entity, pushClient *pushover.Client) (err error) if deltaRecordCnt > 0 { ent.History = ent.History[deltaRecordCnt:] } + msg.Msg = ent.String() // update message // send alert if ent.Options.AlertType == "onChange" && content != last.Price { msg.Title = fmt.Sprintf("[%s] Alert: price changes to %s!", ent.Name, content) From e309bf3cb5ef259036cf4d7f9bdddb5940112e90 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Sun, 1 Dec 2019 15:10:50 +1100 Subject: [PATCH 13/16] useChrome is a bool instd of ptr, xcpt for update --- gutils/tasks.go | 2 +- handlers/create.go | 11 ++++------- handlers/update.go | 4 ++-- models/requests.go | 13 +++++++------ 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/gutils/tasks.go b/gutils/tasks.go index c7ba489..f488115 100644 --- a/gutils/tasks.go +++ b/gutils/tasks.go @@ -38,7 +38,7 @@ func processEntity(ent *models.Entity, pushClient *pushover.Client) (err error) } var tracker trackers.Tracker = trackers.SimpleTracker - if ent.Options.UseChrome != nil && *ent.Options.UseChrome { + if ent.Options.UseChrome { tracker = trackers.ChromeTracker } diff --git a/handlers/create.go b/handlers/create.go index c09ea22..172e7d2 100644 --- a/handlers/create.go +++ b/handlers/create.go @@ -17,9 +17,8 @@ import ( func MakeCreate(client *pushover.Client) echo.HandlerFunc { return func(c echo.Context) error { var ( - content string - err error - useChrome = true + content string + err error ) req := &models.CreateRequest{} @@ -31,14 +30,12 @@ func MakeCreate(client *pushover.Client) echo.HandlerFunc { return c.String(http.StatusBadRequest, err.Error()) } - if req.Options.UseChrome == nil || !*req.Options.UseChrome { + if !req.Options.UseChrome { content, err = trackers.SimpleTracker(&req.URL, &req.XPATH) } if err != nil { - req.Options.UseChrome = &useChrome + req.Options.UseChrome = true log.Println("INFO: Resorting to Chrome") - } - if req.Options.UseChrome != nil && *req.Options.UseChrome { if content, err = trackers.ChromeTracker(&req.URL, &req.XPATH); err != nil { return c.String(http.StatusBadRequest, err.Error()) } diff --git a/handlers/update.go b/handlers/update.go index d98a3e4..8d88088 100644 --- a/handlers/update.go +++ b/handlers/update.go @@ -39,8 +39,8 @@ func MakeUpdate(client *pushover.Client) echo.HandlerFunc { if req.Options.Threshold != 0 { entity.Options.Threshold = req.Options.Threshold } - if req.Options.UseChrome != nil { - entity.Options.UseChrome = req.Options.UseChrome + if req.UseChrome != nil { + entity.Options.UseChrome = *req.UseChrome } if req.Name != "" { diff --git a/models/requests.go b/models/requests.go index 7770362..c897037 100644 --- a/models/requests.go +++ b/models/requests.go @@ -12,7 +12,7 @@ type ( AlertType string `json:"alertType"` Threshold float32 `json:"threshold"` MaxRecords int16 `json:"maxRecords"` - UseChrome *bool `json:"useChrome"` + UseChrome bool `json:"useChrome"` } // CreateRequest defines the contract to add an entry @@ -26,11 +26,12 @@ type ( // UpdateRequest defines the contract to update an entry UpdateRequest struct { - URL string `json:"url"` - XPATH string `json:"xpath"` - Name string `json:"name"` - Key *datastore.Key `json:"key"` - Options *Options `json:"options,omitempty"` + URL string `json:"url"` + XPATH string `json:"xpath"` + Name string `json:"name"` + Key *datastore.Key `json:"key"` + UseChrome *bool `json:useChrome` + Options *Options `json:"options,omitempty"` } // ReadOrDelRequest defines the contract to read/delete an entry From cca1f286e0fad054c34448a6a7707b193c7c3046 Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Mon, 2 Dec 2019 11:34:05 +1100 Subject: [PATCH 14/16] fixed useChrome json opt --- models/requests.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/requests.go b/models/requests.go index c897037..b9231e9 100644 --- a/models/requests.go +++ b/models/requests.go @@ -30,7 +30,7 @@ type ( XPATH string `json:"xpath"` Name string `json:"name"` Key *datastore.Key `json:"key"` - UseChrome *bool `json:useChrome` + UseChrome *bool `json:"useChrome,omitempty"` Options *Options `json:"options,omitempty"` } From 7dc2718d75b5bdcaa6c12454916fa5c96cbc4e3e Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Tue, 3 Dec 2019 16:33:56 +1100 Subject: [PATCH 15/16] rm newline in price --- trackers/chrome_tracker.go | 1 + trackers/simple_tracker.go | 3 +++ 2 files changed, 4 insertions(+) diff --git a/trackers/chrome_tracker.go b/trackers/chrome_tracker.go index e3a5b02..d6766ed 100644 --- a/trackers/chrome_tracker.go +++ b/trackers/chrome_tracker.go @@ -85,6 +85,7 @@ func ChromeTracker(url, xpath *string) (res string, err error) { chromedp.Text(*xpath, &res, chromedp.NodeVisible, chromedp.BySearch), ) res = strings.TrimSpace(res) + res = strings.Replace(res, "\n", " ", -1) return } diff --git a/trackers/simple_tracker.go b/trackers/simple_tracker.go index e5cdd69..9791608 100644 --- a/trackers/simple_tracker.go +++ b/trackers/simple_tracker.go @@ -3,6 +3,7 @@ package trackers import ( "fmt" "log" + "strings" "github.com/antchfx/htmlquery" ) @@ -27,6 +28,8 @@ func SimpleTracker(url, xpath *string) (content string, err error) { return } content = htmlquery.InnerText(elem) + content = strings.TrimSpace(content) + content = strings.Replace(content, "\n", " ", -1) return } From 04881ac4cf2d7068ffcdef7eb5e27219b74ca21b Mon Sep 17 00:00:00 2001 From: Hongze Xia Date: Tue, 17 Dec 2019 10:04:00 +1100 Subject: [PATCH 16/16] bugfix in create --- handlers/create.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handlers/create.go b/handlers/create.go index 172e7d2..11904d4 100644 --- a/handlers/create.go +++ b/handlers/create.go @@ -33,7 +33,7 @@ func MakeCreate(client *pushover.Client) echo.HandlerFunc { if !req.Options.UseChrome { content, err = trackers.SimpleTracker(&req.URL, &req.XPATH) } - if err != nil { + if err != nil || req.Options.UseChrome { req.Options.UseChrome = true log.Println("INFO: Resorting to Chrome") if content, err = trackers.ChromeTracker(&req.URL, &req.XPATH); err != nil {