diff --git a/server/database.go b/server/database.go index 653103d..d58dbe8 100644 --- a/server/database.go +++ b/server/database.go @@ -45,9 +45,6 @@ func unmarshalActivity(key, value []byte) (Activity, error) { if err != nil { return nil, err } - if post.Host == "" { - post.Host = "www.pathofexile.com" - } if post.Id != 0 { return post, nil } diff --git a/server/database_test.go b/server/database_test.go index d816b79..6654c54 100644 --- a/server/database_test.go +++ b/server/database_test.go @@ -21,14 +21,12 @@ func testDatabase_ForumPosts(t *testing.T, db Database) { Id: 9000, Poster: "Chris", Time: time.Unix(1486332365, 0), - Host: locale.ForumHost(), } post2 := &ForumPost{ Id: 9001, Poster: "Chris", Time: time.Unix(1486332364, 0), - Host: locale.ForumHost(), } db.AddActivity([]Activity{post1, post2}) diff --git a/server/forum_indexer.go b/server/forum_indexer.go index 989a717..aa4969b 100644 --- a/server/forum_indexer.go +++ b/server/forum_indexer.go @@ -9,7 +9,6 @@ import ( "regexp" "strconv" "strings" - "sync" "time" "github.com/PuerkitoBio/goquery" @@ -73,33 +72,37 @@ func (indexer *ForumIndexer) run() { } } - var wg sync.WaitGroup - wg.Add(len(Locales)) - - for _, l := range Locales { - l := l - go func() { - for { - for _, account := range accounts { - select { - case <-indexer.closeSignal: - return - default: - if err := indexer.index(l, account, timezone); err != nil { - log.WithError(err).Error("error indexing forum account: " + account) - } - time.Sleep(time.Second) - } + for { + for _, locale := range Locales { + select { + case <-indexer.closeSignal: + return + default: + logger := log.WithField("host", locale.ForumHost()) + if err := locale.RefreshForumIds(); err != nil { + logger.WithError(err).Error("error refreshing forum ids") + } else { + logger.Info("refreshed forum ids") } + time.Sleep(time.Second) } - }() + } + for _, account := range accounts { + select { + case <-indexer.closeSignal: + return + default: + if err := indexer.index(account, timezone); err != nil { + log.WithError(err).Error("error indexing forum account: " + account) + } + time.Sleep(time.Second) + } + } } - - wg.Wait() } -func (indexer *ForumIndexer) requestDocument(host, resource string) (*goquery.Document, error) { - urlString := fmt.Sprintf("https://%v/%v", host, strings.TrimPrefix(resource, "/")) +func (indexer *ForumIndexer) requestDocument(resource string) (*goquery.Document, error) { + urlString := fmt.Sprintf("https://www.pathofexile.com/%v", strings.TrimPrefix(resource, "/")) jar, _ := cookiejar.New(nil) u, _ := url.Parse(urlString) jar.SetCookies(u, []*http.Cookie{ @@ -127,7 +130,7 @@ var postURLExpression = regexp.MustCompile("^/forum/view-post/([0-9]+)") var threadURLExpression = regexp.MustCompile("^/forum/view-thread/([0-9]+)") var forumURLExpression = regexp.MustCompile("^/forum/view-forum/([0-9]+)") -func ScrapeForumPosts(doc *goquery.Document, locale *Locale, timezone *time.Location) ([]*ForumPost, error) { +func ScrapeForumPosts(doc *goquery.Document, timezone *time.Location) ([]*ForumPost, error) { posts := []*ForumPost(nil) err := error(nil) @@ -145,7 +148,7 @@ func ScrapeForumPosts(doc *goquery.Document, locale *Locale, timezone *time.Loca timeText := sel.Find(".post_date").Text() - if post.Time, err = locale.ParseTime(timeText, timezone); err != nil { + if post.Time, err = time.ParseInLocation("Jan _2, 2006, 3:04:05 PM", timeText, timezone); err != nil { log.WithField("text", timeText).Error("unable to parse time") return false } @@ -177,24 +180,20 @@ func ScrapeForumPosts(doc *goquery.Document, locale *Locale, timezone *time.Loca return posts, nil } -func (indexer *ForumIndexer) forumPosts(locale *Locale, poster string, page int, timezone *time.Location) ([]*ForumPost, error) { - doc, err := indexer.requestDocument(locale.ForumHost(), fmt.Sprintf("/account/view-posts/%v/page/%v", poster, page)) +func (indexer *ForumIndexer) forumPosts(poster string, page int, timezone *time.Location) ([]*ForumPost, error) { + doc, err := indexer.requestDocument(fmt.Sprintf("/account/view-posts/%v/page/%v", poster, page)) if err != nil { return nil, err } - posts, err := ScrapeForumPosts(doc, locale, timezone) + posts, err := ScrapeForumPosts(doc, timezone) if err != nil { return nil, err } - for _, post := range posts { - post.Host = locale.ForumHost() - } return posts, nil } -func (indexer *ForumIndexer) index(locale *Locale, poster string, timezone *time.Location) error { +func (indexer *ForumIndexer) index(poster string, timezone *time.Location) error { logger := log.WithFields(log.Fields{ - "host": locale.ForumHost(), "poster": poster, }) @@ -203,7 +202,7 @@ func (indexer *ForumIndexer) index(locale *Locale, poster string, timezone *time activity := []Activity(nil) for page := 1; ; page++ { - posts, err := indexer.forumPosts(locale, poster, page, timezone) + posts, err := indexer.forumPosts(poster, page, timezone) if err != nil { logger.WithError(err).Error("error requesting forum posts") } @@ -243,7 +242,7 @@ func ScrapeForumTimezone(doc *goquery.Document) (*time.Location, error) { } func (indexer *ForumIndexer) sessionTimezone() (*time.Location, error) { - doc, err := indexer.requestDocument("www.pathofexile.com", "/my-account/preferences") + doc, err := indexer.requestDocument("/my-account/preferences") if err != nil { return nil, err } diff --git a/server/forum_indexer_test.go b/server/forum_indexer_test.go index 442b950..d2b8700 100644 --- a/server/forum_indexer_test.go +++ b/server/forum_indexer_test.go @@ -21,7 +21,7 @@ func TestScrapeForumPosts(t *testing.T) { tz, err := time.LoadLocation("America/Los_Angeles") require.NoError(t, err) - posts, err := ScrapeForumPosts(doc, Locales[0], tz) + posts, err := ScrapeForumPosts(doc, tz) require.NoError(t, err) assert.Equal(t, 10, len(posts)) diff --git a/server/forum_post.go b/server/forum_post.go index 3d4f0e7..1cd6e41 100644 --- a/server/forum_post.go +++ b/server/forum_post.go @@ -1,13 +1,13 @@ package server import ( + "encoding/json" "fmt" "time" ) type ForumPost struct { Id int `json:"id"` - Host string `json:"host"` BodyHTML string `json:"body_html"` Time time.Time `json:"time"` Poster string `json:"poster"` @@ -17,6 +17,32 @@ type ForumPost struct { ForumName string `json:"forum_name"` } +type forumPostWithHost struct { + Id int `json:"id"` + BodyHTML string `json:"body_html"` + Time time.Time `json:"time"` + Poster string `json:"poster"` + ThreadId int `json:"thread_id"` + ThreadTitle string `json:"thread_title"` + ForumId int `json:"forum_id"` + ForumName string `json:"forum_name"` + Host string `json:"host"` +} + +func (p ForumPost) MarshalJSON() ([]byte, error) { + return json.Marshal(&forumPostWithHost{ + Id: p.Id, + BodyHTML: p.BodyHTML, + Time: p.Time, + Poster: p.Poster, + ThreadId: p.ThreadId, + ThreadTitle: p.ThreadTitle, + ForumId: p.ForumId, + ForumName: p.ForumName, + Host: p.Host(), + }) +} + func (p *ForumPost) ActivityTime() time.Time { return p.Time } @@ -25,6 +51,15 @@ func (p *ForumPost) ActivityKey() uint32 { return uint32(p.Id) } +func (p *ForumPost) Host() string { + for _, l := range Locales { + if l.ForumIds()[p.ForumId] { + return l.ForumHost() + } + } + return "www.pathofexile.com" +} + func (p *ForumPost) PostURL() string { - return fmt.Sprintf("https://%v/forum/view-post/%v", p.Host, p.Id) + return fmt.Sprintf("https://%v/forum/view-post/%v", p.Host(), p.Id) } diff --git a/server/localization.go b/server/localization.go index 83fd1f5..a420eaa 100644 --- a/server/localization.go +++ b/server/localization.go @@ -1,9 +1,14 @@ package server import ( + "fmt" "net/http" + "strconv" "strings" + "sync/atomic" "time" + + "github.com/PuerkitoBio/goquery" ) type Locale struct { @@ -12,6 +17,8 @@ type Locale struct { IncludeReddit bool Translations map[string]string ParseTime func(s string, tz *time.Location) (time.Time, error) + + forumIds atomic.Value } func (l *Locale) Translate(s string) string { @@ -24,7 +31,7 @@ func (l *Locale) Translate(s string) string { func (l *Locale) ActivityFilter(a Activity) bool { switch a := a.(type) { case *ForumPost: - return a.Host == l.ForumHost() + return a.Host() == l.ForumHost() case *RedditComment: return l.IncludeReddit case *RedditPost: @@ -40,95 +47,47 @@ func (l *Locale) ForumHost() string { return "www.pathofexile.com" } -var esMonthReplacer = strings.NewReplacer( - "ene", "Jan", - "feb", "Feb", - "mar", "Mar", - "abr", "Apr", - "may", "May", - "jun", "Jun", - "jul", "Jul", - "ago", "Aug", - "sept", "Sep", - "oct", "Oct", - "nov", "Nov", - "dic", "Dec", -) +func (l *Locale) ForumIds() map[int]bool { + ret, _ := l.forumIds.Load().(map[int]bool) + return ret +} -var brMonthReplacer = strings.NewReplacer( - "de jan de", "Jan", - "de fev de", "Feb", - "de mar de", "Mar", - "de abr de", "Apr", - "de mai de", "May", - "de jun de", "Jun", - "de jul de", "Jul", - "de ago de", "Aug", - "de set de", "Sep", - "de out de", "Oct", - "de nov de", "Nov", - "de dez de", "Dec", -) +func (l *Locale) RefreshForumIds() error { + client := http.Client{ + Timeout: time.Second * 10, + } + resp, err := client.Get(fmt.Sprintf("https://%v/forum", l.ForumHost())) + if err != nil { + return err + } + defer resp.Body.Close() -var thMonthReplacer = strings.NewReplacer( - "ม.ค.", "Jan", - "ก.พ.", "Feb", - "มี.ค.", "Mar", - "เม.ย.", "Apr", - "พ.ค.", "May", - "มิ.ย.", "Jun", - "ก.ค.", "Jul", - "ส.ค.", "Aug", - "ก.ย.", "Sep", - "ต.ค.", "Oct", - "พ.ย.", "Nov", - "ธ.ค.", "Dec", -) + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return err + } -var frMonthReplacer = strings.NewReplacer( - "janv.", "Jan", - "févr.", "Feb", - "mars", "Mar", - "avr.", "Apr", - "mai", "May", - "juin", "Jun", - "juil.", "Jul", - "août", "Aug", - "sept.", "Sep", - "oct.", "Oct", - "nov.", "Nov", - "déc.", "Dec", -) + forumIds := map[int]bool{} + doc.Find(".forumTable tbody tr").Each(func(i int, sel *goquery.Selection) { + if idStr := sel.AttrOr("data-id", ""); idStr != "" { + if id, err := strconv.Atoi(idStr); err == nil { + forumIds[id] = true + } + } + }) + l.forumIds.Store(forumIds) -var ruMonthReplacer = strings.NewReplacer( - "янв.", "Jan", - "февр.", "Feb", - "марта", "Mar", - "апр.", "Apr", - "мая", "May", - "июня", "Jun", - "июля", "Jul", - "авг.", "Aug", - "сент.", "Sep", - "окт.", "Oct", - "нояб.", "Nov", - "дек.", "Dec", -) + return nil +} var Locales = []*Locale{ { IncludeReddit: true, Image: "static/images/locales/gb.png", - ParseTime: func(s string, tz *time.Location) (time.Time, error) { - return time.ParseInLocation("Jan _2, 2006, 3:04:05 PM", s, tz) - }, }, { Subdomain: "br", Image: "static/images/locales/br.png", - ParseTime: func(s string, tz *time.Location) (time.Time, error) { - return time.ParseInLocation("2 Jan 2006 15:04:05", brMonthReplacer.Replace(s), tz) - }, Translations: map[string]string{ "Activity": "Atividade", "Thread": "Discussão", @@ -140,9 +99,6 @@ var Locales = []*Locale{ { Subdomain: "ru", Image: "static/images/locales/ru.png", - ParseTime: func(s string, tz *time.Location) (time.Time, error) { - return time.ParseInLocation("2 Jan 2006 г., 15:04:05", ruMonthReplacer.Replace(s), tz) - }, Translations: map[string]string{ "Activity": "Активность", "Thread": "Тема", @@ -154,16 +110,10 @@ var Locales = []*Locale{ { Subdomain: "th", Image: "static/images/locales/th.png", - ParseTime: func(s string, tz *time.Location) (time.Time, error) { - return time.ParseInLocation("_2 Jan 2006 15:04:05", thMonthReplacer.Replace(s), tz) - }, }, { Subdomain: "de", Image: "static/images/locales/de.png", - ParseTime: func(s string, tz *time.Location) (time.Time, error) { - return time.ParseInLocation("2.1.2006, 15:04:05", s, tz) - }, Translations: map[string]string{ "Activity": "Aktivität", "Thread": "Beitrag", @@ -175,9 +125,6 @@ var Locales = []*Locale{ { Subdomain: "fr", Image: "static/images/locales/fr.png", - ParseTime: func(s string, tz *time.Location) (time.Time, error) { - return time.ParseInLocation("_2 Jan 2006 15:04:05", frMonthReplacer.Replace(s), tz) - }, Translations: map[string]string{ "Activity": "Activité", "Thread": "Fil de discussion", @@ -189,9 +136,6 @@ var Locales = []*Locale{ { Subdomain: "es", Image: "static/images/locales/es.png", - ParseTime: func(s string, tz *time.Location) (time.Time, error) { - return time.ParseInLocation("2 Jan. 2006 15:04:05", esMonthReplacer.Replace(s), tz) - }, Translations: map[string]string{ "Activity": "Actividad", "Thread": "Tema", diff --git a/server/localization_test.go b/server/localization_test.go index 12b53e1..a50ff22 100644 --- a/server/localization_test.go +++ b/server/localization_test.go @@ -2,31 +2,15 @@ package server import ( "testing" - "time" "github.com/stretchr/testify/assert" ) -func TestLocale_ParseTime(t *testing.T) { - for subdomain, s := range map[string]string{ - "": "Aug 29, 2018, 5:51:19 PM", - "br": "31 de ago de 2018 00:50:19", - "ru": "1 сент. 2018 г., 2:09:52", - "th": "31 ส.ค. 2018 00:50:25", - "de": "31.08.2018, 00:50:20", - "fr": "31 août 2018 00:50:22", - "es": "31 ago. 2018 0:50:23", - } { - t.Run(subdomain, func(t *testing.T) { - var locale *Locale - for _, l := range Locales { - if l.Subdomain == subdomain { - locale = l - break - } - } - _, err := locale.ParseTime(s, time.FixedZone("UTC-5", -5*60*60)) - assert.NoError(t, err) +func TestLocale_RefreshForumIds(t *testing.T) { + for _, l := range Locales { + t.Run(l.ForumHost(), func(t *testing.T) { + assert.NoError(t, l.RefreshForumIds()) + assert.NotEmpty(t, l.ForumIds()) }) } }