package controller import ( "crawler/config" "crawler/model" "crawler/service" "crawler/utility" "fmt" "github.com/gin-gonic/gin" "github.com/gocolly/colly" "log" "net" "net/http" "os" "strconv" "time" ) type CronController interface { List(c *gin.Context) Info(c *gin.Context) Detail(c *gin.Context) GetKey(c *gin.Context) string SetLastPage(page int) GetLastPage() int } type Cron struct { MovieListModel model.MovieListModel MovieInfoModel model.MovieInfoModel MovieDetailModel model.MovieDetailModel MovieStatsModel model.MovieStatsModel Kobis model.Kobis Rest service.Rest } /** * 영화진흥위원회 영화 목록 */ func (this *Cron) List(c *gin.Context) { var ( start = time.Now() page = GetLastPage() perPage = 100 total, errors, insertRows, updateRows = 0, 0, 0, 0 output = func(n ...int) string { s := fmt.Sprintf("Total : %d\n", n[0]) s += fmt.Sprintf("Error: %d\n", n[1]) s += fmt.Sprintf("InsertRows: %d\n", n[2]) s += fmt.Sprintf("UpdateRows: %d\n", n[3]) s += fmt.Sprintf("소요시간: %f초\n", time.Since(start).Seconds()) return s } key = GetKey(c) ) //defer func() { // if r := recover(); r != nil { // msg := fmt.Sprintf("[영화 목록 수집 오류 발생]\n") // msg += output(total, errors, insertRows, updateRows) // utility.SendMessage(msg) // } //}() for { var ( req = this.MovieListModel.SearchMovieListParams insertData, updateData []model.MovieListInfo ) req.Key = key req.CurPage = page req.ItemPerPage = perPage data, err := this.Kobis.MovieListAPI(req) if err != nil { c.JSON(http.StatusBadRequest, err.Error()) break } // 더 이상 값이 없다면 중지 if data.MovieListResult.TotCnt <= 0 { errors++ break } // 입력할 값과 수정할 값 구분 for _, row := range data.MovieListResult.MovieList { if this.MovieListModel.IsExists(row.MovieCd) == true { updateData = append(updateData, row) updateRows++ } else { insertData = append(insertData, row) insertRows++ } total++ } if insertRows > 0 { if err = this.MovieListModel.Insert(insertData); err != nil { errors++ } } if updateRows > 0 { if err = this.MovieListModel.Update(updateData); err != 
nil { errors++ } } fmt.Println(output(total, errors, insertRows, updateRows)) SetLastPage(page) page++ } //msg := "[영화 목록 수집 종료]\n" //msg += output(total, errors, insertRows, updateRows) //utility.SendMessage(msg) c.JSON(http.StatusOK, gin.H{ "total": total, "errors": errors, "page": page, "perPage": perPage, "insertRows": insertRows, "updateRows": updateRows, }) } /** * 영화진흥위원회 영화 기본 정보 */ func (this *Cron) Info(c *gin.Context) { var ( start = time.Now() codes = this.MovieListModel.MovieInfoExcludeCodes() total, errors, insertRows, updateRows = 0, 0, 0, 0 output = func(n ...int) string { s := fmt.Sprintf("Total : %d\n", n[0]) s += fmt.Sprintf("Error: %d\n", n[1]) s += fmt.Sprintf("InsertRows: %d\n", n[2]) s += fmt.Sprintf("UpdateRows: %d\n", n[3]) s += fmt.Sprintf("소요시간: %f초\n", time.Since(start).Seconds()) return s } key = GetKey(c) ) //defer func() { // if r := recover(); r != nil { // msg := fmt.Sprintf("[영화 기본 정보 수집 오류 발생]\n") // msg += output(total, errors, insertRows, updateRows) // utility.SendMessage(msg) // } //}() for _, movieCd := range codes { var ( req = this.MovieInfoModel.SearchMovieInfoParams ) req.Key = key req.MovieCd = movieCd data, err := this.Kobis.MovieInfoAPI(req) if err != nil { c.JSON(http.StatusBadRequest, err.Error()) break } row := data.MovieInfoResult.MovieInfo if row.MovieCd == "" { errors++ break } if this.MovieInfoModel.IsExists(row.MovieCd) == true { if err = this.MovieInfoModel.Update(row); err == nil { updateRows++ } else { errors++ } } else { if err = this.MovieInfoModel.Insert(row); err == nil { insertRows++ } else { errors++ } } fmt.Println(output(total, errors, insertRows, updateRows)) total++ } //msg := "[영화 기본 정보 수집 종료]\n" //msg += output(total, errors, insertRows, updateRows) //utility.SendMessage(msg) c.JSON(http.StatusOK, gin.H{ "total": total, "error": errors, "insertRows": insertRows, "updateRows": updateRows, }) } /* * 호출 순서 * OnRequest -> OnError -> OnResponseHeaders -> OnResponse -> OnHTML -> OnXML -> OnScraped */ 
func (this *Cron) Detail(c *gin.Context) { var ( start = time.Now() codes = this.MovieListModel.MovieDetailExcludeCodes() total, scraped, errors, insertRows, updateRows, target = 0, 0, 0, 0, 0, len(codes) output = func(n ...int) string { s := fmt.Sprintf("Total : %d\n", n[0]) s += fmt.Sprintf("Scraped: %d\n", n[1]) s += fmt.Sprintf("Error: %d\n", n[2]) s += fmt.Sprintf("InsertRows: %d\n", n[3]) s += fmt.Sprintf("UpdateRows: %d\n", n[4]) s += fmt.Sprintf("Target : %d\n", n[5]) s += fmt.Sprintf("소요시간: %f초\n", time.Since(start).Seconds()) return s } c1 = colly.NewCollector( colly.AllowedDomains(config.KOBIS_DOMAIN), colly.IgnoreRobotsTxt(), colly.Async(false), ) ) c1.WithTransport(&http.Transport{ DialContext: (&net.Dialer{ Timeout: 30 * time.Second, KeepAlive: 30 * time.Second, }).DialContext, MaxIdleConns: 0, MaxIdleConnsPerHost: 100, IdleConnTimeout: 30 * time.Second, TLSHandshakeTimeout: 30 * time.Second, ExpectContinueTimeout: 30 * time.Second, DisableCompression: false, }) //var c2 = c1.Clone() c1.OnRequest(func(r *colly.Request) { r.Headers.Set("User-Agent", utility.RandomString()) r.Headers.Set("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") }) c1.OnError(func(_ *colly.Response, err error) { log.Printf("Error(c1) : %s\n", err.Error()) errors++ }) c1.OnScraped(func(r *colly.Response) { scraped++ }) /* 관객 수, 누적 매출액 조회 */ //c2.OnRequest(func(r *colly.Request) { // r.Headers.Set("User-Agent", utility.RandomString()) // r.Headers.Set("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") //}) // //c2.OnError(func(_ *colly.Response, err error) { // log.Printf("Error(c2) : %s\n", err.Error()) // errors++ //}) // //c2.OnScraped(func(r *colly.Response) { // scraped++ //}) // 복구처리 defer func() { if e := recover(); e != nil { //msg := "[영화 상세 정보 수집 오류 발생]\n" //msg += output(total, scraped, errors, insertRows, updateRows, len(codes)) //utility.SendMessage(msg) } }() for i, movieCd := range codes { if movieCd == "" { continue } movieDetail := 
this.MovieDetailModel.MovieDetail movieDetail.MovieCd = movieCd c1.OnHTML(".item_tab.basic", func(e *colly.HTMLElement) { var host = config.KOBIS_HOST movieDetail.MainImg = e.ChildAttr("a.fl.thumb", "href") if movieDetail.MainImg != "" && movieDetail.MainImg != "#" { movieDetail.MainImg = host + movieDetail.MainImg } movieDetail.ThumbImg = e.ChildAttr("a.fl.thumb > img", "src") if movieDetail.ThumbImg != "" && movieDetail.ThumbImg != "#" { movieDetail.ThumbImg = host + movieDetail.ThumbImg } movieDetail.Synopsis = e.ChildText("div.info.info2 p.desc_info") e.ForEach("div#post > input", func(_ int, ee *colly.HTMLElement) { movieDetail.Poster = append(movieDetail.Poster, model.Poster{ Thumb: host + ee.Attr("thn_img"), Origin: host + ee.Attr("img"), }) }) e.ForEach("div#stl > input", func(_ int, ee *colly.HTMLElement) { movieDetail.StillCut = append(movieDetail.StillCut, model.StillCut{ Thumb: host + ee.Attr("thn_img"), Origin: host + ee.Attr("img"), }) }) }) //c2.OnHTML("body", func(e *colly.HTMLElement) { // var ( // tr = e.DOM.Find(".info").Eq(0).Find("table tbody tr").Eq(1) // saleAcc = utility.RemoveSpecialChar(strings.Replace(tr.Find("td").Eq(2).Text(), "(100%)", "", 1)) // audiAcc = utility.RemoveSpecialChar(strings.Replace(tr.Find("td").Eq(3).Text(), "(100%)", "", 1)) // ) // SaleAcc, _ := strconv.Atoi(saleAcc) // AudiAcc, _ := strconv.Atoi(audiAcc) // // movieDetail.SaleAcc = SaleAcc // movieDetail.AudiAcc = AudiAcc //}) err := c1.Post(config.MOVIE_DETAIL, map[string]string{ "code": movieCd, "sType": "", "titleYN": "Y", "etcParam": "", "isOuterReq": "false", }) if err != nil { errors++ continue } //if this.Rest.Check(err) { // errors++ // continue //} //err = c2.Post(config.MOVIE_DETAIL, map[string]string{ // "code": movieCd, // "sType": "stat", //}) //if err != nil { // errors++ // continue //} // //if this.Rest.Check(err) { // errors++ // continue //} if this.MovieDetailModel.IsExists(movieCd) == true { if err = this.MovieDetailModel.Update(movieDetail); err 
== nil { updateRows++ } else { errors++ } } else { if err = this.MovieDetailModel.Insert(movieDetail); err == nil { insertRows++ } else { errors++ } } fmt.Println(output(total, scraped, errors, insertRows, updateRows, target)) codes[i] = "" target-- total++ } //msg := "[영화 상세 정보 수집 종료]\n" //msg += output(total, scraped, insertRows, updateRows, errors) //utility.SendMessage(msg) c.JSON(http.StatusOK, gin.H{ "total": total, "insertRows": insertRows, "updateRows": updateRows, }) } /** * 영화진흥위원회 박스오피스 (통계 조회) */ /* func (this *Cron) Stats(c *gin.Context) { var ( start = time.Now() total, errors, insertRows, updateRows, page = 0, 0, 0, 0, 1 output = func(n ...int) string { s := fmt.Sprintf("Total : %d\n", n[0]) s += fmt.Sprintf("Error: %d\n", n[1]) s += fmt.Sprintf("InsertRows: %d\n", n[2]) s += fmt.Sprintf("UpdateRows: %d\n", n[3]) s += fmt.Sprintf("Page: %d\n", n[4]) s += fmt.Sprintf("소요시간: %f초\n", time.Since(start).Seconds()) return s } ) req := this.MovieStatsModel.SearchBoxOfficeParams req.ServiceKey = config.Env.Movie.Kcisa.BoxOfficeKey req.NumOfRows = 2000 req.PageNo = 1 for { var ( insertData = make([]model.BoxOfficeInfo, 0) updateData = make([]model.BoxOfficeInfo, 0) ) req.PageNo = page data, err := this.Kobis.MovieBoxOfficeAPI(req) if err != nil { c.JSON(http.StatusBadRequest, err.Error()) break } list := data.Response.Body.Items.Item if len(list) <= 0 { break } for _, row := range list { query, err := url.ParseQuery(row.Url) if err != nil { errors++ continue } movieCd := query.Get("dtCd") if this.MovieStatsModel.IsExists(movieCd) == true { updateData = append(updateData, row) updateRows++ } else { insertData = append(insertData, row) insertRows++ } } if insertRows > 0 { if err = this.MovieStatsModel.Insert(insertData); err != nil { errors++ } } if updateRows > 0 { if err = this.MovieStatsModel.Update(updateData); err != nil { errors++ } } fmt.Println(output(total, errors, insertRows, updateRows, page)) page++ total++ } msg := "[영화 통계 정보 수집 종료]\n" msg += 
output(total, errors, insertRows, updateRows, page)
	utility.SendMessage(msg)

	c.JSON(http.StatusOK, gin.H{
		"total":      total,
		"error":      errors,
		"insertRows": insertRows,
		"updateRows": updateRows,
		"page":       page,
	})
}
*/

// GetKey returns the KOBIS API key selected by the "key" query parameter:
// "1" and "2" pick the two configured keys, anything else falls back to a
// built-in default.
func GetKey(c *gin.Context) string {
	switch c.Query("key") {
	case "1":
		return config.Movie.Kobis.ApiKey_1
	case "2":
		return config.Movie.Kobis.ApiKey_2
	default:
		// NOTE(review): this is a credential hard-coded in source. It is
		// kept so existing callers keep working, but it should move into
		// configuration next to the other two keys.
		return "f5eef3421c602c6cb7ea224104795888"
	}
}

// SetLastPage stores the most recently requested page number in the file at
// config.LAST_PAGE_PATH_KOBIS, replacing any previous content. A failure to
// write is printed and the success message is skipped.
func SetLastPage(page int) {
	// os.WriteFile creates or truncates the file, writes and closes it, and
	// reports any failure through a single error value.
	if err := os.WriteFile(config.LAST_PAGE_PATH_KOBIS, []byte(strconv.Itoa(page)), 0o666); err != nil {
		fmt.Println(err)
		return
	}

	fmt.Printf("Set last page: %d\n", page)
}

// GetLastPage returns the page number stored by SetLastPage. It returns 1
// when the file cannot be read, does not contain an integer, or holds a
// value below 1.
func GetLastPage() int {
	raw, err := os.ReadFile(config.LAST_PAGE_PATH_KOBIS)
	if err != nil {
		fmt.Println(err)
		return 1
	}

	page, err := strconv.Atoi(string(raw))
	if err != nil {
		fmt.Println(err)
		return 1
	}

	// Pages are 1-based; zero or negative values are not usable.
	if page < 1 {
		return 1
	}

	return page
}