| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533 |
- package controller
- import (
- "crawler/config"
- "crawler/model"
- "crawler/service"
- "crawler/utility"
- "fmt"
- "github.com/gin-gonic/gin"
- "github.com/gocolly/colly"
- "log"
- "net"
- "net/http"
- "os"
- "strconv"
- "time"
- )
- type CronController interface {
- List(c *gin.Context)
- Info(c *gin.Context)
- Detail(c *gin.Context)
- GetKey(c *gin.Context) string
- SetLastPage(page int)
- GetLastPage() int
- }
- type Cron struct {
- MovieListModel model.MovieListModel
- MovieInfoModel model.MovieInfoModel
- MovieDetailModel model.MovieDetailModel
- MovieStatsModel model.MovieStatsModel
- Kobis model.Kobis
- Rest service.Rest
- }
- /**
- * 영화진흥위원회 영화 목록
- */
- func (this *Cron) List(c *gin.Context) {
- var (
- start = time.Now()
- page = GetLastPage()
- perPage = 100
- total, errors, insertRows, updateRows = 0, 0, 0, 0
- output = func(n ...int) string {
- s := fmt.Sprintf("Total : %d\n", n[0])
- s += fmt.Sprintf("Error: %d\n", n[1])
- s += fmt.Sprintf("InsertRows: %d\n", n[2])
- s += fmt.Sprintf("UpdateRows: %d\n", n[3])
- s += fmt.Sprintf("소요시간: %f초\n", time.Since(start).Seconds())
- return s
- }
- key = GetKey(c)
- )
- //defer func() {
- // if r := recover(); r != nil {
- // msg := fmt.Sprintf("[영화 목록 수집 오류 발생]\n")
- // msg += output(total, errors, insertRows, updateRows)
- // utility.SendMessage(msg)
- // }
- //}()
- for {
- var (
- req = this.MovieListModel.SearchMovieListParams
- insertData, updateData []model.MovieListInfo
- )
- req.Key = key
- req.CurPage = page
- req.ItemPerPage = perPage
- data, err := this.Kobis.MovieListAPI(req)
- if err != nil {
- c.JSON(http.StatusBadRequest, err.Error())
- break
- }
- // 더 이상 값이 없다면 중지
- if data.MovieListResult.TotCnt <= 0 {
- errors++
- break
- }
- // 입력할 값과 수정할 값 구분
- for _, row := range data.MovieListResult.MovieList {
- if this.MovieListModel.IsExists(row.MovieCd) == true {
- updateData = append(updateData, row)
- updateRows++
- } else {
- insertData = append(insertData, row)
- insertRows++
- }
- total++
- }
- if insertRows > 0 {
- if err = this.MovieListModel.Insert(insertData); err != nil {
- errors++
- }
- }
- if updateRows > 0 {
- if err = this.MovieListModel.Update(updateData); err != nil {
- errors++
- }
- }
- fmt.Println(output(total, errors, insertRows, updateRows))
- SetLastPage(page)
- page++
- }
- //msg := "[영화 목록 수집 종료]\n"
- //msg += output(total, errors, insertRows, updateRows)
- //utility.SendMessage(msg)
- c.JSON(http.StatusOK, gin.H{
- "total": total,
- "errors": errors,
- "page": page,
- "perPage": perPage,
- "insertRows": insertRows,
- "updateRows": updateRows,
- })
- }
- /**
- * 영화진흥위원회 영화 기본 정보
- */
- func (this *Cron) Info(c *gin.Context) {
- var (
- start = time.Now()
- codes = this.MovieListModel.MovieInfoExcludeCodes()
- total, errors, insertRows, updateRows = 0, 0, 0, 0
- output = func(n ...int) string {
- s := fmt.Sprintf("Total : %d\n", n[0])
- s += fmt.Sprintf("Error: %d\n", n[1])
- s += fmt.Sprintf("InsertRows: %d\n", n[2])
- s += fmt.Sprintf("UpdateRows: %d\n", n[3])
- s += fmt.Sprintf("소요시간: %f초\n", time.Since(start).Seconds())
- return s
- }
- key = GetKey(c)
- )
- //defer func() {
- // if r := recover(); r != nil {
- // msg := fmt.Sprintf("[영화 기본 정보 수집 오류 발생]\n")
- // msg += output(total, errors, insertRows, updateRows)
- // utility.SendMessage(msg)
- // }
- //}()
- for _, movieCd := range codes {
- var (
- req = this.MovieInfoModel.SearchMovieInfoParams
- )
- req.Key = key
- req.MovieCd = movieCd
- data, err := this.Kobis.MovieInfoAPI(req)
- if err != nil {
- c.JSON(http.StatusBadRequest, err.Error())
- break
- }
- row := data.MovieInfoResult.MovieInfo
- if row.MovieCd == "" {
- errors++
- break
- }
- if this.MovieInfoModel.IsExists(row.MovieCd) == true {
- if err = this.MovieInfoModel.Update(row); err == nil {
- updateRows++
- } else {
- errors++
- }
- } else {
- if err = this.MovieInfoModel.Insert(row); err == nil {
- insertRows++
- } else {
- errors++
- }
- }
- fmt.Println(output(total, errors, insertRows, updateRows))
- total++
- }
- //msg := "[영화 기본 정보 수집 종료]\n"
- //msg += output(total, errors, insertRows, updateRows)
- //utility.SendMessage(msg)
- c.JSON(http.StatusOK, gin.H{
- "total": total,
- "error": errors,
- "insertRows": insertRows,
- "updateRows": updateRows,
- })
- }
- /*
- * 호출 순서
- * OnRequest -> OnError -> OnResponseHeaders -> OnResponse -> OnHTML -> OnXML -> OnScraped
- */
- func (this *Cron) Detail(c *gin.Context) {
- var (
- start = time.Now()
- codes = this.MovieListModel.MovieDetailExcludeCodes()
- total, scraped, errors, insertRows, updateRows, target = 0, 0, 0, 0, 0, len(codes)
- output = func(n ...int) string {
- s := fmt.Sprintf("Total : %d\n", n[0])
- s += fmt.Sprintf("Scraped: %d\n", n[1])
- s += fmt.Sprintf("Error: %d\n", n[2])
- s += fmt.Sprintf("InsertRows: %d\n", n[3])
- s += fmt.Sprintf("UpdateRows: %d\n", n[4])
- s += fmt.Sprintf("Target : %d\n", n[5])
- s += fmt.Sprintf("소요시간: %f초\n", time.Since(start).Seconds())
- return s
- }
- c1 = colly.NewCollector(
- colly.AllowedDomains(config.KOBIS_DOMAIN),
- colly.IgnoreRobotsTxt(),
- colly.Async(false),
- )
- )
- c1.WithTransport(&http.Transport{
- DialContext: (&net.Dialer{
- Timeout: 30 * time.Second,
- KeepAlive: 30 * time.Second,
- }).DialContext,
- MaxIdleConns: 0,
- MaxIdleConnsPerHost: 100,
- IdleConnTimeout: 30 * time.Second,
- TLSHandshakeTimeout: 30 * time.Second,
- ExpectContinueTimeout: 30 * time.Second,
- DisableCompression: false,
- })
- //var c2 = c1.Clone()
- c1.OnRequest(func(r *colly.Request) {
- r.Headers.Set("User-Agent", utility.RandomString())
- r.Headers.Set("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
- })
- c1.OnError(func(_ *colly.Response, err error) {
- log.Printf("Error(c1) : %s\n", err.Error())
- errors++
- })
- c1.OnScraped(func(r *colly.Response) {
- scraped++
- })
- /*
- 관객 수, 누적 매출액 조회
- */
- //c2.OnRequest(func(r *colly.Request) {
- // r.Headers.Set("User-Agent", utility.RandomString())
- // r.Headers.Set("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8")
- //})
- //
- //c2.OnError(func(_ *colly.Response, err error) {
- // log.Printf("Error(c2) : %s\n", err.Error())
- // errors++
- //})
- //
- //c2.OnScraped(func(r *colly.Response) {
- // scraped++
- //})
- // 복구처리
- defer func() {
- if e := recover(); e != nil {
- //msg := "[영화 상세 정보 수집 오류 발생]\n"
- //msg += output(total, scraped, errors, insertRows, updateRows, len(codes))
- //utility.SendMessage(msg)
- }
- }()
- for i, movieCd := range codes {
- if movieCd == "" {
- continue
- }
- movieDetail := this.MovieDetailModel.MovieDetail
- movieDetail.MovieCd = movieCd
- c1.OnHTML(".item_tab.basic", func(e *colly.HTMLElement) {
- var host = config.KOBIS_HOST
- movieDetail.MainImg = e.ChildAttr("a.fl.thumb", "href")
- if movieDetail.MainImg != "" && movieDetail.MainImg != "#" {
- movieDetail.MainImg = host + movieDetail.MainImg
- }
- movieDetail.ThumbImg = e.ChildAttr("a.fl.thumb > img", "src")
- if movieDetail.ThumbImg != "" && movieDetail.ThumbImg != "#" {
- movieDetail.ThumbImg = host + movieDetail.ThumbImg
- }
- movieDetail.Synopsis = e.ChildText("div.info.info2 p.desc_info")
- e.ForEach("div#post > input", func(_ int, ee *colly.HTMLElement) {
- movieDetail.Poster = append(movieDetail.Poster, model.Poster{
- Thumb: host + ee.Attr("thn_img"),
- Origin: host + ee.Attr("img"),
- })
- })
- e.ForEach("div#stl > input", func(_ int, ee *colly.HTMLElement) {
- movieDetail.StillCut = append(movieDetail.StillCut, model.StillCut{
- Thumb: host + ee.Attr("thn_img"),
- Origin: host + ee.Attr("img"),
- })
- })
- })
- //c2.OnHTML("body", func(e *colly.HTMLElement) {
- // var (
- // tr = e.DOM.Find(".info").Eq(0).Find("table tbody tr").Eq(1)
- // saleAcc = utility.RemoveSpecialChar(strings.Replace(tr.Find("td").Eq(2).Text(), "(100%)", "", 1))
- // audiAcc = utility.RemoveSpecialChar(strings.Replace(tr.Find("td").Eq(3).Text(), "(100%)", "", 1))
- // )
- // SaleAcc, _ := strconv.Atoi(saleAcc)
- // AudiAcc, _ := strconv.Atoi(audiAcc)
- //
- // movieDetail.SaleAcc = SaleAcc
- // movieDetail.AudiAcc = AudiAcc
- //})
- err := c1.Post(config.MOVIE_DETAIL, map[string]string{
- "code": movieCd,
- "sType": "",
- "titleYN": "Y",
- "etcParam": "",
- "isOuterReq": "false",
- })
- if err != nil {
- errors++
- continue
- }
- //if this.Rest.Check(err) {
- // errors++
- // continue
- //}
- //err = c2.Post(config.MOVIE_DETAIL, map[string]string{
- // "code": movieCd,
- // "sType": "stat",
- //})
- //if err != nil {
- // errors++
- // continue
- //}
- //
- //if this.Rest.Check(err) {
- // errors++
- // continue
- //}
- if this.MovieDetailModel.IsExists(movieCd) == true {
- if err = this.MovieDetailModel.Update(movieDetail); err == nil {
- updateRows++
- } else {
- errors++
- }
- } else {
- if err = this.MovieDetailModel.Insert(movieDetail); err == nil {
- insertRows++
- } else {
- errors++
- }
- }
- fmt.Println(output(total, scraped, errors, insertRows, updateRows, target))
- codes[i] = ""
- target--
- total++
- }
- //msg := "[영화 상세 정보 수집 종료]\n"
- //msg += output(total, scraped, insertRows, updateRows, errors)
- //utility.SendMessage(msg)
- c.JSON(http.StatusOK, gin.H{
- "total": total,
- "insertRows": insertRows,
- "updateRows": updateRows,
- })
- }
- /**
- * 영화진흥위원회 박스오피스 (통계 조회)
- */
- /*
- func (this *Cron) Stats(c *gin.Context) {
- var (
- start = time.Now()
- total, errors, insertRows, updateRows, page = 0, 0, 0, 0, 1
- output = func(n ...int) string {
- s := fmt.Sprintf("Total : %d\n", n[0])
- s += fmt.Sprintf("Error: %d\n", n[1])
- s += fmt.Sprintf("InsertRows: %d\n", n[2])
- s += fmt.Sprintf("UpdateRows: %d\n", n[3])
- s += fmt.Sprintf("Page: %d\n", n[4])
- s += fmt.Sprintf("소요시간: %f초\n", time.Since(start).Seconds())
- return s
- }
- )
- req := this.MovieStatsModel.SearchBoxOfficeParams
- req.ServiceKey = config.Env.Movie.Kcisa.BoxOfficeKey
- req.NumOfRows = 2000
- req.PageNo = 1
- for {
- var (
- insertData = make([]model.BoxOfficeInfo, 0)
- updateData = make([]model.BoxOfficeInfo, 0)
- )
- req.PageNo = page
- data, err := this.Kobis.MovieBoxOfficeAPI(req)
- if err != nil {
- c.JSON(http.StatusBadRequest, err.Error())
- break
- }
- list := data.Response.Body.Items.Item
- if len(list) <= 0 {
- break
- }
- for _, row := range list {
- query, err := url.ParseQuery(row.Url)
- if err != nil {
- errors++
- continue
- }
- movieCd := query.Get("dtCd")
- if this.MovieStatsModel.IsExists(movieCd) == true {
- updateData = append(updateData, row)
- updateRows++
- } else {
- insertData = append(insertData, row)
- insertRows++
- }
- }
- if insertRows > 0 {
- if err = this.MovieStatsModel.Insert(insertData); err != nil {
- errors++
- }
- }
- if updateRows > 0 {
- if err = this.MovieStatsModel.Update(updateData); err != nil {
- errors++
- }
- }
- fmt.Println(output(total, errors, insertRows, updateRows, page))
- page++
- total++
- }
- msg := "[영화 통계 정보 수집 종료]\n"
- msg += output(total, errors, insertRows, updateRows, page)
- utility.SendMessage(msg)
- c.JSON(http.StatusOK, gin.H{
- "total": total,
- "error": errors,
- "insertRows": insertRows,
- "updateRows": updateRows,
- "page": page,
- })
- }
- */
- func GetKey(c *gin.Context) string {
- if c.Query("key") == "1" {
- return config.Movie.Kobis.ApiKey_1
- } else if c.Query("key") == "2" {
- return config.Movie.Kobis.ApiKey_2
- } else {
- return "f5eef3421c602c6cb7ea224104795888"
- }
- }
- // 마지막 호출 Page 저장
- func SetLastPage(page int) {
- data, err := os.Create(config.LAST_PAGE_PATH_KOBIS)
- if err != nil {
- fmt.Println(err)
- }
- defer func() {
- if data.Close() != nil {
- fmt.Println(err)
- }
- }()
- _, _ = data.WriteString(strconv.FormatInt(int64(page), 10))
- fmt.Printf("Set last page: %d\n", page)
- }
- // 마지막 호출 Page 조회
- func GetLastPage() int {
- byte, err := os.ReadFile(config.LAST_PAGE_PATH_KOBIS)
- if err != nil {
- fmt.Println(err)
- }
- page, _ := strconv.Atoi(string(byte))
- if page == 0 {
- page = 1
- }
- return page
- }
|