using System.Globalization;
using System.Text.Json;
using System.Text.RegularExpressions;
using System.Xml.Linq;
using Application.Abstractions.Data;
using Application.Abstractions.News;
using Domain.Entities.News;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;

namespace Infrastructure.News;

/// <summary>
/// Background service that periodically downloads the active RSS feed sources,
/// parses their <c>item</c> elements and stores articles that are not yet in the database.
/// Also exposes <see cref="FetchSourceAsync"/> so a single source can be collected on demand.
/// </summary>
// NOTE(review): the non-generic ILogger is not registered by the default logging setup
// (only ILogger<T> and ILoggerFactory are) — confirm whether ILogger<RssCollectorService>
// was intended here. The parameter type is left unchanged to keep the constructor signature.
public sealed class RssCollectorService(
    IServiceScopeFactory scopeFactory,
    IHttpClientFactory httpClientFactory,
    ILogger logger
) : BackgroundService, IRssCollector
{
    private static readonly XNamespace _dc = "http://purl.org/dc/elements/1.1/";
    private static readonly XNamespace _content = "http://purl.org/rss/1.0/modules/content/";
    private static readonly XNamespace _media = "http://search.yahoo.com/mrss/";
    private static readonly XNamespace _slash = "http://purl.org/rss/1.0/modules/slash/";

    // Wait applied once before the first collection pass (gives the database time to come up).
    private static readonly TimeSpan _startupDelay = TimeSpan.FromMilliseconds(5000);

    // Pause between two collection passes.
    private static readonly TimeSpan _pollInterval = TimeSpan.FromMinutes(1);

    // Captures the src attribute value of the first <img ...> tag in an HTML fragment.
    private static readonly Regex _imgSrcRegex = new(
        @"<img[^>]+src\s*=\s*[""']([^""']+)[""']",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>
    /// Main loop: waits for the startup delay, then runs a collection pass every minute
    /// until <paramref name="ct"/> is cancelled.
    /// </summary>
    /// <param name="ct">Token signalled when the service is asked to stop.</param>
    protected override async Task ExecuteAsync(CancellationToken ct)
    {
        try
        {
            await Task.Delay(_startupDelay, ct);

            while (!ct.IsCancellationRequested)
            {
                try
                {
                    await CollectAllFeedsAsync(ct);
                }
                catch (Exception ex) when (ex is not OperationCanceledException || !ct.IsCancellationRequested)
                {
                    // A failed pass must not end the loop; log it and try again on the next tick.
                    logger.LogError(ex, "RSS 수집 루프에서 예외 발생");
                }

                await Task.Delay(_pollInterval, ct);
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Stop requested while delaying or collecting: leave the loop without an error.
        }
    }

    /// <summary>
    /// Collects a single feed source inside its own service scope and records the fetch on the source.
    /// </summary>
    /// <param name="sourceID">Primary key of the feed source to collect.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The number of newly stored articles.</returns>
    /// <exception cref="KeyNotFoundException">No feed source with <paramref name="sourceID"/> exists.</exception>
    public async Task<int> FetchSourceAsync(int sourceID, CancellationToken ct)
    {
        using var scope = scopeFactory.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<IAppDbContext>();

        var source = await db.RssFeedSource.FindAsync([sourceID], ct)
            ?? throw new KeyNotFoundException($"소스 ID {sourceID}를 찾을 수 없습니다.");

        var count = await FetchAndStoreAsync(source, db, ct);
        source.MarkFetched();
        await db.SaveChangesAsync(ct);

        return count;
    }

    /// <summary>
    /// Runs one collection pass over every active source whose interval has elapsed.
    /// A failure in one source is logged and does not stop the remaining sources.
    /// </summary>
    /// <param name="ct">Cancellation token.</param>
    private async Task CollectAllFeedsAsync(CancellationToken ct)
    {
        var dueSources = await LoadDueSourcesAsync(ct);

        foreach (var source in dueSources)
        {
            try
            {
                // Each source is collected through its own scope/context so that inserts left
                // pending by a failed save cannot leak into the save of the next source.
                var count = await FetchSourceAsync(source.ID, ct);

                if (count > 0)
                {
                    logger.LogInformation("[RSS] {SourceName}: {Count}건 수집 완료", source.Name, count);
                }
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // Shutdown, not a feed failure: stop the pass instead of logging every remaining source.
                throw;
            }
            catch (Exception ex)
            {
                logger.LogWarning(ex, "[RSS] {SourceName} ({Url}) 수집 실패", source.Name, source.Url);
            }
        }
    }

    /// <summary>
    /// Seeds the default sources when the table is empty, then returns the active sources
    /// that have never been fetched or whose collection interval has elapsed.
    /// </summary>
    /// <param name="ct">Cancellation token.</param>
    private async Task<List<RssFeedSource>> LoadDueSourcesAsync(CancellationToken ct)
    {
        using var scope = scopeFactory.CreateScope();
        var db = scope.ServiceProvider.GetRequiredService<IAppDbContext>();

        // Initial seed: insert the default sources when the table is empty.
        if (!await db.RssFeedSource.AnyAsync(ct))
        {
            await SeedDefaultSourcesAsync(db, ct);
        }

        var activeSources = await db.RssFeedSource.Where(x => x.IsActive).ToListAsync(ct);
        var now = DateTime.UtcNow;

        // Collection interval check.
        return activeSources
            .Where(s => !s.LastFetchedAt.HasValue
                || (now - s.LastFetchedAt.Value).TotalMinutes >= s.IntervalMinutes)
            .ToList();
    }

    /// <summary>
    /// Downloads the feed of <paramref name="source"/>, parses its un-namespaced <c>item</c>
    /// elements and saves the articles whose Guid is not already stored for that source.
    /// </summary>
    /// <param name="source">Feed source to download.</param>
    /// <param name="db">Context used for the duplicate lookup and the insert.</param>
    /// <param name="ct">Cancellation token.</param>
    /// <returns>The number of articles inserted (0 when the feed has no items or nothing is new).</returns>
    private async Task<int> FetchAndStoreAsync(RssFeedSource source, IAppDbContext db, CancellationToken ct)
    {
        var client = httpClientFactory.CreateClient("RssFeed");
        var xml = await client.GetStringAsync(source.Url, ct);

        var doc = XDocument.Parse(xml);
        var items = doc.Descendants("item").ToList();
        if (items.Count == 0)
        {
            return 0;
        }

        // Parse every item into an article entity.
        var articles = items.Select(item => ParseItem(source.ID, item)).ToList();

        // Duplicate check: look up which of the parsed Guids are already stored for this source.
        var guids = articles.Where(a => a.Guid != null).Select(a => a.Guid!).ToList();
        var knownGuids = new HashSet<string>();
        if (guids.Count > 0)
        {
            var existingGuids = await db.RssNewsArticle
                .Where(x => x.RssFeedSourceID == source.ID && x.Guid != null && guids.Contains(x.Guid!))
                .Select(x => x.Guid!)
                .ToListAsync(ct);
            knownGuids.UnionWith(existingGuids);
        }

        // Articles without a Guid cannot be matched and are always kept. For the others,
        // HashSet.Add also rejects a Guid that appears twice within the same feed document.
        var newArticles = new List<RssNewsArticle>();
        foreach (var article in articles)
        {
            if (article.Guid == null || knownGuids.Add(article.Guid))
            {
                newArticles.Add(article);
            }
        }

        if (newArticles.Count == 0)
        {
            return 0;
        }

        db.RssNewsArticle.AddRange(newArticles);
        await db.SaveChangesAsync(ct);

        return newArticles.Count;
    }

    /// <summary>
    /// Maps one <c>item</c> element to a new <see cref="RssNewsArticle"/>.
    /// </summary>
    /// <param name="sourceID">ID of the feed source the item belongs to.</param>
    /// <param name="item">The <c>item</c> element to read.</param>
    private static RssNewsArticle ParseItem(int sourceID, XElement item)
    {
        var title = item.Element("title")?.Value?.Trim() ?? "";
        var link = item.Element("link")?.Value?.Trim();
        // The link doubles as the Guid when the item has no guid element.
        var guid = item.Element("guid")?.Value?.Trim() ?? link;
        var author = item.Element(_dc + "creator")?.Value?.Trim();
        var description = item.Element("description")?.Value?.Trim();
        var content = item.Element(_content + "encoded")?.Value?.Trim();
        var sourceName = item.Element("source")?.Value?.Trim();

        // Author fallback: the source element (as used by Google News).
        if (string.IsNullOrEmpty(author) && !string.IsNullOrEmpty(sourceName))
        {
            author = sourceName;
        }

        // Image: media:content > enclosure > first <img> inside description/content.
        var imageUrl = item.Element(_media + "content")?.Attribute("url")?.Value?.Trim()
            ?? item.Element("enclosure")?.Attribute("url")?.Value?.Trim();
        if (string.IsNullOrEmpty(imageUrl))
        {
            imageUrl = ExtractFirstImageUrl(description) ?? ExtractFirstImageUrl(content);
        }

        // Categories: stored as a JSON array, or null when the item has none.
        var categories = item.Elements("category")
            .Select(c => c.Value.Trim())
            .Where(c => !string.IsNullOrEmpty(c))
            .ToList();
        string? categoriesJson = categories.Count > 0 ? JsonSerializer.Serialize(categories) : null;

        // Comment count: stays 0 when the element is missing or not a number.
        // Feed data is machine-readable, so it is parsed with the invariant culture.
        var commentCountStr = item.Element(_slash + "comments")?.Value;
        int.TryParse(commentCountStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out var commentCount);

        // Publication date, converted to UTC; stays null when missing or unparseable.
        DateTime? publishedAt = null;
        var pubDateStr = item.Element("pubDate")?.Value;
        if (!string.IsNullOrEmpty(pubDateStr)
            && DateTimeOffset.TryParse(pubDateStr, CultureInfo.InvariantCulture, DateTimeStyles.None, out var dto))
        {
            publishedAt = dto.UtcDateTime;
        }

        return RssNewsArticle.Create(
            rssFeedSourceID: sourceID,
            title: title,
            link: link,
            guid: guid,
            author: author,
            description: description,
            content: content,
            imageUrl: imageUrl,
            sourceName: sourceName,
            categories: categoriesJson,
            commentCount: commentCount,
            publishedAt: publishedAt
        );
    }

    /// <summary>
    /// Returns the trimmed <c>src</c> value of the first <c>&lt;img&gt;</c> tag in
    /// <paramref name="html"/>, or <c>null</c> when the input is null/empty or has no such tag.
    /// </summary>
    private static string? ExtractFirstImageUrl(string? html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return null;
        }

        var match = _imgSrcRegex.Match(html);
        return match.Success ? match.Groups[1].Value.Trim() : null;
    }

    /// <summary>
    /// Inserts the three built-in feed sources and saves them.
    /// </summary>
    private static async Task SeedDefaultSourcesAsync(IAppDbContext db, CancellationToken ct)
    {
        db.RssFeedSource.AddRange(
            RssFeedSource.Create("CoinSpeaker KR", "https://www.coinspeaker.com/kr/news/feed/", "CoinSpeaker 한국어 뉴스", 10),
            RssFeedSource.Create("Google News 비트코인", "https://news.google.com/rss/search?q=%EB%B9%84%ED%8A%B8%EC%BD%94%EC%9D%B8&hl=ko&gl=KR&ceid=KR:ko", "구글 뉴스 비트코인 검색", 15),
            RssFeedSource.Create("CoinTelegraph KR", "https://cointelegraph-kr.com/rss", "CoinTelegraph 한국어 뉴스", 10)
        );

        await db.SaveChangesAsync(ct);
    }
}