// xianyan/lib/features/discover/services/rss_service.dart

/// ============================================================
/// 闲言APP — RSS订阅服务
/// 创建时间: 2026-05-30
/// 更新时间: 2026-05-30
/// 作用: RSS/Atom订阅源管理+文章解析+持久化存储
/// 上次更新: 新增OPML导入导出功能
/// ============================================================

import 'dart:convert';

import 'package:dio/dio.dart';
import 'package:hive_ce/hive.dart';
import 'package:rss_dart/dart_rss.dart';
import 'package:xianyan/core/utils/logger.dart';

/// Category assigned to an RSS subscription source.
///
/// Each value carries a display [label]; the enum value's `name` is what
/// [RssSubscription.toMap] writes to storage.
enum RssCategory {
  tech('科技'),
  efficiency('效率'),
  general('综合'),
  techCommunity('技术社区'),
  design('设计'),
  finance('财经'),
  news('新闻');

  const RssCategory(this.label);

  /// Display label for this category.
  final String label;
}

/// Immutable description of one RSS/Atom subscription source.
///
/// Instances round-trip through [toMap] / [RssSubscription.fromMap] so they
/// can be stored as JSON.
class RssSubscription {
  /// Creates a subscription; only [id], [title] and [url] are mandatory.
  const RssSubscription({
    required this.id,
    required this.title,
    required this.url,
    this.description,
    this.iconUrl,
    this.category = RssCategory.general,
    this.lastUpdated,
    this.unreadCount = 0,
    this.addedAt,
  });

  /// Rebuilds a subscription from a map produced by [toMap].
  ///
  /// Missing string fields fall back to `''` (required ones) or `null`
  /// (optional ones); an unknown or missing category name becomes
  /// [RssCategory.general]; a missing unread count becomes `0`.
  factory RssSubscription.fromMap(Map<String, dynamic> map) {
    // Resolve the stored enum name by hand, defaulting to `general`.
    final storedCategory = map['category'];
    var resolvedCategory = RssCategory.general;
    for (final candidate in RssCategory.values) {
      if (candidate.name == storedCategory) {
        resolvedCategory = candidate;
        break;
      }
    }

    return RssSubscription(
      id: map['id'] as String? ?? '',
      title: map['title'] as String? ?? '',
      url: map['url'] as String? ?? '',
      description: map['description'] as String?,
      iconUrl: map['iconUrl'] as String?,
      category: resolvedCategory,
      lastUpdated: RssService.parseDateTime(map['lastUpdated'] as String?),
      unreadCount: map['unreadCount'] as int? ?? 0,
      addedAt: RssService.parseDateTime(map['addedAt'] as String?),
    );
  }

  /// Identifier of the subscription.
  final String id;

  /// Title shown for the subscription.
  final String title;

  /// Feed URL.
  final String url;

  /// Optional free-text description.
  final String? description;

  /// Optional icon URL.
  final String? iconUrl;

  /// Category the subscription is filed under.
  final RssCategory category;

  /// When the subscription was last updated, if recorded.
  final DateTime? lastUpdated;

  /// Number of unread items recorded for the subscription.
  final int unreadCount;

  /// When the subscription was added, if recorded.
  final DateTime? addedAt;

  /// Returns a copy with the given fields replaced.
  ///
  /// [id], [url] and [addedAt] are always carried over unchanged. Passing
  /// `null` for a parameter keeps the current value.
  RssSubscription copyWith({
    String? title,
    String? description,
    String? iconUrl,
    RssCategory? category,
    DateTime? lastUpdated,
    int? unreadCount,
  }) {
    final nextTitle = title ?? this.title;
    final nextDescription = description ?? this.description;
    final nextIconUrl = iconUrl ?? this.iconUrl;
    final nextCategory = category ?? this.category;
    final nextLastUpdated = lastUpdated ?? this.lastUpdated;
    final nextUnreadCount = unreadCount ?? this.unreadCount;

    return RssSubscription(
      id: id,
      title: nextTitle,
      url: url,
      description: nextDescription,
      iconUrl: nextIconUrl,
      category: nextCategory,
      lastUpdated: nextLastUpdated,
      unreadCount: nextUnreadCount,
      addedAt: addedAt,
    );
  }

  /// Serializes this subscription to a JSON-encodable map.
  ///
  /// Dates are written as ISO-8601 strings and the category as its enum name.
  Map<String, dynamic> toMap() {
    return <String, dynamic>{
      'id': id,
      'title': title,
      'url': url,
      'description': description,
      'iconUrl': iconUrl,
      'category': category.name,
      'lastUpdated': lastUpdated?.toIso8601String(),
      'unreadCount': unreadCount,
      'addedAt': addedAt?.toIso8601String(),
    };
  }
}

/// Immutable model of a single feed entry (RSS item or Atom entry).
class RssFeedItem {
  /// Creates a feed item; only [title] is mandatory.
  const RssFeedItem({
    required this.title,
    this.description,
    this.content,
    this.link,
    this.author,
    this.pubDate,
    this.imageUrl,
    this.sourceId,
    this.sourceTitle,
    this.isRead = false,
  });

  /// Entry title.
  final String title;

  /// Optional short description or summary.
  final String? description;

  /// Optional full content.
  final String? content;

  /// Optional link to the entry.
  final String? link;

  /// Optional author name.
  final String? author;

  /// Optional publication date.
  final DateTime? pubDate;

  /// Optional image URL.
  final String? imageUrl;

  /// Id of the subscription this entry came from, if known.
  final String? sourceId;

  /// Title of the subscription this entry came from, if known.
  final String? sourceTitle;

  /// Whether the entry is flagged as read.
  final bool isRead;

  /// Returns a copy of this item, optionally with a different [isRead] flag.
  RssFeedItem copyWith({bool? isRead}) {
    final readFlag = isRead ?? this.isRead;
    return RssFeedItem(
      title: title,
      description: description,
      content: content,
      link: link,
      author: author,
      pubDate: pubDate,
      imageUrl: imageUrl,
      sourceId: sourceId,
      sourceTitle: sourceTitle,
      isRead: readFlag,
    );
  }

  /// Identifier built from [sourceId] and the hash of [link] (or of [title]
  /// when there is no link).
  ///
  /// NOTE(review): this relies on `String.hashCode`, which the Dart docs do
  /// not promise to keep consistent between executions — confirm this is
  /// acceptable wherever the value is persisted.
  String get uid {
    final prefix = sourceId ?? "";
    final discriminator = link?.hashCode ?? title.hashCode;
    return '${prefix}_$discriminator';
  }
}

/// RSS订阅服务 — 解析RSS/Atom源 + 持久化
class RssService {
  // Private constructor: the members visible in this file are all static.
  RssService._();

  /// Shared HTTP client with 10-second connect and receive timeouts; response
  /// bodies are requested as plain text so they can be fed to the XML/HTML
  /// parsing code below.
  static final Dio _dio = Dio(
    BaseOptions(
      connectTimeout: const Duration(seconds: 10),
      receiveTimeout: const Duration(seconds: 10),
      responseType: ResponseType.plain,
    ),
  );

  /// Name of the Hive box holding the subscription list.
  static const String _boxName = 'rss_subscriptions';

  /// Name of the Hive box holding per-article read flags.
  static const String _readArticlesBox = 'rss_read_articles';

  /// Opened subscription box; stays `null` until [init] succeeds.
  static Box<dynamic>? _box;

  /// Opened read-flags box; stays `null` until [init] succeeds.
  static Box<dynamic>? _readBox;

  /// Opens the two Hive boxes used by this service.
  ///
  /// Failures are logged and swallowed; a box that could not be opened stays
  /// `null`.
  static Future<void> init() async {
    try {
      final subscriptionBox = await Hive.openBox<dynamic>(_boxName);
      _box = subscriptionBox;
      final readFlagsBox = await Hive.openBox<dynamic>(_readArticlesBox);
      _readBox = readFlagsBox;
      Log.i('RssService', 'Hive存储初始化完成');
    } catch (e) {
      Log.e('RssService', 'Hive存储初始化失败: $e');
    }
  }

  /// Returns the persisted subscriptions.
  ///
  /// Falls back to a mutable copy of [defaultSubscriptions] when the box is
  /// not open, nothing has been saved yet, or the stored value cannot be
  /// decoded (the decode failure is logged).
  static List<RssSubscription> getSavedSubscriptions() {
    final stored = _box?.get('subscriptions');
    if (stored == null) {
      return defaultSubscriptions.toList();
    }

    try {
      final decoded = jsonDecode(stored as String) as List<dynamic>;
      return [
        for (final entry in decoded)
          RssSubscription.fromMap(entry as Map<String, dynamic>),
      ];
    } catch (e) {
      Log.e('RssService', '读取订阅源失败: $e');
      return defaultSubscriptions.toList();
    }
  }

  /// Persists [subs] as a JSON string under the `subscriptions` key.
  ///
  /// Does nothing when the box is not open; encoding/storage errors are
  /// logged and swallowed.
  static Future<void> saveSubscriptions(List<RssSubscription> subs) async {
    final box = _box;
    if (box == null) {
      return;
    }

    try {
      final payload = jsonEncode([for (final sub in subs) sub.toMap()]);
      await box.put('subscriptions', payload);
    } catch (e) {
      Log.e('RssService', '保存订阅源失败: $e');
    }
  }

  /// Appends [sub] to the saved subscriptions unless one with the same URL
  /// already exists, in which case a warning is logged and nothing is saved.
  static Future<void> addSubscription(RssSubscription sub) async {
    final current = getSavedSubscriptions();
    final alreadyPresent = current.any((existing) => existing.url == sub.url);
    if (alreadyPresent) {
      Log.w('RssService', '订阅源已存在: ${sub.url}');
      return;
    }

    await saveSubscriptions([...current, sub]);
  }

  /// Removes every saved subscription whose id equals [id] and saves the
  /// remaining list.
  static Future<void> removeSubscription(String id) async {
    final remaining = [
      for (final sub in getSavedSubscriptions())
        if (sub.id != id) sub,
    ];
    await saveSubscriptions(remaining);
  }

  /// Stores a read flag for [articleUid]; a no-op when the read box is not
  /// open.
  static Future<void> markArticleRead(String articleUid) async {
    final readFlags = _readBox;
    if (readFlags != null) {
      await readFlags.put(articleUid, true);
    }
  }

  /// Whether a read flag has been stored for [articleUid].
  ///
  /// Returns `false` when the read box is not open.
  static bool isArticleRead(String articleUid) {
    final readFlags = _readBox;
    return readFlags != null && readFlags.get(articleUid) == true;
  }

  /// Downloads [sub]'s feed and converts its entries into [RssFeedItem]s.
  ///
  /// The body is treated as RSS when it contains `<rss` or `<channel`, as
  /// Atom when it contains `<feed`, and otherwise yields an empty list. Any
  /// network or parse error is logged and also yields an empty list.
  ///
  /// Each item's read flag is looked up with the item's own
  /// [RssFeedItem.uid], so the key checked here always matches the key the
  /// item reports (including for entries without a title or link).
  static Future<List<RssFeedItem>> fetchFeed(RssSubscription sub) async {
    // Derive the read-state key from the finished item instead of
    // re-computing it from raw feed fields, which could disagree with `uid`.
    RssFeedItem withReadState(RssFeedItem item) =>
        item.copyWith(isRead: isArticleRead(item.uid));

    try {
      final response = await _dio.get<String>(sub.url);
      final xml = response.data ?? '';

      if (xml.contains('<rss') || xml.contains('<channel')) {
        final feed = RssFeed.parse(xml);
        return feed.items
            .map(
              (item) => withReadState(
                RssFeedItem(
                  title: item.title ?? '无标题',
                  description: item.description ?? item.content?.value,
                  content: item.content?.value ?? item.description,
                  link: item.link,
                  author: item.author ?? item.dc?.creator,
                  pubDate: parseDateTime(item.pubDate),
                  imageUrl:
                      item.enclosure?.url ??
                      item.media?.thumbnails.firstOrNull?.url,
                  sourceId: sub.id,
                  sourceTitle: sub.title,
                ),
              ),
            )
            .toList();
      }

      if (xml.contains('<feed')) {
        final feed = AtomFeed.parse(xml);
        return feed.items
            .map(
              (entry) => withReadState(
                RssFeedItem(
                  title: entry.title ?? '无标题',
                  description: entry.summary ?? entry.content,
                  content: entry.content ?? entry.summary,
                  link: entry.links.firstOrNull?.href,
                  author: entry.authors.firstOrNull?.name,
                  pubDate: parseDateTime(entry.published ?? entry.updated),
                  imageUrl: entry.media?.thumbnails.firstOrNull?.url,
                  sourceId: sub.id,
                  sourceTitle: sub.title,
                ),
              ),
            )
            .toList();
      }

      // Neither RSS nor Atom markers were found.
      return <RssFeedItem>[];
    } catch (e) {
      Log.e('RssService', 'RSS解析失败 [${sub.url}]: $e');
      return <RssFeedItem>[];
    }
  }

  /// Downloads [url] (after normalizing it) and, if the body looks like an
  /// RSS or Atom feed, builds a new [RssSubscription] from the feed metadata.
  ///
  /// Returns `null` when the body is neither RSS nor Atom, or when the
  /// request/parsing fails (the error is logged).
  static Future<RssSubscription?> discoverFeed(String url) async {
    try {
      final feedUrl = _normalizeUrl(url);
      final body = (await _dio.get<String>(feedUrl)).data ?? '';

      final looksLikeRss = body.contains('<rss') || body.contains('<channel');
      if (looksLikeRss) {
        final rss = RssFeed.parse(body);
        return RssSubscription(
          id: 'rss_${DateTime.now().millisecondsSinceEpoch}',
          title: rss.title ?? feedUrl,
          url: feedUrl,
          description: rss.description,
          iconUrl: rss.image?.url,
          category: _guessCategory(rss.title ?? ''),
          addedAt: DateTime.now(),
        );
      }

      if (!body.contains('<feed')) {
        return null;
      }

      final atom = AtomFeed.parse(body);
      return RssSubscription(
        id: 'rss_${DateTime.now().millisecondsSinceEpoch}',
        title: atom.title ?? feedUrl,
        url: feedUrl,
        description: atom.subtitle,
        iconUrl: atom.icon,
        category: _guessCategory(atom.title ?? ''),
        addedAt: DateTime.now(),
      );
    } catch (e) {
      Log.e('RssService', '发现RSS源失败: $e');
      return null;
    }
  }

  /// Fetches every subscription in [subs] concurrently.
  ///
  /// Returns a map from subscription id to the items returned by [fetchFeed]
  /// for that subscription.
  static Future<Map<String, List<RssFeedItem>>> fetchAllFeeds(
    List<RssSubscription> subs,
  ) async {
    final itemsBySource = <String, List<RssFeedItem>>{};

    // Each task stores its own result as soon as its fetch completes.
    Future<void> load(RssSubscription sub) async {
      itemsBySource[sub.id] = await fetchFeed(sub);
    }

    await Future.wait([for (final sub in subs) load(sub)]);
    return itemsBySource;
  }

  /// Built-in recommended subscriptions.
  ///
  /// [getSavedSubscriptions] returns a copy of this list when nothing has
  /// been stored yet or the stored data cannot be read.
  static const List<RssSubscription> defaultSubscriptions = [
    RssSubscription(
      id: '36kr',
      title: '36氪',
      url: 'https://36kr.com/feed',
      description: '科技创业资讯',
      category: RssCategory.tech,
    ),
    RssSubscription(
      id: 'ifanr',
      title: '爱范儿',
      url: 'https://www.ifanr.com/feed',
      description: '科技媒体',
      category: RssCategory.tech,
    ),
    RssSubscription(
      id: 'sspai',
      title: '少数派',
      url: 'https://sspai.com/feed',
      description: '高效工作和品质生活',
      category: RssCategory.efficiency,
    ),
    // No explicit category: uses the constructor default (general).
    RssSubscription(
      id: 'zhihu_daily',
      title: '知乎日报',
      url: 'https://daily.zhihu.com/rss',
      description: '知乎精选',
    ),
    RssSubscription(
      id: 'ruanyifeng',
      title: '阮一峰',
      url: 'http://www.ruanyifeng.com/blog/atom.xml',
      description: '科技博客',
      category: RssCategory.tech,
    ),
    RssSubscription(
      id: 'v2ex',
      title: 'V2EX',
      url: 'https://www.v2ex.com/index.xml',
      description: '创意工作者社区',
      category: RssCategory.techCommunity,
    ),
  ];

  /// Parses a date string from a feed or from persisted storage.
  ///
  /// Tries [DateTime.tryParse] first (ISO-8601, as written by
  /// `toIso8601String` and used by Atom). If that fails, falls back to the
  /// RFC 822 / RFC 2822 layout used by RSS `pubDate`, e.g.
  /// `Sat, 30 May 2026 10:00:00 +0800` or `30 May 26 10:00 GMT`.
  ///
  /// Returns `null` for `null`, blank, or unrecognized input. Dates parsed by
  /// the RFC 822 fallback are returned in UTC; a missing or unknown zone is
  /// treated as UTC.
  static DateTime? parseDateTime(String? dateStr) {
    final raw = dateStr?.trim() ?? '';
    if (raw.isEmpty) return null;

    final iso = DateTime.tryParse(raw);
    if (iso != null) return iso;

    // [weekday,] day month year [hh:mm[:ss]] [zone]
    final match = RegExp(
      r'^(?:[A-Za-z]+,?\s+)?(\d{1,2})\s+([A-Za-z]{3})[A-Za-z]*\.?,?\s+(\d{2,4})'
      r'(?:\s+(\d{1,2}):(\d{2})(?::(\d{2}))?)?'
      r'(?:\s*([+-]\d{2}:?\d{2}|[A-Za-z]+))?',
    ).firstMatch(raw);
    if (match == null) return null;

    const monthNumbers = <String, int>{
      'jan': 1,
      'feb': 2,
      'mar': 3,
      'apr': 4,
      'may': 5,
      'jun': 6,
      'jul': 7,
      'aug': 8,
      'sep': 9,
      'oct': 10,
      'nov': 11,
      'dec': 12,
    };
    final month = monthNumbers[match.group(2)!.toLowerCase()];
    if (month == null) return null;

    final yearText = match.group(3)!;
    var year = int.parse(yearText);
    // RFC 2822 obsolete years: 00-49 => 20xx, 50-999 => 19xx.
    if (yearText.length < 4) year += year < 50 ? 2000 : 1900;

    final day = int.parse(match.group(1)!);
    final hour = int.parse(match.group(4) ?? '0');
    final minute = int.parse(match.group(5) ?? '0');
    final second = int.parse(match.group(6) ?? '0');

    // Zone abbreviations defined by RFC 822; anything else counts as UTC.
    const namedZoneHours = <String, int>{
      'ut': 0,
      'utc': 0,
      'gmt': 0,
      'z': 0,
      'est': -5,
      'edt': -4,
      'cst': -6,
      'cdt': -5,
      'mst': -7,
      'mdt': -6,
      'pst': -8,
      'pdt': -7,
    };
    final zone = match.group(7);
    var offsetMinutes = 0;
    if (zone != null) {
      if (zone.startsWith('+') || zone.startsWith('-')) {
        final digits = zone.substring(1).replaceAll(':', '');
        final magnitude =
            int.parse(digits.substring(0, 2)) * 60 +
            int.parse(digits.substring(2));
        offsetMinutes = zone.startsWith('-') ? -magnitude : magnitude;
      } else {
        offsetMinutes = (namedZoneHours[zone.toLowerCase()] ?? 0) * 60;
      }
    }

    // The wall-clock fields are local to the zone; shift back to UTC.
    return DateTime.utc(
      year,
      month,
      day,
      hour,
      minute,
      second,
    ).subtract(Duration(minutes: offsetMinutes));
  }

  /// Trims [url] and prefixes `https://` when it has no `http://` or
  /// `https://` scheme.
  ///
  /// The scheme check is case-insensitive, so input such as
  /// `HTTP://example.com` is left as-is instead of being prefixed again.
  static String _normalizeUrl(String url) {
    final trimmed = url.trim();
    final hasHttpScheme = RegExp(
      r'^https?://',
      caseSensitive: false,
    ).hasMatch(trimmed);
    return hasHttpScheme ? trimmed : 'https://$trimmed';
  }

  /// Guesses a category from keywords in [title] (case-insensitive).
  ///
  /// Categories are tested in the order listed below and the first one with
  /// a matching keyword wins; [RssCategory.general] is the fallback.
  static RssCategory _guessCategory(String title) {
    // Map literals keep insertion order, which defines match priority.
    const keywordsByCategory = <RssCategory, List<String>>{
      RssCategory.design: ['设计', 'design'],
      RssCategory.finance: ['财经', '金融', 'finance'],
      RssCategory.news: ['新闻', 'news'],
      RssCategory.efficiency: ['效率', '少数派', 'sspai'],
      RssCategory.techCommunity: ['v2ex', '社区', 'forum'],
      RssCategory.tech: ['科技', 'tech', '氪', '范儿'],
    };

    final haystack = title.toLowerCase();
    for (final entry in keywordsByCategory.entries) {
      if (entry.value.any(haystack.contains)) {
        return entry.key;
      }
    }
    return RssCategory.general;
  }

  // ============================================================
  // OPML 导入导出
  // ============================================================

  /// Serializes the saved subscriptions as an OPML 2.0 document.
  ///
  /// Subscriptions are grouped under one parent `<outline>` per category (in
  /// order of first appearance); each feed becomes a child outline with
  /// `type="rss"`, `text`, `title`, `xmlUrl` and, when present, `description`.
  static String exportToOpml() {
    final byCategory = <RssCategory, List<RssSubscription>>{};
    for (final sub in getSavedSubscriptions()) {
      (byCategory[sub.category] ??= []).add(sub);
    }

    final createdAt = DateTime.now().toIso8601String();
    final out = StringBuffer()
      ..writeln('<?xml version="1.0" encoding="UTF-8"?>')
      ..writeln('<opml version="2.0">')
      ..writeln('  <head>')
      ..writeln('    <title>闲言APP RSS订阅</title>')
      ..writeln('    <dateCreated>$createdAt</dateCreated>')
      ..writeln('  </head>')
      ..writeln('  <body>');

    for (final MapEntry(key: category, value: members) in byCategory.entries) {
      out.writeln('    <outline text="${_xmlEscape(category.label)}">');
      for (final sub in members) {
        final title = _xmlEscape(sub.title);
        final feedUrl = _xmlEscape(sub.url);
        final description = sub.description;
        final descriptionAttr = description == null
            ? ''
            : ' description="${_xmlEscape(description)}"';
        out.writeln(
          '      <outline type="rss" text="$title" title="$title" '
          'xmlUrl="$feedUrl"$descriptionAttr />',
        );
      }
      out.writeln('    </outline>');
    }

    out
      ..writeln('  </body>')
      ..writeln('</opml>');
    return out.toString();
  }

  /// Imports feeds from an OPML document and returns how many were added.
  ///
  /// Every `<outline ... type="rss" ...>` tag with a non-empty `xmlUrl` is
  /// considered. The title is the first non-empty value among `title`, `text`
  /// and the URL. Feeds whose URL already exists (in storage or earlier in
  /// the same document) are skipped. The merged list is saved only when at
  /// least one feed was added.
  ///
  /// Attribute values are XML-unescaped before use, so the category guess
  /// sees the real title, and a missing/empty `description` stays `null`
  /// rather than becoming an empty string.
  ///
  /// Returns `0` when anything throws (the error is logged).
  static Future<int> importFromOpml(String opmlXml) async {
    try {
      final parsed = <RssSubscription>[];
      final outlineRegex = RegExp(
        r'<outline[^>]*type="rss"[^>]*>',
        caseSensitive: false,
      );
      final textRegex = RegExp(r'text="([^"]*)"');
      final titleRegex = RegExp(r'title="([^"]*)"');
      final xmlUrlRegex = RegExp(r'xmlUrl="([^"]*)"');
      final descRegex = RegExp(r'description="([^"]*)"');

      // Unescaped attribute value, or null when absent or empty.
      String? attribute(RegExp pattern, String outline) {
        final value = pattern.firstMatch(outline)?.group(1);
        if (value == null || value.isEmpty) return null;
        return _xmlUnescape(value);
      }

      for (final match in outlineRegex.allMatches(opmlXml)) {
        final outline = match.group(0)!;

        final feedUrl = attribute(xmlUrlRegex, outline);
        if (feedUrl == null) continue;

        final title =
            attribute(titleRegex, outline) ??
            attribute(textRegex, outline) ??
            feedUrl;

        parsed.add(
          RssSubscription(
            // The running index keeps ids distinct within one import.
            id: 'import_${DateTime.now().millisecondsSinceEpoch}_${parsed.length}',
            title: title,
            url: feedUrl,
            description: attribute(descRegex, outline),
            category: _guessCategory(title),
            addedAt: DateTime.now(),
          ),
        );
      }

      final existing = getSavedSubscriptions();
      final existingUrls = existing.map((s) => s.url).toSet();
      var added = 0;

      for (final sub in parsed) {
        // Set.add returns false for URLs that are already known.
        if (existingUrls.add(sub.url)) {
          existing.add(sub);
          added++;
        }
      }

      if (added > 0) {
        await saveSubscriptions(existing);
      }

      Log.i('RssService', 'OPML导入完成: ${parsed.length}个源，新增$added个');
      return added;
    } catch (e) {
      Log.e('RssService', 'OPML导入失败: $e');
      return 0;
    }
  }

  /// Escapes the five XML special characters (`&`, `"`, `'`, `<`, `>`) in
  /// [input] with their named entities.
  static String _xmlEscape(String input) {
    // A single pass cannot re-escape the `&` of an entity it just emitted.
    return input.replaceAllMapped(
      RegExp(r'''[&"'<>]'''),
      (match) => switch (match.group(0)) {
        '&' => '&amp;',
        '"' => '&quot;',
        "'" => '&apos;',
        '<' => '&lt;',
        _ => '&gt;',
      },
    );
  }

  /// Replaces the five named XML entities in [input] with their characters.
  ///
  /// Returns `''` for `null`. `&amp;` is handled last so that an escaped
  /// ampersand followed by entity-like text (e.g. `&amp;lt;`) is decoded
  /// only once.
  static String _xmlUnescape(String? input) {
    if (input == null) return '';

    // Applied in insertion order; `&amp;` must stay at the end.
    const charByEntity = <String, String>{
      '&gt;': '>',
      '&lt;': '<',
      '&apos;': "'",
      '&quot;': '"',
      '&amp;': '&',
    };

    var result = input;
    for (final entry in charByEntity.entries) {
      result = result.replaceAll(entry.key, entry.value);
    }
    return result;
  }

  // ============================================================
  // 全文提取（阅读模式）
  // ============================================================

  /// Downloads the page at [url] and extracts a reader-mode view of it.
  ///
  /// Steps: fetch the HTML, then extract the title, the main text and the
  /// image URLs with the regex-based helpers in this class.
  ///
  /// Never throws: an empty page, empty extracted text, or any exception is
  /// reported through [RssFullTextResult.error] (with `success` left false).
  static Future<RssFullTextResult> fetchFullText(String url) async {
    try {
      final page = (await _dio.get<String>(url)).data ?? '';
      if (page.isEmpty) {
        return const RssFullTextResult(error: '页面内容为空');
      }

      final pageTitle = _extractTitle(page);
      final bodyText = _extractContent(page);
      final pictures = _extractContentImages(page);

      if (bodyText.isNotEmpty) {
        return RssFullTextResult(
          success: true,
          title: pageTitle,
          content: bodyText,
          images: pictures,
          sourceUrl: url,
        );
      }

      // The title is still reported even though no body text was found.
      return RssFullTextResult(
        error: '无法提取正文内容',
        title: pageTitle,
      );
    } catch (e) {
      Log.e('RssService', '全文提取失败 [$url]: $e');
      return RssFullTextResult(error: '加载失败: $e');
    }
  }

  /// Extracts a page title from [html].
  ///
  /// Prefers a non-empty `og:title` meta tag (matched only when `property`
  /// precedes `content` in the tag), then a non-empty `<title>` element
  /// (trimmed). Returns `''` when neither is found. HTML entities in the
  /// result are decoded.
  static String _extractTitle(String html) {
    final ogPattern = RegExp(
      r'<meta[^>]*property=["\x27]og:title["\x27][^>]*content=["\x27]([^"\x27]*)["\x27]',
      caseSensitive: false,
    );
    final ogValue = ogPattern.firstMatch(html)?.group(1) ?? '';
    if (ogValue.isNotEmpty) {
      return _decodeHtmlEntities(ogValue);
    }

    final tagPattern = RegExp(
      r'<title[^>]*>(.*?)</title>',
      caseSensitive: false,
      dotAll: true,
    );
    final tagValue = tagPattern.firstMatch(html)?.group(1) ?? '';
    if (tagValue.isNotEmpty) {
      return _decodeHtmlEntities(tagValue.trim());
    }

    return '';
  }

  /// Extracts the main text of an HTML page (a simplified Readability pass).
  ///
  /// 1. Drops `<script>`, `<style>`, `<nav>`, `<footer>`, `<header>`,
  ///    `<aside>` and `<noscript>` elements.
  /// 2. Narrows to the first `<article>` element, or else to the first
  ///    `<div>` whose `class` attribute contains a content-like keyword.
  ///    The div match is non-greedy, so it ends at the first `</div>`.
  /// 3. Converts block tags to line breaks / bullets, strips the remaining
  ///    tags, decodes entities and collapses runs of blank lines.
  ///
  /// Returns the trimmed plain text, which may be empty.
  static String _extractContent(String html) {
    var cleaned = html;

    // Remove elements that never carry article text.
    const noiseTags = [
      'script',
      'style',
      'nav',
      'footer',
      'header',
      'aside',
      'noscript',
    ];
    for (final tag in noiseTags) {
      cleaned = cleaned.replaceAll(
        RegExp('<$tag[^>]*>.*?</$tag>', caseSensitive: false, dotAll: true),
        '',
      );
    }

    final articleMatch = RegExp(
      r'<article[^>]*>(.*?)</article>',
      caseSensitive: false,
      dotAll: true,
    ).firstMatch(cleaned);
    if (articleMatch != null) {
      cleaned = articleMatch.group(1)!;
    } else {
      // Both quote styles are excluded inside the attribute value so the
      // keyword has to appear in the class list itself, not in later markup.
      final contentDiv = RegExp(
        r'<div[^>]*class=["\x27][^"\x27]*(?:article|content|post-body|entry-content|post-content|story-body|article-body|rich-text|markdown-body)[^"\x27]*["\x27][^>]*>(.*?)</div>',
        caseSensitive: false,
        dotAll: true,
      ).firstMatch(cleaned);
      if (contentDiv != null) {
        cleaned = contentDiv.group(1)!;
      }
    }

    // Turn structural tags into plain-text layout, then strip the rest.
    var text = cleaned;
    text = text.replaceAll(RegExp(r'<br\s*/?>', caseSensitive: false), '\n');
    text = text.replaceAll(RegExp(r'<p[^>]*>', caseSensitive: false), '\n');
    text = text.replaceAll(RegExp(r'</p>', caseSensitive: false), '\n');
    text = text.replaceAll(
      RegExp(r'<h[1-6][^>]*>', caseSensitive: false),
      '\n\n',
    );
    text = text.replaceAll(RegExp(r'</h[1-6]>', caseSensitive: false), '\n');
    text = text.replaceAll(RegExp(r'<li[^>]*>', caseSensitive: false), '• ');
    text = text.replaceAll(
      RegExp(r'<blockquote[^>]*>', caseSensitive: false),
      '\n> ',
    );
    text = text.replaceAll(RegExp(r'<[^>]*>'), '');

    // Decode entities. `&amp;` goes last so `&amp;lt;` becomes the literal
    // text `&lt;` instead of being decoded twice.
    text = text
        .replaceAll('&nbsp;', ' ')
        .replaceAll('&lt;', '<')
        .replaceAll('&gt;', '>')
        .replaceAll('&quot;', '"')
        .replaceAll('&apos;', "'");
    text = text.replaceAllMapped(
      RegExp(r'&#(?:[xX]([0-9a-fA-F]+)|(\d+));'),
      (match) {
        final hex = match.group(1);
        final code = hex != null
            ? int.tryParse(hex, radix: 16)
            : int.tryParse(match.group(2)!);
        // Drop references that are not valid Unicode scalar values.
        if (code == null ||
            code <= 0 ||
            code > 0x10FFFF ||
            (code >= 0xD800 && code <= 0xDFFF)) {
          return '';
        }
        // Treat a numeric no-break space like `&nbsp;` above.
        return code == 0xA0 ? ' ' : String.fromCharCode(code);
      },
    );
    text = text.replaceAll('&amp;', '&');

    text = text.replaceAll(RegExp(r'\n{3,}'), '\n\n');

    return text.trim();
  }

  /// Collects up to 10 `<img src>` URLs from [html].
  ///
  /// URLs ending in `.svg` or containing `avatar`, `icon` or `logo` are
  /// skipped. URLs are returned exactly as written in the markup.
  static List<String> _extractContentImages(String html) {
    const excludedMarkers = ['avatar', 'icon', 'logo'];
    const maxImages = 10;
    final srcPattern = RegExp(
      r'<img[^>]+src\s*=\s*["\x27]([^"\x27]+)["\x27]',
      dotAll: true,
    );

    final picked = <String>[];
    for (final match in srcPattern.allMatches(html)) {
      final src = match.group(1) ?? '';
      final rejected =
          src.isEmpty ||
          src.endsWith('.svg') ||
          excludedMarkers.any(src.contains);
      if (rejected) continue;

      picked.add(src);
      if (picked.length == maxImages) break;
    }
    return picked;
  }

  /// Decodes a small set of HTML entities in [text]: `&lt;`, `&gt;`,
  /// `&quot;`, `&#39;`, `&apos;`, `&nbsp;` and `&amp;`.
  ///
  /// `&amp;` is decoded last so that an escaped ampersand followed by
  /// entity-like text (e.g. `&amp;lt;`) yields the literal `&lt;` instead of
  /// being decoded a second time.
  static String _decodeHtmlEntities(String text) {
    return text
        .replaceAll('&lt;', '<')
        .replaceAll('&gt;', '>')
        .replaceAll('&quot;', '"')
        .replaceAll('&#39;', "'")
        .replaceAll('&apos;', "'")
        .replaceAll('&nbsp;', ' ')
        .replaceAll('&amp;', '&');
  }
}

/// Outcome of a full-text (reader mode) extraction.
///
/// All fields are optional; the defaults describe an unsuccessful result
/// with no images.
class RssFullTextResult {
  const RssFullTextResult({
    this.success = false,
    this.title,
    this.content,
    this.images = const [],
    this.sourceUrl,
    this.error,
  });

  /// Whether extraction succeeded.
  final bool success;

  /// Extracted page title, if any.
  final String? title;

  /// Extracted plain-text content, if any.
  final String? content;

  /// Image URLs found on the page.
  final List<String> images;

  /// URL the content was extracted from, if recorded.
  final String? sourceUrl;

  /// Error message describing why extraction failed, if it did.
  final String? error;
}