Files
xianyan/lib/features/discover/services/rss_service.dart
Developer 10df6b705c 同步
2026-06-02 03:52:54 +08:00

736 lines
22 KiB
Dart
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/// ============================================================
/// 闲言APP — RSS订阅服务
/// 创建时间: 2026-05-30
/// 更新时间: 2026-05-30
/// 作用: RSS/Atom订阅源管理+文章解析+持久化存储
/// 上次更新: 新增OPML导入导出功能
/// ============================================================
import 'dart:convert';
import 'package:dio/dio.dart';
import 'package:hive_ce/hive.dart';
import 'package:rss_dart/dart_rss.dart';
import 'package:xianyan/core/utils/logger.dart';
/// Categories an RSS subscription can be filed under.
///
/// Every value carries a Chinese display [label].
enum RssCategory {
  tech('科技'),
  efficiency('效率'),
  general('综合'),
  techCommunity('技术社区'),
  design('设计'),
  finance('财经'),
  news('新闻');

  /// Human-readable name associated with this category.
  final String label;

  const RssCategory(this.label);
}
/// Immutable description of one RSS/Atom subscription.
class RssSubscription {
  /// Creates a subscription; [id], [title] and [url] are mandatory.
  const RssSubscription({
    required this.id,
    required this.title,
    required this.url,
    this.description,
    this.iconUrl,
    this.category = RssCategory.general,
    this.lastUpdated,
    this.unreadCount = 0,
    this.addedAt,
  });

  /// Rebuilds a subscription from a map shaped like the output of [toMap].
  ///
  /// Missing string fields fall back to `''`, an unknown category name falls
  /// back to [RssCategory.general], and a missing unread count becomes `0`.
  factory RssSubscription.fromMap(Map<String, dynamic> map) {
    // Resolve the category by name, defaulting when nothing matches.
    final storedCategory = map['category'];
    var resolvedCategory = RssCategory.general;
    for (final candidate in RssCategory.values) {
      if (candidate.name == storedCategory) {
        resolvedCategory = candidate;
        break;
      }
    }

    return RssSubscription(
      id: (map['id'] as String?) ?? '',
      title: (map['title'] as String?) ?? '',
      url: (map['url'] as String?) ?? '',
      description: map['description'] as String?,
      iconUrl: map['iconUrl'] as String?,
      category: resolvedCategory,
      lastUpdated: RssService.parseDateTime(map['lastUpdated'] as String?),
      unreadCount: (map['unreadCount'] as int?) ?? 0,
      addedAt: RssService.parseDateTime(map['addedAt'] as String?),
    );
  }

  final String id;
  final String title;
  final String url;
  final String? description;
  final String? iconUrl;
  final RssCategory category;
  final DateTime? lastUpdated;
  final int unreadCount;
  final DateTime? addedAt;

  /// Returns a copy with the given fields replaced.
  ///
  /// [id], [url] and [addedAt] are always carried over unchanged. Passing
  /// `null` for a field keeps the current value.
  RssSubscription copyWith({
    String? title,
    String? description,
    String? iconUrl,
    RssCategory? category,
    DateTime? lastUpdated,
    int? unreadCount,
  }) {
    final nextTitle = title ?? this.title;
    final nextDescription = description ?? this.description;
    final nextIconUrl = iconUrl ?? this.iconUrl;
    final nextCategory = category ?? this.category;
    final nextLastUpdated = lastUpdated ?? this.lastUpdated;
    final nextUnreadCount = unreadCount ?? this.unreadCount;
    return RssSubscription(
      id: id,
      title: nextTitle,
      url: url,
      description: nextDescription,
      iconUrl: nextIconUrl,
      category: nextCategory,
      lastUpdated: nextLastUpdated,
      unreadCount: nextUnreadCount,
      addedAt: addedAt,
    );
  }

  /// Serializes this subscription into a JSON-encodable map.
  ///
  /// The category is stored by enum name and dates as ISO-8601 strings.
  Map<String, dynamic> toMap() {
    return <String, dynamic>{
      'id': id,
      'title': title,
      'url': url,
      'description': description,
      'iconUrl': iconUrl,
      'category': category.name,
      'lastUpdated': lastUpdated?.toIso8601String(),
      'unreadCount': unreadCount,
      'addedAt': addedAt?.toIso8601String(),
    };
  }
}
/// Immutable model of a single article/entry taken from a feed.
class RssFeedItem {
  /// Creates a feed item; only [title] is mandatory.
  const RssFeedItem({
    required this.title,
    this.description,
    this.content,
    this.link,
    this.author,
    this.pubDate,
    this.imageUrl,
    this.sourceId,
    this.sourceTitle,
    this.isRead = false,
  });

  final String title;
  final String? description;
  final String? content;
  final String? link;
  final String? author;
  final DateTime? pubDate;
  final String? imageUrl;
  final String? sourceId;
  final String? sourceTitle;
  final bool isRead;

  /// Identifier built from [sourceId] plus the hash of [link], or of [title]
  /// when there is no link.
  ///
  /// NOTE(review): this relies on `String.hashCode`; Dart does not document
  /// hash codes as stable across executions, so confirm before treating this
  /// value as a durable storage key.
  String get uid {
    final prefix = sourceId ?? '';
    final hash = link?.hashCode ?? title.hashCode;
    return '${prefix}_$hash';
  }

  /// Returns a copy of this item, optionally with a different [isRead] flag.
  RssFeedItem copyWith({bool? isRead}) {
    final nextIsRead = isRead ?? this.isRead;
    return RssFeedItem(
      title: title,
      description: description,
      content: content,
      link: link,
      author: author,
      pubDate: pubDate,
      imageUrl: imageUrl,
      sourceId: sourceId,
      sourceTitle: sourceTitle,
      isRead: nextIsRead,
    );
  }
}
/// RSS订阅服务 — 解析RSS/Atom源 + 持久化
class RssService {
RssService._();
/// HTTP client shared by every request this service issues.
///
/// Configured with 10-second connect/receive timeouts and plain-text
/// response bodies.
static final Dio _dio = Dio(
BaseOptions(
connectTimeout: const Duration(seconds: 10),
receiveTimeout: const Duration(seconds: 10),
responseType: ResponseType.plain,
),
);
/// Name of the Hive box that stores the serialized subscription list.
static const String _boxName = 'rss_subscriptions';
/// Name of the Hive box that stores per-article read flags.
static const String _readArticlesBox = 'rss_read_articles';
/// Subscription box handle; stays null until [init] opens it successfully.
static Box<dynamic>? _box;
/// Read-flag box handle; stays null until [init] opens it successfully.
static Box<dynamic>? _readBox;
/// Opens the two Hive boxes backing this service.
///
/// Any error raised while opening is logged and swallowed, leaving whichever
/// box was not opened as null.
static Future<void> init() async {
  try {
    final subscriptionBox = await Hive.openBox<dynamic>(_boxName);
    _box = subscriptionBox;
    final readFlagBox = await Hive.openBox<dynamic>(_readArticlesBox);
    _readBox = readFlagBox;
    Log.i('RssService', 'Hive存储初始化完成');
  } catch (error) {
    Log.e('RssService', 'Hive存储初始化失败: $error');
  }
}
/// Returns the persisted subscriptions.
///
/// Falls back to a fresh copy of [defaultSubscriptions] when the box is not
/// open, nothing has been stored yet, or the stored value cannot be decoded.
static List<RssSubscription> getSavedSubscriptions() {
  final stored = _box?.get('subscriptions');
  if (stored == null) return defaultSubscriptions.toList();

  try {
    final decoded = jsonDecode(stored as String) as List<dynamic>;
    return [
      for (final entry in decoded)
        RssSubscription.fromMap(entry as Map<String, dynamic>),
    ];
  } catch (error) {
    Log.e('RssService', '读取订阅源失败: $error');
    return defaultSubscriptions.toList();
  }
}
/// Persists [subs] as a JSON string under the `subscriptions` key.
///
/// Does nothing when the box is not open; write errors are logged and
/// swallowed.
static Future<void> saveSubscriptions(List<RssSubscription> subs) async {
  final box = _box;
  if (box == null) return;

  try {
    final payload = jsonEncode([for (final sub in subs) sub.toMap()]);
    await box.put('subscriptions', payload);
  } catch (error) {
    Log.e('RssService', '保存订阅源失败: $error');
  }
}
/// Appends [sub] to the saved subscriptions unless one with the same URL
/// already exists, in which case a warning is logged and nothing is saved.
static Future<void> addSubscription(RssSubscription sub) async {
  final current = getSavedSubscriptions();
  for (final existing in current) {
    if (existing.url == sub.url) {
      Log.w('RssService', '订阅源已存在: ${sub.url}');
      return;
    }
  }
  await saveSubscriptions([...current, sub]);
}
/// Drops every saved subscription whose id equals [id], then saves the rest.
static Future<void> removeSubscription(String id) async {
  final remaining = [
    for (final sub in getSavedSubscriptions())
      if (sub.id != id) sub,
  ];
  await saveSubscriptions(remaining);
}
/// Stores a read flag for [articleUid]; a no-op when the read box is not open.
static Future<void> markArticleRead(String articleUid) async {
  await _readBox?.put(articleUid, true);
}
/// Whether a read flag is stored for [articleUid].
///
/// Always false when the read box is not open.
static bool isArticleRead(String articleUid) {
  final flag = _readBox?.get(articleUid);
  return flag == true;
}
/// Downloads the feed of [sub] and converts its entries to [RssFeedItem]s.
///
/// The body is treated as RSS when it contains `<rss` or `<channel`, as Atom
/// when it contains `<feed`, and as empty otherwise. Any download or parse
/// error is logged and an empty list is returned.
///
/// The read flag of each item is looked up with the item's own
/// [RssFeedItem.uid]. Previously the key was rebuilt inline and could differ
/// from `uid` (a null title hashed `null` instead of the '无标题' fallback,
/// and an Atom link with a null `href` never fell back to the title), so the
/// lookup could miss a flag stored under `uid`.
/// NOTE(review): assumes callers mark articles read using `item.uid` —
/// confirm against the call sites.
static Future<List<RssFeedItem>> fetchFeed(RssSubscription sub) async {
  // Single source of truth for the read-state key.
  RssFeedItem withReadState(RssFeedItem item) =>
      item.copyWith(isRead: isArticleRead(item.uid));

  try {
    final response = await _dio.get<String>(sub.url);
    final xml = response.data ?? '';

    if (xml.contains('<rss') || xml.contains('<channel')) {
      final feed = RssFeed.parse(xml);
      return feed.items
          .map(
            (item) => withReadState(
              RssFeedItem(
                title: item.title ?? '无标题',
                description: item.description ?? item.content?.value,
                content: item.content?.value ?? item.description,
                link: item.link,
                author: item.author ?? item.dc?.creator,
                pubDate: parseDateTime(item.pubDate),
                imageUrl:
                    item.enclosure?.url ??
                    item.media?.thumbnails.firstOrNull?.url,
                sourceId: sub.id,
                sourceTitle: sub.title,
              ),
            ),
          )
          .toList();
    }

    if (xml.contains('<feed')) {
      final feed = AtomFeed.parse(xml);
      return feed.items
          .map(
            (entry) => withReadState(
              RssFeedItem(
                title: entry.title ?? '无标题',
                description: entry.summary ?? entry.content,
                content: entry.content ?? entry.summary,
                link: entry.links.firstOrNull?.href,
                author: entry.authors.firstOrNull?.name,
                pubDate: parseDateTime(entry.published ?? entry.updated),
                imageUrl: entry.media?.thumbnails.firstOrNull?.url,
                sourceId: sub.id,
                sourceTitle: sub.title,
              ),
            ),
          )
          .toList();
    }

    // Neither RSS nor Atom markers were found.
    return <RssFeedItem>[];
  } catch (e) {
    Log.e('RssService', 'RSS解析失败 [${sub.url}]: $e');
    return [];
  }
}
/// Fetches [url] and, if the body looks like an RSS or Atom feed, returns a
/// new subscription describing it.
///
/// Returns null when the body is neither format or when any error occurs
/// (the error is logged).
static Future<RssSubscription?> discoverFeed(String url) async {
  try {
    final feedUrl = _normalizeUrl(url);
    final body = (await _dio.get<String>(feedUrl)).data ?? '';

    final looksLikeRss = body.contains('<rss') || body.contains('<channel');
    if (!looksLikeRss && !body.contains('<feed')) return null;

    // Pull the three format-specific fields, then build the result once.
    final String? feedTitle;
    final String? feedDescription;
    final String? feedIcon;
    if (looksLikeRss) {
      final rss = RssFeed.parse(body);
      feedTitle = rss.title;
      feedDescription = rss.description;
      feedIcon = rss.image?.url;
    } else {
      final atom = AtomFeed.parse(body);
      feedTitle = atom.title;
      feedDescription = atom.subtitle;
      feedIcon = atom.icon;
    }

    return RssSubscription(
      id: 'rss_${DateTime.now().millisecondsSinceEpoch}',
      title: feedTitle ?? feedUrl,
      url: feedUrl,
      description: feedDescription,
      iconUrl: feedIcon,
      category: _guessCategory(feedTitle ?? ''),
      addedAt: DateTime.now(),
    );
  } catch (error) {
    Log.e('RssService', '发现RSS源失败: $error');
    return null;
  }
}
/// Fetches every subscription in [subs] concurrently.
///
/// Returns a map from subscription id to the items [fetchFeed] produced for
/// it; entries are inserted as each fetch completes.
static Future<Map<String, List<RssFeedItem>>> fetchAllFeeds(
  List<RssSubscription> subs,
) async {
  final itemsBySource = <String, List<RssFeedItem>>{};
  final pending = <Future<void>>[
    for (final sub in subs)
      fetchFeed(sub).then((items) {
        itemsBySource[sub.id] = items;
      }),
  ];
  await Future.wait(pending);
  return itemsBySource;
}
/// Built-in recommended subscriptions.
///
/// Entries that omit `category` use the constructor default.
/// NOTE(review): the 'ruanyifeng' entry uses a cleartext `http://` URL —
/// confirm the target platforms allow cleartext traffic.
static const List<RssSubscription> defaultSubscriptions = [
RssSubscription(
id: '36kr',
title: '36氪',
url: 'https://36kr.com/feed',
description: '科技创业资讯',
category: RssCategory.tech,
),
RssSubscription(
id: 'ifanr',
title: '爱范儿',
url: 'https://www.ifanr.com/feed',
description: '科技媒体',
category: RssCategory.tech,
),
RssSubscription(
id: 'sspai',
title: '少数派',
url: 'https://sspai.com/feed',
description: '高效工作和品质生活',
category: RssCategory.efficiency,
),
RssSubscription(
id: 'zhihu_daily',
title: '知乎日报',
url: 'https://daily.zhihu.com/rss',
description: '知乎精选',
),
RssSubscription(
id: 'ruanyifeng',
title: '阮一峰',
url: 'http://www.ruanyifeng.com/blog/atom.xml',
description: '科技博客',
category: RssCategory.tech,
),
RssSubscription(
id: 'v2ex',
title: 'V2EX',
url: 'https://www.v2ex.com/index.xml',
description: '创意工作者社区',
category: RssCategory.techCommunity,
),
];
/// Parses a timestamp from a feed or from stored data into a [DateTime].
///
/// Accepts everything [DateTime.tryParse] accepts (ISO-8601 style, as used by
/// Atom and by the stored subscription maps) and additionally the RFC 822 /
/// RFC 2822 form used by RSS 2.0 `pubDate`, e.g.
/// `Tue, 02 Jun 2026 03:52:54 +0800`. `DateTime.tryParse` alone rejects that
/// form, which left RSS publication dates null.
///
/// Returns null for null, blank, or unrecognized input. RFC 822 results are
/// returned in UTC.
static DateTime? parseDateTime(String? dateStr) {
  final input = dateStr?.trim() ?? '';
  if (input.isEmpty) return null;

  // ISO-8601 first, so previously-parsable input behaves as before.
  final iso = DateTime.tryParse(input);
  if (iso != null) return iso;

  // [day-name ","] day month year hour ":" minute [":" second] [zone]
  final match = RegExp(
    r'^(?:[A-Za-z]{3,9},?\s+)?(\d{1,2})\s+([A-Za-z]{3})[A-Za-z]*\s+(\d{2,4})'
    r'\s+(\d{1,2}):(\d{2})(?::(\d{2}))?'
    r'(?:\s*([+-]\d{2}:?\d{2}|[A-Za-z]{1,5}))?$',
  ).firstMatch(input);
  if (match == null) return null;

  const monthNames = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
  ];
  final month = monthNames.indexOf(match.group(2)!.toLowerCase()) + 1;
  if (month == 0) return null;

  final day = int.parse(match.group(1)!);
  var year = int.parse(match.group(3)!);
  // RFC 2822 obsolete years: 00-49 are 20xx, other short years are 19xx.
  if (year < 50) {
    year += 2000;
  } else if (year < 1000) {
    year += 1900;
  }
  final hour = int.parse(match.group(4)!);
  final minute = int.parse(match.group(5)!);
  final second = int.parse(match.group(6) ?? '0');
  if (day < 1 || day > 31 || hour > 23 || minute > 59 || second > 60) {
    return null;
  }

  // Offset of the stated local time from UTC, in minutes.
  var offsetMinutes = 0;
  final zone = match.group(7);
  if (zone != null) {
    final numeric = RegExp(r'^([+-])(\d{2}):?(\d{2})$').firstMatch(zone);
    if (numeric != null) {
      final magnitude =
          int.parse(numeric.group(2)!) * 60 + int.parse(numeric.group(3)!);
      offsetMinutes = numeric.group(1) == '-' ? -magnitude : magnitude;
    } else {
      // Named zones from RFC 822; unknown names are treated as UTC.
      const namedZones = {
        'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
        'EST': -300, 'EDT': -240,
        'CST': -360, 'CDT': -300,
        'MST': -420, 'MDT': -360,
        'PST': -480, 'PDT': -420,
      };
      offsetMinutes = namedZones[zone.toUpperCase()] ?? 0;
    }
  }

  return DateTime.utc(year, month, day, hour, minute, second)
      .subtract(Duration(minutes: offsetMinutes));
}
/// Normalizes a user-entered feed address into an http(s) URL.
///
/// Trims whitespace, unwraps the `feed://host/...` and `feed:https://...`
/// forms, and prefixes `https://` when no http(s) scheme is present. The
/// scheme check is case-insensitive, so input such as `HTTP://example.com` is
/// returned as-is instead of being double-prefixed.
static String _normalizeUrl(String url) {
  var normalized = url.trim();

  // Drop a leading `feed:` pseudo-scheme, but only in its two URL forms so a
  // bare `host:port` input is never misread.
  normalized = normalized.replaceFirst(
    RegExp(r'^feed:(?://|(?=https?://))', caseSensitive: false),
    '',
  );

  final hasHttpScheme =
      RegExp(r'^https?://', caseSensitive: false).hasMatch(normalized);
  if (!hasHttpScheme) {
    normalized = 'https://$normalized';
  }
  return normalized;
}
/// Guesses a category from keywords found in a feed [title].
///
/// Keyword groups are checked in a fixed order (design, finance, news,
/// efficiency, tech community, tech) and the first hit wins; titles matching
/// nothing are [RssCategory.general].
///
/// The tech group used to contain an empty-string keyword. Every string
/// contains `''`, so every otherwise-unmatched title was classified as tech
/// and the general fallback was unreachable. The empty keyword is replaced by
/// '36kr' / '氪'.
/// NOTE(review): '36kr' / '氪' is inferred from the neighbouring '范儿'
/// keyword and the built-in 36氪 source — confirm the intended keyword.
static RssCategory _guessCategory(String title) {
  final lower = title.toLowerCase();
  bool mentions(List<String> keywords) =>
      keywords.any((keyword) => lower.contains(keyword));

  if (mentions(const ['设计', 'design'])) {
    return RssCategory.design;
  }
  if (mentions(const ['财经', '金融', 'finance'])) {
    return RssCategory.finance;
  }
  if (mentions(const ['新闻', 'news'])) {
    return RssCategory.news;
  }
  if (mentions(const ['效率', '少数派', 'sspai'])) {
    return RssCategory.efficiency;
  }
  if (mentions(const ['v2ex', '社区', 'forum'])) {
    return RssCategory.techCommunity;
  }
  if (mentions(const ['科技', 'tech', '36kr', '氪', '范儿'])) {
    return RssCategory.tech;
  }
  return RssCategory.general;
}
// ============================================================
// OPML 导入导出
// ============================================================
/// Serializes the saved subscriptions as an OPML 2.0 document.
///
/// Subscriptions are grouped under one parent outline per category (labelled
/// with the category label), in order of first appearance.
static String exportToOpml() {
  // Bucket subscriptions by category, keeping first-seen order.
  final byCategory = <RssCategory, List<RssSubscription>>{};
  for (final sub in getSavedSubscriptions()) {
    (byCategory[sub.category] ??= []).add(sub);
  }

  final out = StringBuffer()
    ..writeln('<?xml version="1.0" encoding="UTF-8"?>')
    ..writeln('<opml version="2.0">')
    ..writeln(' <head>')
    ..writeln(' <title>闲言APP RSS订阅</title>')
    ..writeln(
      ' <dateCreated>${DateTime.now().toIso8601String()}</dateCreated>',
    )
    ..writeln(' </head>')
    ..writeln(' <body>');

  byCategory.forEach((category, members) {
    out.writeln(' <outline text="${_xmlEscape(category.label)}">');
    for (final sub in members) {
      final escapedTitle = _xmlEscape(sub.title);
      final escapedUrl = _xmlEscape(sub.url);
      final rawDescription = sub.description;
      // The description attribute is only emitted when one is set.
      final descriptionAttr = rawDescription == null
          ? ''
          : ' description="${_xmlEscape(rawDescription)}"';
      out.writeln(
        ' <outline type="rss" text="$escapedTitle" title="$escapedTitle" '
        'xmlUrl="$escapedUrl"$descriptionAttr />',
      );
    }
    out.writeln(' </outline>');
  });

  out
    ..writeln(' </body>')
    ..writeln('</opml>');
  return out.toString();
}
/// Imports subscriptions from an OPML document and returns how many were
/// newly added.
///
/// Only `<outline ... type="rss" ...>` elements with a non-empty `xmlUrl` are
/// considered. The title comes from `title`, then `text`, then the URL.
/// Entries whose URL is already saved (or repeated within the file) are
/// skipped. Any error is logged and reported as `0`.
///
/// Changes from the earlier behaviour:
/// * a missing `description` attribute stays null instead of becoming `''`;
/// * the category is guessed from the unescaped title rather than the raw
///   attribute text;
/// * attribute patterns require leading whitespace, so `text=` cannot match
///   inside a longer attribute name.
static Future<int> importFromOpml(String opmlXml) async {
  try {
    final subs = <RssSubscription>[];
    final outlineRegex = RegExp(
      r'<outline[^>]*type="rss"[^>]*>',
      caseSensitive: false,
    );
    final textRegex = RegExp(r'\stext="([^"]*)"');
    final titleRegex = RegExp(r'\stitle="([^"]*)"');
    final xmlUrlRegex = RegExp(r'\sxmlUrl="([^"]*)"');
    final descRegex = RegExp(r'\sdescription="([^"]*)"');

    for (final match in outlineRegex.allMatches(opmlXml)) {
      final outline = match.group(0)!;
      final xmlUrl = xmlUrlRegex.firstMatch(outline)?.group(1);
      if (xmlUrl == null || xmlUrl.isEmpty) continue;

      final rawTitle =
          titleRegex.firstMatch(outline)?.group(1) ??
          textRegex.firstMatch(outline)?.group(1) ??
          xmlUrl;
      final rawDescription = descRegex.firstMatch(outline)?.group(1);

      // Unescape once and use the decoded text for both storage and guessing.
      final title = _xmlUnescape(rawTitle);
      subs.add(
        RssSubscription(
          id: 'import_${DateTime.now().millisecondsSinceEpoch}_${subs.length}',
          title: title,
          url: _xmlUnescape(xmlUrl),
          description:
              rawDescription == null ? null : _xmlUnescape(rawDescription),
          category: _guessCategory(title),
          addedAt: DateTime.now(),
        ),
      );
    }

    final existing = getSavedSubscriptions();
    final existingUrls = existing.map((s) => s.url).toSet();
    var added = 0;
    for (final sub in subs) {
      // Set.add returns false for URLs already present.
      if (existingUrls.add(sub.url)) {
        existing.add(sub);
        added++;
      }
    }
    if (added > 0) {
      await saveSubscriptions(existing);
    }
    Log.i('RssService', 'OPML导入完成: ${subs.length}个源,新增$added个');
    return added;
  } catch (e) {
    Log.e('RssService', 'OPML导入失败: $e');
    return 0;
  }
}
/// Escapes the five XML special characters (`&`, `"`, `'`, `<`, `>`) in
/// [input] with their predefined entities.
static String _xmlEscape(String input) {
  const entityFor = <String, String>{
    '&': '&amp;',
    '"': '&quot;',
    "'": '&apos;',
    '<': '&lt;',
    '>': '&gt;',
  };
  // One pass over the text; each special character maps to its entity.
  return input.replaceAllMapped(
    RegExp(r'[&"\x27<>]'),
    (match) => entityFor[match.group(0)]!,
  );
}
/// Reverses the five predefined XML entities in [input].
///
/// A null input yields an empty string. Each entity is decoded exactly once,
/// so `&amp;lt;` becomes `&lt;`.
static String _xmlUnescape(String? input) {
  if (input == null) return '';
  const charFor = <String, String>{
    'gt': '>',
    'lt': '<',
    'apos': "'",
    'quot': '"',
    'amp': '&',
  };
  return input.replaceAllMapped(
    RegExp(r'&(gt|lt|apos|quot|amp);'),
    (match) => charFor[match.group(1)]!,
  );
}
// ============================================================
// 全文提取(阅读模式)
// ============================================================
/// Downloads the page at [url] and extracts a reader-mode view of it.
///
/// Steps: fetch the HTML, pull a title, reduce the markup to plain text via
/// [_extractContent], and collect image URLs from the page. The result has
/// `success: true` only when some text was extracted; otherwise `error`
/// explains why (empty page, nothing extracted, or the load failure).
static Future<RssFullTextResult> fetchFullText(String url) async {
  try {
    final page = (await _dio.get<String>(url)).data ?? '';
    if (page.isEmpty) {
      return const RssFullTextResult(error: '页面内容为空');
    }

    final pageTitle = _extractTitle(page);
    final body = _extractContent(page);
    final pictures = _extractContentImages(page);

    return body.isEmpty
        ? RssFullTextResult(error: '无法提取正文内容', title: pageTitle)
        : RssFullTextResult(
            success: true,
            title: pageTitle,
            content: body,
            images: pictures,
            sourceUrl: url,
          );
  } catch (error) {
    Log.e('RssService', '全文提取失败 [$url]: $error');
    return RssFullTextResult(error: '加载失败: $error');
  }
}
/// Extracts a page title from [html].
///
/// Prefers a non-empty `og:title` meta tag (with `property` appearing before
/// `content`), then a non-empty `<title>` element (trimmed), and returns `''`
/// when neither is found. HTML entities in the result are decoded.
///
/// The `content` value is captured up to the same quote character that opened
/// it, so a double-quoted title containing an apostrophe is no longer cut off
/// at the apostrophe.
static String _extractTitle(String html) {
  final ogTitle = RegExp(
    r'<meta[^>]*property=(["\x27])og:title\1[^>]*content=(["\x27])(.*?)\2',
    caseSensitive: false,
    dotAll: true,
  ).firstMatch(html)?.group(3);
  if (ogTitle != null && ogTitle.isNotEmpty) {
    return _decodeHtmlEntities(ogTitle);
  }

  final pageTitle = RegExp(
    r'<title[^>]*>(.*?)</title>',
    caseSensitive: false,
    dotAll: true,
  ).firstMatch(html)?.group(1);
  if (pageTitle != null && pageTitle.isNotEmpty) {
    return _decodeHtmlEntities(pageTitle.trim());
  }
  return '';
}
/// Reduces a page's [html] to plain article text (a simple Readability-style
/// heuristic).
///
/// 1. Removes `script`, `style`, `nav`, `footer`, `header`, `aside` and
///    `noscript` elements.
/// 2. Narrows to the first `<article>` element or, failing that, the first
///    `<div>` whose class attribute contains a known content keyword. Both
///    matches stop at the first closing tag, so nested markup may truncate.
/// 3. Turns line breaks, paragraphs, headings, list items and block quotes
///    into newlines/markers, strips all remaining tags, decodes entities and
///    collapses runs of three or more newlines.
///
/// Fixes over the earlier version:
/// * the class pattern excludes both quote characters, so it can no longer
///   run past the end of the attribute and select an unrelated `<div>`;
/// * entities are decoded in one pass, so `&amp;lt;` yields `&lt;`, not `<`;
/// * numeric entities (`&#8217;`, `&#x4E2D;`) are decoded instead of deleted;
/// * each list item starts on its own bulleted line instead of being joined
///   to the previous one.
static String _extractContent(String html) {
  var cleaned = html;

  // Strip elements that never contain article text.
  const noiseTags = [
    'script',
    'style',
    'nav',
    'footer',
    'header',
    'aside',
    'noscript',
  ];
  for (final tag in noiseTags) {
    cleaned = cleaned.replaceAll(
      RegExp('<$tag[^>]*>.*?</$tag>', caseSensitive: false, dotAll: true),
      '',
    );
  }

  // Prefer <article>; otherwise a div whose class names a content area.
  final articleMatch = RegExp(
    r'<article[^>]*>(.*?)</article>',
    caseSensitive: false,
    dotAll: true,
  ).firstMatch(cleaned);
  if (articleMatch != null) {
    cleaned = articleMatch.group(1)!;
  } else {
    final contentDiv = RegExp(
      r'<div[^>]*class=["\x27][^"\x27]*(?:article|content|post-body|entry-content|post-content|story-body|article-body|rich-text|markdown-body)[^"\x27]*["\x27][^>]*>(.*?)</div>',
      caseSensitive: false,
      dotAll: true,
    ).firstMatch(cleaned);
    if (contentDiv != null) {
      cleaned = contentDiv.group(1)!;
    }
  }

  // Convert block-level tags to text structure, then drop remaining tags.
  var text = cleaned;
  text = text.replaceAll(RegExp(r'<br\s*/?>', caseSensitive: false), '\n');
  text = text.replaceAll(RegExp(r'<p[^>]*>', caseSensitive: false), '\n');
  text = text.replaceAll(RegExp(r'</p>', caseSensitive: false), '\n');
  text = text.replaceAll(
    RegExp(r'<h[1-6][^>]*>', caseSensitive: false),
    '\n\n',
  );
  text = text.replaceAll(RegExp(r'</h[1-6]>', caseSensitive: false), '\n');
  text = text.replaceAll(RegExp(r'<li[^>]*>', caseSensitive: false), '\n• ');
  text = text.replaceAll(
    RegExp(r'<blockquote[^>]*>', caseSensitive: false),
    '\n> ',
  );
  text = text.replaceAll(RegExp(r'<[^>]*>'), '');

  // Entities are decoded only after tag stripping, so escaped markup such as
  // `&lt;b&gt;` survives as literal text.
  text = _decodeTextEntities(text);
  text = text.replaceAll(RegExp(r'\n{3,}'), '\n\n');
  return text.trim();
}

/// Decodes named and numeric HTML entities in [input] in a single pass.
///
/// Unknown named entities are left untouched; numeric entities that are not
/// valid Unicode scalar values are removed.
static String _decodeTextEntities(String input) {
  const named = <String, String>{
    'nbsp': ' ',
    'amp': '&',
    'lt': '<',
    'gt': '>',
    'quot': '"',
    'apos': "'",
  };
  return input.replaceAllMapped(
    RegExp(r'&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([A-Za-z]+));'),
    (match) {
      final decimal = match.group(1);
      final hex = match.group(2);
      if (decimal != null || hex != null) {
        final code = decimal != null
            ? int.tryParse(decimal)
            : int.tryParse(hex!, radix: 16);
        final isScalar = code != null &&
            code > 0 &&
            code <= 0x10FFFF &&
            (code < 0xD800 || code > 0xDFFF);
        return isScalar ? String.fromCharCode(code) : '';
      }
      return named[match.group(3)!] ?? match.group(0)!;
    },
  );
}
/// Collects up to ten image URLs from the `<img src=...>` tags in [html].
///
/// URLs ending in `.svg` or containing `avatar`, `icon` or `logo` are
/// skipped.
static List<String> _extractContentImages(String html) {
  const maxImages = 10;
  final srcPattern = RegExp(
    r'<img[^>]+src\s*=\s*["\x27]([^"\x27]+)["\x27]',
    dotAll: true,
  );

  final found = <String>[];
  for (final tag in srcPattern.allMatches(html)) {
    if (found.length >= maxImages) break;
    final src = tag.group(1) ?? '';
    if (src.isEmpty) continue;
    final looksDecorative = src.endsWith('.svg') ||
        src.contains('avatar') ||
        src.contains('icon') ||
        src.contains('logo');
    if (!looksDecorative) found.add(src);
  }
  return found;
}
/// Decodes common HTML entities in [text].
///
/// Handles `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;` (decoded to a
/// regular space) and decimal/hex numeric entities such as `&#39;` or
/// `&#x4E2D;`. Unknown or invalid entities are left unchanged.
///
/// Decoding is a single pass. The earlier chained replacement decoded
/// `&amp;` first, so text like `&amp;lt;` was decoded twice and came out as
/// `<` instead of `&lt;`.
static String _decodeHtmlEntities(String text) {
  const named = <String, String>{
    'amp': '&',
    'lt': '<',
    'gt': '>',
    'quot': '"',
    'apos': "'",
    'nbsp': ' ',
  };
  return text.replaceAllMapped(
    RegExp(r'&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|([A-Za-z]+));'),
    (match) {
      final original = match.group(0)!;
      final decimal = match.group(1);
      final hex = match.group(2);
      if (decimal != null || hex != null) {
        final code = decimal != null
            ? int.tryParse(decimal)
            : int.tryParse(hex!, radix: 16);
        // Only decode valid Unicode scalar values; keep anything else as-is.
        final isScalar = code != null &&
            code > 0 &&
            code <= 0x10FFFF &&
            (code < 0xD800 || code > 0xDFFF);
        return isScalar ? String.fromCharCode(code) : original;
      }
      return named[match.group(3)!] ?? original;
    },
  );
}
}
/// Outcome of a full-text (reader mode) extraction.
///
/// A freshly constructed result defaults to `success: false` with no images.
class RssFullTextResult {
  /// Whether extraction produced usable content.
  final bool success;

  /// Page title, when one was set.
  final String? title;

  /// Extracted text, when one was set.
  final String? content;

  /// Image URLs attached to the result.
  final List<String> images;

  /// URL the content was loaded from, when one was set.
  final String? sourceUrl;

  /// Failure description, when one was set.
  final String? error;

  const RssFullTextResult({
    this.success = false,
    this.title,
    this.content,
    this.images = const [],
    this.sourceUrl,
    this.error,
  });
}