Files
xianyan/lib/features/discover/services/rss_service.dart
Developer ad00967c68 chore: 迁移依赖、移除sqlite3_flutter_libs并新增功能
1. 替换hive_flutter为hive_ce_flutter依赖
2. 从各平台插件列表移除sqlite3_flutter_libs
3. 重构API请求体格式,优化历史记录去重逻辑
4. 新增CTC笔记相关功能:桌面小部件、模板模型、本地存储
5. 新增表单收集服务和后台管理接口
6. 优化缓存配置、多语言文案和UI细节
7. 重构首页状态监听组件
2026-06-15 10:04:52 +08:00

997 lines
31 KiB
Dart
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/// ============================================================
/// 闲言APP — RSS订阅服务
/// 创建时间: 2026-05-30
/// 更新时间: 2026-06-12
/// 作用: RSS/Atom订阅源管理+文章解析+持久化存储
/// 上次更新: 新增阅读进度持久化功能(Hive box: rss_reading_progress);
/// 支持保存/获取/批量获取文章滚动偏移量
/// ============================================================
import 'dart:convert';
import 'package:dio/dio.dart';
import 'package:hive_ce_flutter/hive_flutter.dart';
import 'package:rss_dart/dart_rss.dart';
import 'package:xianyan/core/utils/logger.dart';
import 'package:xianyan/core/storage/hive_safe_access.dart';
/// Category a feed subscription is filed under.
///
/// Each value carries the Chinese display [label] given in its constructor.
enum RssCategory {
  tech('科技'),
  efficiency('效率'),
  general('综合'),
  techCommunity('技术社区'),
  design('设计'),
  finance('财经'),
  news('新闻');

  /// Display name of this category.
  final String label;

  const RssCategory(this.label);
}
/// Immutable description of one subscribed RSS/Atom feed.
class RssSubscription {
  /// Identifier of the subscription.
  final String id;

  /// Display title of the feed.
  final String title;

  /// Feed URL.
  final String url;

  /// Optional free-text description.
  final String? description;

  /// Optional icon URL.
  final String? iconUrl;

  /// Category the feed is filed under; defaults to [RssCategory.general].
  final RssCategory category;

  /// Time of the last update, if recorded.
  final DateTime? lastUpdated;

  /// Number of unread items; defaults to 0.
  final int unreadCount;

  /// Time the subscription was added, if recorded.
  final DateTime? addedAt;

  /// Time the user last read this feed, if recorded.
  final DateTime? lastReadAt;

  const RssSubscription({
    required this.id,
    required this.title,
    required this.url,
    this.description,
    this.iconUrl,
    this.category = RssCategory.general,
    this.lastUpdated,
    this.unreadCount = 0,
    this.addedAt,
    this.lastReadAt,
  });

  /// Rebuilds a subscription from a map shaped like the output of [toMap].
  ///
  /// Missing `id`/`title`/`url` become empty strings, an unknown or missing
  /// `category` becomes [RssCategory.general], and a missing `unreadCount`
  /// becomes 0. Timestamps go through [RssService.parseDateTime].
  factory RssSubscription.fromMap(Map<String, dynamic> map) {
    final storedCategory = map['category'];
    var resolvedCategory = RssCategory.general;
    for (final candidate in RssCategory.values) {
      if (candidate.name == storedCategory) {
        resolvedCategory = candidate;
        break;
      }
    }

    final unread = map['unreadCount'] as num?;

    return RssSubscription(
      id: map['id'] as String? ?? '',
      title: map['title'] as String? ?? '',
      url: map['url'] as String? ?? '',
      description: map['description'] as String?,
      iconUrl: map['iconUrl'] as String?,
      category: resolvedCategory,
      lastUpdated: RssService.parseDateTime(map['lastUpdated'] as String?),
      unreadCount: unread?.toInt() ?? 0,
      addedAt: RssService.parseDateTime(map['addedAt'] as String?),
      lastReadAt: RssService.parseDateTime(map['lastReadAt'] as String?),
    );
  }

  /// Returns a copy with the given fields replaced.
  ///
  /// [id], [url] and [addedAt] are always carried over unchanged. A null
  /// argument keeps the current value; pass [clearLastReadAt] as true to
  /// reset [lastReadAt] to null.
  RssSubscription copyWith({
    String? title,
    String? description,
    String? iconUrl,
    RssCategory? category,
    DateTime? lastUpdated,
    int? unreadCount,
    DateTime? lastReadAt,
    bool clearLastReadAt = false,
  }) {
    final DateTime? nextLastReadAt;
    if (clearLastReadAt) {
      nextLastReadAt = null;
    } else {
      nextLastReadAt = lastReadAt ?? this.lastReadAt;
    }

    return RssSubscription(
      id: id,
      title: title ?? this.title,
      url: url,
      description: description ?? this.description,
      iconUrl: iconUrl ?? this.iconUrl,
      category: category ?? this.category,
      lastUpdated: lastUpdated ?? this.lastUpdated,
      unreadCount: unreadCount ?? this.unreadCount,
      addedAt: addedAt,
      lastReadAt: nextLastReadAt,
    );
  }

  /// Serialises this subscription to a JSON-encodable map.
  ///
  /// The category is stored by enum name and timestamps as ISO-8601 strings.
  Map<String, dynamic> toMap() {
    return <String, dynamic>{
      'id': id,
      'title': title,
      'url': url,
      'description': description,
      'iconUrl': iconUrl,
      'category': category.name,
      'lastUpdated': lastUpdated?.toIso8601String(),
      'unreadCount': unreadCount,
      'addedAt': addedAt?.toIso8601String(),
      'lastReadAt': lastReadAt?.toIso8601String(),
    };
  }
}
/// Immutable model of a single feed entry.
class RssFeedItem {
  /// Entry title.
  final String title;

  /// Short description or summary, if any.
  final String? description;

  /// Full content, if any.
  final String? content;

  /// Link to the entry, if any.
  final String? link;

  /// Author name, if any.
  final String? author;

  /// Publication time, if it could be parsed.
  final DateTime? pubDate;

  /// Image URL associated with the entry, if any.
  final String? imageUrl;

  /// Id of the subscription this entry came from.
  final String? sourceId;

  /// Title of the subscription this entry came from.
  final String? sourceTitle;

  /// Whether the entry is marked as read; defaults to false.
  final bool isRead;

  /// Whether the entry is bookmarked; defaults to false.
  final bool isBookmarked;

  const RssFeedItem({
    required this.title,
    this.description,
    this.content,
    this.link,
    this.author,
    this.pubDate,
    this.imageUrl,
    this.sourceId,
    this.sourceTitle,
    this.isRead = false,
    this.isBookmarked = false,
  });

  /// Returns a copy with [isRead] and/or [isBookmarked] replaced.
  ///
  /// Every other field is carried over unchanged.
  RssFeedItem copyWith({bool? isRead, bool? isBookmarked}) {
    return RssFeedItem(
      title: title,
      description: description,
      content: content,
      link: link,
      author: author,
      pubDate: pubDate,
      imageUrl: imageUrl,
      sourceId: sourceId,
      sourceTitle: sourceTitle,
      isRead: isRead ?? this.isRead,
      isBookmarked: isBookmarked ?? this.isBookmarked,
    );
  }

  /// Identifier built from [sourceId] and the hash code of [link], falling
  /// back to the hash code of [title] when there is no link.
  ///
  /// NOTE(review): this relies on `String.hashCode`; confirm that value is
  /// stable enough across runs/platforms wherever the uid is persisted.
  String get uid {
    final prefix = sourceId ?? "";
    final suffix = link?.hashCode ?? title.hashCode;
    return '${prefix}_$suffix';
  }
}
/// RSS订阅服务 — 解析RSS/Atom源 + 持久化
class RssService {
// Private constructor; every member visible in this file is static.
RssService._();
// HTTP client shared by all requests in this service: 10 s connect and
// receive timeouts, response type set to plain.
static final Dio _dio = Dio(
BaseOptions(
connectTimeout: const Duration(seconds: 10),
receiveTimeout: const Duration(seconds: 10),
responseType: ResponseType.plain,
),
);
// Names of the Hive boxes used by this service.
static const String _boxName = 'rss_subscriptions';
static const String _readArticlesBox = 'rss_read_articles';
static const String _bookmarkedArticlesBox = 'rss_bookmarked_articles';
static const String _readingProgressBox = 'rss_reading_progress';
// Box handles assigned by init(); each stays null until init() assigns it,
// and the accessors below fall back to defaults/no-ops while they are null.
static Box<dynamic>? _box;
static Box<dynamic>? _readBox;
static Box<dynamic>? _bookmarkBox;
static Box<dynamic>? _progressBox;
/// Opens the Hive boxes used by this service through [HiveSafeAccess].
///
/// Any error is logged and swallowed. The subscription, read and bookmark
/// boxes are all awaited before any of them is assigned; the progress box is
/// opened afterwards, so a failure there leaves the first three assigned.
static Future<void> init() async {
try {
final box = await HiveSafeAccess.safeBox<dynamic>(name: _boxName);
final readBox = await HiveSafeAccess.safeBox<dynamic>(name: _readArticlesBox);
final bookmarkBox = await HiveSafeAccess.safeBox<dynamic>(name: _bookmarkedArticlesBox);
_box = box as Box<dynamic>?;
_readBox = readBox as Box<dynamic>?;
_bookmarkBox = bookmarkBox as Box<dynamic>?;
// The reading-progress box is opened last, after the others are assigned.
final progressBox = await HiveSafeAccess.safeBox<dynamic>(name: _readingProgressBox);
_progressBox = progressBox as Box<dynamic>?;
Log.i('RssService', 'Hive存储初始化完成 (通过HiveSafeAccess)');
} catch (e) {
Log.e('RssService', 'Hive存储初始化失败: $e');
}
}
/// Returns the persisted subscriptions.
///
/// Falls back to a fresh copy of [defaultSubscriptions] when the box is not
/// open, nothing has been saved yet, or the stored JSON cannot be decoded
/// (the failure is logged).
static List<RssSubscription> getSavedSubscriptions() {
  final raw = _box?.get('subscriptions');
  if (raw == null) return defaultSubscriptions.toList();

  try {
    final decoded = jsonDecode(raw as String) as List<dynamic>;
    return [
      for (final entry in decoded)
        RssSubscription.fromMap(entry as Map<String, dynamic>),
    ];
  } catch (e) {
    Log.e('RssService', '读取订阅源失败: $e');
    return defaultSubscriptions.toList();
  }
}
/// Persists [subs] as a JSON string under the `subscriptions` key.
///
/// Does nothing when the box is not open; encoding or write errors are
/// logged and swallowed.
static Future<void> saveSubscriptions(List<RssSubscription> subs) async {
  final box = _box;
  if (box == null) return;

  try {
    final payload = jsonEncode([for (final sub in subs) sub.toMap()]);
    await box.put('subscriptions', payload);
  } catch (e) {
    Log.e('RssService', '保存订阅源失败: $e');
  }
}
/// Appends [sub] to the saved subscriptions.
///
/// Logs a warning and leaves storage untouched when a subscription with the
/// same URL already exists.
static Future<void> addSubscription(RssSubscription sub) async {
  final current = getSavedSubscriptions();

  for (final existing in current) {
    if (existing.url == sub.url) {
      Log.w('RssService', '订阅源已存在: ${sub.url}');
      return;
    }
  }

  await saveSubscriptions([...current, sub]);
}
/// Removes every saved subscription whose id equals [id] and saves the rest.
static Future<void> removeSubscription(String id) async {
  final remaining = [
    for (final sub in getSavedSubscriptions())
      if (sub.id != id) sub,
  ];
  await saveSubscriptions(remaining);
}
/// Marks the article identified by [articleUid] as read.
///
/// Does nothing when the read box is not open.
static Future<void> markArticleRead(String articleUid) async {
  final box = _readBox;
  if (box != null) {
    await box.put(articleUid, true);
  }
}
/// Whether [articleUid] has been marked as read.
///
/// Returns false when the read box is not open.
static bool isArticleRead(String articleUid) =>
    _readBox?.get(articleUid) == true;
/// Bookmarks the article identified by [articleUid].
///
/// Does nothing when the bookmark box is not open.
static Future<void> bookmarkArticle(String articleUid) async {
  final box = _bookmarkBox;
  if (box != null) {
    await box.put(articleUid, true);
  }
}
/// Removes the bookmark for [articleUid].
///
/// Does nothing when the bookmark box is not open.
static Future<void> unbookmarkArticle(String articleUid) async {
  final box = _bookmarkBox;
  if (box != null) {
    await box.delete(articleUid);
  }
}
/// Whether [articleUid] is currently bookmarked.
///
/// Returns false when the bookmark box is not open.
static bool isArticleBookmarked(String articleUid) =>
    _bookmarkBox?.get(articleUid) == true;
/// Returns the uids of all bookmarked articles.
///
/// Returns an empty set when the bookmark box is not open. Only String keys
/// are included, and the result is a concrete set rather than a lazily cast
/// view, so iterating it cannot throw on an unexpected key type.
static Set<String> getBookmarkedUids() {
  final box = _bookmarkBox;
  if (box == null) return <String>{};
  return box.keys.whereType<String>().toSet();
}
// ============================================================
// Reading-progress persistence
// ============================================================
/// Stores [scrollOffset] as the reading progress of [articleUid].
///
/// Does nothing when the progress box is not open.
static Future<void> saveReadingProgress(
  String articleUid,
  double scrollOffset,
) async {
  final box = _progressBox;
  if (box != null) {
    await box.put(articleUid, scrollOffset);
  }
}
/// Returns the stored reading progress of [articleUid].
///
/// Returns 0.0 when the progress box is not open or nothing is stored.
static double getReadingProgress(String articleUid) {
  final stored = _progressBox?.get(articleUid) as num?;
  return stored?.toDouble() ?? 0.0;
}
/// Returns every stored reading progress, keyed by the stringified box key.
///
/// Returns an empty map when the progress box is not open; null values are
/// skipped.
static Map<String, double> getAllReadingProgress() {
  final box = _progressBox;
  if (box == null) return {};

  final progress = <String, double>{};
  for (final key in box.keys) {
    final stored = box.get(key);
    if (stored == null) continue;
    progress['$key'] = (stored as num).toDouble();
  }
  return progress;
}
/// Checks whether [url] answers a HEAD request with a status in 200..399.
///
/// Any exception from the request is logged as a warning and reported as
/// false.
static Future<bool> checkFeedAvailable(String url) async {
  try {
    final status = (await _dio.head<String>(url)).statusCode;
    return status != null && status >= 200 && status < 400;
  } catch (e) {
    Log.w('RssService', '订阅源不可达 [$url]: $e');
    return false;
  }
}
/// Downloads [sub]'s feed and parses it into items.
///
/// Parsing strategy, in order:
/// 1. Body contains `<rss` or `<channel`: standard RSS parsing; if that
///    yields nothing and the body contains `<item`, a loose regex-based
///    extraction is tried.
/// 2. Otherwise, body contains `<feed`: standard Atom parsing.
/// 3. Still nothing and the body contains `<html`: HTML fallback.
///
/// [limit] caps the number of returned items (default 20) and [offset] skips
/// that many leading items (default 0). Negative values are treated as 0.
///
/// Read/bookmark flags are looked up with the uid of the item that was
/// actually built, so the lookup key always matches [RssFeedItem.uid].
///
/// Returns an empty list when the request or the parsing fails; the error is
/// logged.
static Future<List<RssFeedItem>> fetchFeed(
  RssSubscription sub, {
  int limit = 20,
  int offset = 0,
}) async {
  try {
    final response = await _dio.get<String>(sub.url);
    final xml = response.data ?? '';
    var items = <RssFeedItem>[];

    if (xml.contains('<rss') || xml.contains('<channel')) {
      // ---- Standard RSS ----
      try {
        final feed = RssFeed.parse(xml);
        items = [
          for (final item in feed.items)
            _withStoredState(
              RssFeedItem(
                title: item.title ?? '无标题',
                description: item.description ?? item.content?.value,
                content: item.content?.value ?? item.description,
                link: item.link,
                author: item.author ?? item.dc?.creator,
                pubDate: parseDateTime(item.pubDate),
                imageUrl:
                    item.enclosure?.url ??
                    item.media?.thumbnails.firstOrNull?.url,
                sourceId: sub.id,
                sourceTitle: sub.title,
              ),
            ),
        ];
      } catch (e) {
        Log.w('RssService', '标准RSS解析失败尝试宽松解析 [${sub.url}]: $e');
      }
      // Non-standard RSS: fall back to regex extraction of <item> blocks.
      if (items.isEmpty && xml.contains('<item')) {
        items = _parseRssItemsLoose(xml, sub);
      }
    } else if (xml.contains('<feed')) {
      // ---- Standard Atom ----
      try {
        final feed = AtomFeed.parse(xml);
        items = [
          for (final entry in feed.items)
            _withStoredState(
              RssFeedItem(
                title: entry.title ?? '无标题',
                description: entry.summary ?? entry.content,
                content: entry.content ?? entry.summary,
                link: entry.links.firstOrNull?.href,
                author: entry.authors.firstOrNull?.name,
                pubDate: parseDateTime(entry.published ?? entry.updated),
                imageUrl: entry.media?.thumbnails.firstOrNull?.url,
                sourceId: sub.id,
                sourceTitle: sub.title,
              ),
            ),
        ];
      } catch (e) {
        Log.w('RssService', '标准Atom解析失败 [${sub.url}]: $e');
      }
    }

    // ---- HTML fallback when neither feed parser produced anything ----
    if (items.isEmpty && xml.contains('<html')) {
      items = _parseHtmlFallback(xml, sub);
    }

    // ---- Pagination: clamp so out-of-range values cannot throw ----
    final total = items.length;
    final start = offset < 0 ? 0 : (offset > total ? total : offset);
    final count = limit < 0 ? 0 : limit;
    final end = start + count > total ? total : start + count;
    if (start > 0 || end < total) {
      items = items.sublist(start, end);
    }
    return items;
  } catch (e) {
    Log.e('RssService', 'RSS解析失败 [${sub.url}]: $e');
    return [];
  }
}

/// Returns [item] with its read/bookmark flags loaded from storage, keyed by
/// the item's own uid.
static RssFeedItem _withStoredState(RssFeedItem item) {
  final uid = item.uid;
  return item.copyWith(
    isRead: isArticleRead(uid),
    isBookmarked: isArticleBookmarked(uid),
  );
}
/// Loose, regex-based extraction of `<item>` blocks for feeds the standard
/// RSS parser could not handle.
///
/// For each block the first title, link, description, pubDate and author
/// (falling back to dc:creator) are read, with CDATA wrappers removed.
/// Blocks with neither a non-empty title nor a link are skipped. The number
/// of extracted items is logged.
static List<RssFeedItem> _parseRssItemsLoose(
  String xml,
  RssSubscription sub,
) {
  final itemPattern = RegExp(r'<item[^>]*>(.*?)</item>', dotAll: true);
  final titlePattern = RegExp(r'<title[^>]*>(.*?)</title>', dotAll: true);
  final linkPattern = RegExp(r'<link[^>]*>(.*?)</link>', dotAll: true);
  final descPattern = RegExp(
    r'<description[^>]*>(.*?)</description>',
    dotAll: true,
  );
  final pubDatePattern = RegExp(
    r'<pubDate[^>]*>(.*?)</pubDate>',
    dotAll: true,
  );
  final authorPattern = RegExp(
    r'<author[^>]*>(.*?)</author>',
    dotAll: true,
  );
  final creatorPattern = RegExp(
    r'<dc:creator[^>]*>(.*?)</dc:creator>',
    dotAll: true,
  );

  // First capture of [pattern] inside [source], unwrapped from CDATA.
  String? firstText(RegExp pattern, String source) =>
      _extractCdataOrText(pattern.firstMatch(source)?.group(1));

  final parsed = <RssFeedItem>[];
  for (final block in itemPattern.allMatches(xml)) {
    final body = block.group(1) ?? '';
    final title = firstText(titlePattern, body);
    final link = firstText(linkPattern, body)?.trim();
    final summary = firstText(descPattern, body);
    final published = firstText(pubDatePattern, body);
    final author =
        firstText(authorPattern, body) ?? firstText(creatorPattern, body);

    // Nothing to identify the entry by: skip it.
    if ((title == null || title.isEmpty) && link == null) continue;

    final uid = '${sub.id}_${link?.hashCode ?? title.hashCode}';
    parsed.add(
      RssFeedItem(
        title: (title == null || title.isEmpty) ? '无标题' : title,
        description: summary,
        content: summary,
        link: link,
        author: author,
        pubDate: parseDateTime(published),
        sourceId: sub.id,
        sourceTitle: sub.title,
        isRead: isArticleRead(uid),
        isBookmarked: isArticleBookmarked(uid),
      ),
    );
  }

  Log.i('RssService', '宽松RSS解析提取到${parsed.length}条 [${sub.url}]');
  return parsed;
}
/// HTML fallback used when RSS/Atom parsing produced no items.
///
/// Each `<article>` element contributes one item: the title is the text of
/// its first heading (h1-h6) and the link is the href of its first anchor.
/// Articles without a non-empty title are skipped. The number of extracted
/// items is logged.
static List<RssFeedItem> _parseHtmlFallback(
  String html,
  RssSubscription sub,
) {
  final articlePattern = RegExp(
    r'<article[^>]*>(.*?)</article>',
    caseSensitive: false,
    dotAll: true,
  );
  final anchorPattern = RegExp(
    r'<a[^>]+href=["\x27]([^"\x27]+)["\x27][^>]*>(.*?)</a>',
    dotAll: true,
  );
  final headingPattern = RegExp(
    r'<h[1-6][^>]*>(.*?)</h[1-6]>',
    dotAll: true,
  );

  final extracted = <RssFeedItem>[];
  for (final article in articlePattern.allMatches(html)) {
    final inner = article.group(1) ?? '';

    // Title: text of the first heading, tags stripped.
    final heading = headingPattern.firstMatch(inner);
    final title = heading == null
        ? null
        : _stripHtmlTags(heading.group(1) ?? '').trim();
    if (title == null || title.isEmpty) continue;

    // Link: href of the first anchor, if any.
    final link = anchorPattern.firstMatch(inner)?.group(1)?.trim();

    final uid = '${sub.id}_${link?.hashCode ?? title.hashCode}';
    extracted.add(
      RssFeedItem(
        title: title,
        link: link,
        sourceId: sub.id,
        sourceTitle: sub.title,
        isRead: isArticleRead(uid),
        isBookmarked: isArticleBookmarked(uid),
      ),
    );
  }

  Log.i('RssService', 'HTML fallback解析提取到${extracted.length}条 [${sub.url}]');
  return extracted;
}
/// Returns the trimmed content of the first CDATA section in [raw], or the
/// trimmed [raw] itself when it contains none.
///
/// Returns null when [raw] is null.
static String? _extractCdataOrText(String? raw) {
  if (raw == null) return null;

  final wrapped =
      RegExp(r'<!\[CDATA\[(.*?)\]\]>', dotAll: true).firstMatch(raw);
  return wrapped == null ? raw.trim() : (wrapped.group(1)?.trim() ?? '');
}
/// Removes everything that looks like an HTML tag from [html] and trims the
/// result.
static String _stripHtmlTags(String html) =>
    html.replaceAll(RegExp(r'<[^>]*>'), '').trim();
/// Fetches [url] and, if it is an RSS or Atom feed, builds a subscription
/// for it.
///
/// The URL is normalised first. The id is `rss_` plus the current epoch
/// milliseconds, the title falls back to the normalised URL, and the
/// category is guessed from the feed title. Returns null when the body is
/// neither RSS nor Atom, or when anything throws (the error is logged).
static Future<RssSubscription?> discoverFeed(String url) async {
  try {
    final normalizedUrl = _normalizeUrl(url);
    final body = (await _dio.get<String>(normalizedUrl)).data ?? '';

    final String? feedTitle;
    final String? feedDescription;
    final String? feedIcon;

    if (body.contains('<rss') || body.contains('<channel')) {
      final feed = RssFeed.parse(body);
      feedTitle = feed.title;
      feedDescription = feed.description;
      feedIcon = feed.image?.url;
    } else if (body.contains('<feed')) {
      final feed = AtomFeed.parse(body);
      feedTitle = feed.title;
      feedDescription = feed.subtitle;
      feedIcon = feed.icon;
    } else {
      return null;
    }

    return RssSubscription(
      id: 'rss_${DateTime.now().millisecondsSinceEpoch}',
      title: feedTitle ?? normalizedUrl,
      url: normalizedUrl,
      description: feedDescription,
      iconUrl: feedIcon,
      category: _guessCategory(feedTitle ?? ''),
      addedAt: DateTime.now(),
    );
  } catch (e) {
    Log.e('RssService', '发现RSS源失败: $e');
    return null;
  }
}
/// Fetches all [subs] concurrently and returns their items keyed by
/// subscription id.
///
/// Each feed is fetched with [fetchFeed]'s default paging.
static Future<Map<String, List<RssFeedItem>>> fetchAllFeeds(
  List<RssSubscription> subs,
) async {
  final itemsBySource = <String, List<RssFeedItem>>{};

  Future<void> load(RssSubscription sub) async {
    itemsBySource[sub.id] = await fetchFeed(sub);
  }

  await Future.wait(subs.map(load));
  return itemsBySource;
}
/// Built-in recommended subscriptions.
///
/// [getSavedSubscriptions] returns a copy of this list when nothing usable
/// is stored.
static const List<RssSubscription> defaultSubscriptions = [
RssSubscription(
id: '36kr',
title: '36氪',
url: 'https://36kr.com/feed',
description: '科技创业资讯',
category: RssCategory.tech,
),
RssSubscription(
id: 'sspai',
title: '少数派',
url: 'https://sspai.com/feed',
description: '高效工作和品质生活',
category: RssCategory.efficiency,
),
// No explicit category: uses the constructor default (RssCategory.general).
RssSubscription(
id: 'zhihu_daily',
title: '知乎日报',
url: 'https://daily.zhihu.com/rss',
description: '知乎精选',
),
RssSubscription(
id: 'v2ex',
title: 'V2EX',
url: 'https://www.v2ex.com/index.xml',
description: '创意工作者社区',
category: RssCategory.techCommunity,
),
];
/// Parses a timestamp from storage or from a feed.
///
/// Public so that [RssSubscription.fromMap] can use it. Surrounding
/// whitespace is ignored. The ISO-8601 style accepted by
/// [DateTime.tryParse] is tried first; if that fails, the RFC 822 style used
/// by RSS `pubDate` (for example `Mon, 15 Jun 2026 10:04:52 +0800`) is
/// tried, in which case the result is in UTC.
///
/// Returns null for null, empty or unparseable input.
static DateTime? parseDateTime(String? dateStr) {
  final text = dateStr?.trim() ?? '';
  if (text.isEmpty) return null;
  return DateTime.tryParse(text) ?? _parseRfc822DateTime(text);
}

/// Parses an RFC 822 style date-time (`[Day,] DD Mon YYYY HH:MM[:SS] [zone]`)
/// into a UTC [DateTime], or returns null when [text] does not match.
static DateTime? _parseRfc822DateTime(String text) {
  const months = <String, int>{
    'jan': 1,
    'feb': 2,
    'mar': 3,
    'apr': 4,
    'may': 5,
    'jun': 6,
    'jul': 7,
    'aug': 8,
    'sep': 9,
    'oct': 10,
    'nov': 11,
    'dec': 12,
  };
  // Offsets in minutes east of UTC for the zone names RFC 822 defines.
  const zoneOffsets = <String, int>{
    'ut': 0,
    'utc': 0,
    'gmt': 0,
    'z': 0,
    'est': -300,
    'edt': -240,
    'cst': -360,
    'cdt': -300,
    'mst': -420,
    'mdt': -360,
    'pst': -480,
    'pdt': -420,
  };

  final match = RegExp(
    r'^(?:[A-Za-z]+,?\s+)?(\d{1,2})\s+([A-Za-z]{3,})\.?\s+(\d{2,4})\s+'
    r'(\d{1,2}):(\d{2})(?::(\d{2}))?\s*([A-Za-z]+|[+-]\d{2}:?\d{2})?$',
  ).firstMatch(text);
  if (match == null) return null;

  final month = months[match.group(2)!.substring(0, 3).toLowerCase()];
  if (month == null) return null;

  var year = int.parse(match.group(3)!);
  // Two- and three-digit years follow the RFC 2822 obsolete-year rule.
  if (year < 50) {
    year += 2000;
  } else if (year < 1000) {
    year += 1900;
  }

  final day = int.parse(match.group(1)!);
  final hour = int.parse(match.group(4)!);
  final minute = int.parse(match.group(5)!);
  final second = int.parse(match.group(6) ?? '0');
  // DateTime.utc silently rolls over out-of-range fields, so reject them here.
  if (day < 1 || day > 31 || hour > 23 || minute > 59 || second > 60) {
    return null;
  }

  var offsetMinutes = 0;
  final zone = match.group(7);
  if (zone != null) {
    if (zone.startsWith('+') || zone.startsWith('-')) {
      final digits = zone.substring(1).replaceAll(':', '');
      final magnitude =
          int.parse(digits.substring(0, 2)) * 60 +
          int.parse(digits.substring(2));
      offsetMinutes = zone.startsWith('-') ? -magnitude : magnitude;
    } else {
      // Unknown zone names are treated as UTC.
      offsetMinutes = zoneOffsets[zone.toLowerCase()] ?? 0;
    }
  }

  return DateTime.utc(year, month, day, hour, minute, second)
      .subtract(Duration(minutes: offsetMinutes));
}
/// Trims [url] and prefixes `https://` when it has no http(s) scheme.
///
/// The scheme check is case-insensitive, so an input such as
/// `HTTPS://example.com` is returned as is instead of being prefixed again.
static String _normalizeUrl(String url) {
  final trimmed = url.trim();
  final lower = trimmed.toLowerCase();
  if (lower.startsWith('http://') || lower.startsWith('https://')) {
    return trimmed;
  }
  return 'https://$trimmed';
}
/// Guesses a category from keywords in [title] (case-insensitive).
///
/// Keyword groups are checked in this order: design, finance, news,
/// efficiency, tech community, tech. Returns [RssCategory.general] when no
/// keyword matches.
static RssCategory _guessCategory(String title) {
  final lower = title.toLowerCase();
  bool hasAny(List<String> keywords) => keywords.any(lower.contains);

  if (hasAny(const ['设计', 'design'])) return RssCategory.design;
  if (hasAny(const ['财经', '金融', 'finance'])) return RssCategory.finance;
  if (hasAny(const ['新闻', 'news'])) return RssCategory.news;
  if (hasAny(const ['效率', '少数派', 'sspai'])) return RssCategory.efficiency;
  if (hasAny(const ['v2ex', '社区', 'forum'])) {
    return RssCategory.techCommunity;
  }
  // The tech group previously also tested contains(''), which is true for
  // every string and made the general fallback unreachable; that empty
  // keyword is intentionally not carried over.
  if (hasAny(const ['科技', 'tech', '范儿'])) return RssCategory.tech;

  return RssCategory.general;
}
// ============================================================
// OPML import / export
// ============================================================
/// Serialises the saved subscriptions as an OPML 2.0 document.
///
/// Subscriptions are grouped under one outline per category, in order of
/// each category's first appearance. All attribute values are XML-escaped;
/// the description attribute is written only when a description is present.
static String exportToOpml() {
  // Group by category, preserving first-seen order.
  final byCategory = <RssCategory, List<RssSubscription>>{};
  for (final sub in getSavedSubscriptions()) {
    (byCategory[sub.category] ??= []).add(sub);
  }

  final out = StringBuffer()
    ..writeln('<?xml version="1.0" encoding="UTF-8"?>')
    ..writeln('<opml version="2.0">')
    ..writeln(' <head>')
    ..writeln(' <title>闲言APP RSS订阅</title>')
    ..writeln(
      ' <dateCreated>${DateTime.now().toIso8601String()}</dateCreated>',
    )
    ..writeln(' </head>')
    ..writeln(' <body>');

  byCategory.forEach((category, members) {
    out.writeln(' <outline text="${_xmlEscape(category.label)}">');
    for (final sub in members) {
      final description = sub.description;
      final descriptionAttr = description != null
          ? ' description="${_xmlEscape(description)}"'
          : '';
      out.writeln(
        ' <outline type="rss" '
        'text="${_xmlEscape(sub.title)}" '
        'title="${_xmlEscape(sub.title)}" '
        'xmlUrl="${_xmlEscape(sub.url)}"'
        '$descriptionAttr'
        ' />',
      );
    }
    out.writeln(' </outline>');
  });

  out
    ..writeln(' </body>')
    ..writeln('</opml>');
  return out.toString();
}
/// Imports subscriptions from an OPML document and returns how many were
/// newly added.
///
/// Every `<outline ... type="rss" ...>` tag with a non-empty `xmlUrl` is
/// read; the title is taken from `title`, then `text`, then the URL. Feeds
/// whose URL is already saved (or repeated within the import) are skipped.
/// Storage is written only when at least one feed was added.
///
/// A missing `description` attribute is kept as null rather than turned into
/// an empty string, and the category is guessed from the unescaped title.
///
/// Returns 0 when anything throws; the error is logged.
static Future<int> importFromOpml(String opmlXml) async {
  try {
    final outlineRegex = RegExp(
      r'<outline[^>]*type="rss"[^>]*>',
      caseSensitive: false,
    );
    final textRegex = RegExp(r'text="([^"]*)"');
    final titleRegex = RegExp(r'title="([^"]*)"');
    final xmlUrlRegex = RegExp(r'xmlUrl="([^"]*)"');
    final descRegex = RegExp(r'description="([^"]*)"');

    final subs = <RssSubscription>[];
    for (final match in outlineRegex.allMatches(opmlXml)) {
      final outline = match.group(0)!;
      final xmlUrl = xmlUrlRegex.firstMatch(outline)?.group(1);
      if (xmlUrl == null || xmlUrl.isEmpty) continue;

      final rawTitle =
          titleRegex.firstMatch(outline)?.group(1) ??
          textRegex.firstMatch(outline)?.group(1) ??
          xmlUrl;
      final title = _xmlUnescape(rawTitle);
      final rawDescription = descRegex.firstMatch(outline)?.group(1);

      subs.add(
        RssSubscription(
          // The running index keeps ids unique within a single import.
          id: 'import_${DateTime.now().millisecondsSinceEpoch}_${subs.length}',
          title: title,
          url: _xmlUnescape(xmlUrl),
          description:
              rawDescription == null ? null : _xmlUnescape(rawDescription),
          category: _guessCategory(title),
          addedAt: DateTime.now(),
        ),
      );
    }

    final existing = getSavedSubscriptions();
    final existingUrls = existing.map((s) => s.url).toSet();
    var added = 0;
    for (final sub in subs) {
      // Set.add returns false for a URL that is already known.
      if (existingUrls.add(sub.url)) {
        existing.add(sub);
        added++;
      }
    }
    if (added > 0) {
      await saveSubscriptions(existing);
    }

    Log.i('RssService', 'OPML导入完成: ${subs.length}个源,新增$added个');
    return added;
  } catch (e) {
    Log.e('RssService', 'OPML导入失败: $e');
    return 0;
  }
}
/// Escapes the five XML special characters (`&`, `"`, `'`, `<`, `>`) in
/// [input].
static String _xmlEscape(String input) {
  const replacements = <String, String>{
    '&': '&amp;',
    '"': '&quot;',
    "'": '&apos;',
    '<': '&lt;',
    '>': '&gt;',
  };
  // Single pass: each special character maps to exactly one entity.
  return input.replaceAllMapped(
    RegExp('[&"\x27<>]'),
    (m) => replacements[m.group(0)]!,
  );
}
/// Reverses [_xmlEscape]: turns the five XML entities back into characters.
///
/// Returns an empty string for null input. `&amp;` is handled last so that
/// an escaped ampersand cannot combine into a second entity.
static String _xmlUnescape(String? input) {
  if (input == null) return '';

  const steps = <List<String>>[
    ['&gt;', '>'],
    ['&lt;', '<'],
    ['&apos;', "'"],
    ['&quot;', '"'],
    ['&amp;', '&'],
  ];
  var text = input;
  for (final step in steps) {
    text = text.replaceAll(step[0], step[1]);
  }
  return text;
}
// ============================================================
// Full-text extraction (reader mode)
// ============================================================
/// Downloads the page at [url] and extracts its title, main text and image
/// URLs.
///
/// The result carries an error message instead of content when the page is
/// empty, when no text could be extracted (the title is still included), or
/// when the request throws (the error is logged).
static Future<RssFullTextResult> fetchFullText(String url) async {
  try {
    final html = (await _dio.get<String>(url)).data ?? '';
    if (html.isEmpty) {
      return const RssFullTextResult(error: '页面内容为空');
    }

    final title = _extractTitle(html);
    final content = _extractContent(html);
    if (content.isEmpty) {
      return RssFullTextResult(
        error: '无法提取正文内容',
        title: title,
      );
    }

    return RssFullTextResult(
      success: true,
      title: title,
      content: content,
      images: _extractContentImages(html),
      sourceUrl: url,
    );
  } catch (e) {
    Log.e('RssService', '全文提取失败 [$url]: $e');
    return RssFullTextResult(error: '加载失败: $e');
  }
}
/// Extracts the page title from [html].
///
/// Prefers a non-empty `og:title` meta content, then a non-empty `<title>`
/// element (trimmed); HTML entities are decoded in both cases. Returns an
/// empty string when neither is found.
static String _extractTitle(String html) {
  final openGraph = RegExp(
    r'<meta[^>]*property=["\x27]og:title["\x27][^>]*content=["\x27]([^"\x27]*)["\x27]',
    caseSensitive: false,
  ).firstMatch(html)?.group(1);
  if (openGraph != null && openGraph.isNotEmpty) {
    return _decodeHtmlEntities(openGraph);
  }

  final titleTag = RegExp(
    r'<title[^>]*>(.*?)</title>',
    caseSensitive: false,
    dotAll: true,
  ).firstMatch(html)?.group(1);
  if (titleTag != null && titleTag.isNotEmpty) {
    return _decodeHtmlEntities(titleTag.trim());
  }

  return '';
}
/// Extracts the main text of a page (simplified Readability heuristic).
///
/// Steps:
/// 1. Drop script, style, nav, footer, header, aside and noscript elements.
/// 2. Narrow to the first `<article>`, or else to the first `<div>` whose
///    class attribute contains a known content keyword; otherwise keep the
///    whole cleaned document.
/// 3. Turn block-level tags into line breaks, strip the remaining tags and
///    decode entities.
///
/// Returns the trimmed text, with runs of three or more newlines collapsed
/// to two.
static String _extractContent(String html) {
  var cleaned = html;

  // Remove non-content elements, in the same order as listed.
  const noiseTags = [
    'script',
    'style',
    'nav',
    'footer',
    'header',
    'aside',
    'noscript',
  ];
  for (final tag in noiseTags) {
    cleaned = cleaned.replaceAll(
      RegExp('<$tag[^>]*>.*?</$tag>', caseSensitive: false, dotAll: true),
      '',
    );
  }

  final articleMatch = RegExp(
    r'<article[^>]*>(.*?)</article>',
    caseSensitive: false,
    dotAll: true,
  ).firstMatch(cleaned);
  if (articleMatch != null) {
    cleaned = articleMatch.group(1)!;
  } else {
    // The class value is matched with [^"\x27]* so the keyword search stays
    // inside the attribute and cannot run on into later markup.
    final contentDiv = RegExp(
      r'<div[^>]*class=["\x27][^"\x27]*(?:article|content|post-body|entry-content|post-content|story-body|article-body|rich-text|markdown-body)[^"\x27]*["\x27][^>]*>(.*?)</div>',
      caseSensitive: false,
      dotAll: true,
    ).firstMatch(cleaned);
    if (contentDiv != null) {
      cleaned = contentDiv.group(1)!;
    }
  }

  // Convert structure-bearing tags to line breaks, then strip the rest.
  var text = cleaned
      .replaceAll(RegExp(r'<br\s*/?>', caseSensitive: false), '\n')
      .replaceAll(RegExp(r'<p[^>]*>', caseSensitive: false), '\n')
      .replaceAll(RegExp(r'</p>', caseSensitive: false), '\n')
      .replaceAll(RegExp(r'<h[1-6][^>]*>', caseSensitive: false), '\n\n')
      .replaceAll(RegExp(r'</h[1-6]>', caseSensitive: false), '\n')
      .replaceAll(RegExp(r'<li[^>]*>', caseSensitive: false), '')
      .replaceAll(RegExp(r'<blockquote[^>]*>', caseSensitive: false), '\n> ')
      .replaceAll(RegExp(r'<[^>]*>'), '');

  // Decode entities. Numeric references are decoded rather than dropped, and
  // &amp; goes last so "&amp;lt;" yields the literal text "&lt;".
  text = text
      .replaceAll('&nbsp;', ' ')
      .replaceAll('&lt;', '<')
      .replaceAll('&gt;', '>')
      .replaceAll('&quot;', '"')
      .replaceAllMapped(RegExp(r'&#(\d+);'), (m) {
        final code = int.tryParse(m.group(1)!);
        final valid = code != null &&
            code > 0 &&
            code <= 0x10FFFF &&
            (code < 0xD800 || code > 0xDFFF);
        return valid ? String.fromCharCode(code) : '';
      })
      .replaceAll('&amp;', '&');

  return text.replaceAll(RegExp(r'\n{3,}'), '\n\n').trim();
}
/// Collects up to 10 image URLs from `<img src=...>` tags in [html].
///
/// URLs that are empty, end in `.svg`, or contain `avatar`, `icon` or `logo`
/// are skipped.
static List<String> _extractContentImages(String html) {
  final imgPattern = RegExp(
    r'<img[^>]+src\s*=\s*["\x27]([^"\x27]+)["\x27]',
    dotAll: true,
  );

  final urls = <String>[];
  for (final tag in imgPattern.allMatches(html)) {
    if (urls.length == 10) break;
    final src = tag.group(1) ?? '';
    final rejected = src.isEmpty ||
        src.endsWith('.svg') ||
        src.contains('avatar') ||
        src.contains('icon') ||
        src.contains('logo');
    if (!rejected) urls.add(src);
  }
  return urls;
}
/// Decodes the common HTML entities in [text].
///
/// Handles `&lt;`, `&gt;`, `&quot;`, `&#39;`, `&apos;`, `&nbsp;` and
/// `&amp;`. `&amp;` is decoded last so that an escaped ampersand followed by
/// entity-like text (for example `&amp;lt;`) is decoded only once.
static String _decodeHtmlEntities(String text) {
  return text
      .replaceAll('&lt;', '<')
      .replaceAll('&gt;', '>')
      .replaceAll('&quot;', '"')
      .replaceAll('&#39;', "'")
      .replaceAll('&apos;', "'")
      .replaceAll('&nbsp;', ' ')
      .replaceAll('&amp;', '&');
}
}
/// Outcome of a full-text extraction.
class RssFullTextResult {
  /// Whether extraction succeeded; defaults to false.
  final bool success;

  /// Extracted page title, if any.
  final String? title;

  /// Extracted plain-text content, if any.
  final String? content;

  /// Image URLs found on the page; defaults to an empty list.
  final List<String> images;

  /// URL the content was extracted from, if set.
  final String? sourceUrl;

  /// Error message when extraction failed, if set.
  final String? error;

  const RssFullTextResult({
    this.success = false,
    this.title,
    this.content,
    this.images = const [],
    this.sourceUrl,
    this.error,
  });
}