// 2026-04-25 | test_pdf_garbled_chars.dart | PDF garbled-character diagnostic script
// 2026-04-25 | Created: verifies how _cleanPdfText filters various Unicode characters

/// Runs every diagnostic case through [_cleanPdfText], prints a per-character
/// breakdown of the input, and prints a pass/fail summary at the end.
void main() {
  print('╔══════════════════════════════════════════════════════════════╗');
  print('║ PDF 乱码字符诊断工具 v1.0 ║');
  print('║ 测试 _cleanPdfText 过滤效果 ║');
  print('╚══════════════════════════════════════════════════════════════╝\n');

  // ========== Test cases ==========
  final testCases = <_TestCase>[
    // Baseline - ordinary text must survive.
    _TestCase(name: '✅ 正常中文', input: '这道菜很好吃,营养丰富', expected: '保留'),
    _TestCase(name: '✅ 中英文混合', input: 'Hello世界,美味佳肴123', expected: '保留'),
    _TestCase(name: '✅ 纯英文', input: 'Delicious food recipe', expected: '保留'),

    // Garbled input - must be filtered out.
    // The input character is U+25AF (WHITE VERTICAL RECTANGLE).
    _TestCase(name: '❌ 菱形方块 (U+25AF)', input: '▯▯▯▯▯▯▯▯▯▯▯▯', expected: '过滤'),
    _TestCase(name: '❌ 交叉形状 (U+2716)', input: '✖✖✖✖✖✖✖✖✖', expected: '过滤'),
    _TestCase(
      name: '❌ 私用区字符 (U+E000-U+F8FF)',
      input: '\uE000\uE001\uE002\uE003\uE004\uE005',
      expected: '过滤',
    ),
    _TestCase(
      name: '❌ 变体选择器 (U+FE00-U+FE0F)',
      input: 'A\uFE00B\uFE01C\uFE02',
      expected: '过滤或部分保留',
    ),
    _TestCase(
      name: '❌ 控制字符 (U+00-U+1F)',
      input: '\x00\x01\x02\x03\x04\x05',
      expected: '过滤',
    ),
    _TestCase(name: '❌ Unicode替换字符 (U+FFFD)', input: '�����', expected: '过滤'),

    // Boundary cases.
    _TestCase(name: '⚠️ 混合内容 (正常+乱码)', input: '很好吃▯▯▯营养▯▯丰富', expected: '部分保留'),
    // NOTE(review): '▯' is listed in _isSpecialSymbol, so it is removed one
    // character at a time before the ratio check runs. The cleaned text is
    // '好吃', which is non-empty, so this case is reported as failed.
    _TestCase(
      name: '⚠️ 高比例乱码 (>40%)',
      input: '好吃▯▯▯▯▯▯▯▯▯▯▯▯▯▯▯▯▯▯▯▯▯▯▯',
      expected: '过滤(>40%阈值)',
    ),
    _TestCase(
      name: '⚠️ 低比例乱码 (<40%)',
      input: '这道菜真的很好吃,营养丰富味道鲜美▯▯',
      expected: '保留(<40%阈值)',
    ),

    // Specific Unicode blocks.
    _TestCase(
      name: '🔣 制表符/边框 (U+2500-U+257F)',
      input: '┌┐└┘├┤┬┴┼─│',
      expected: '过滤',
    ),
    _TestCase(
      name: '🔣 方块元素 (U+25A0-U+25FF)',
      input: '■□▢▣▤▥▦▧▨▩',
      expected: '过滤',
    ),
    _TestCase(
      name: '🔣 几何形状 (U+25A0-U+25FF)',
      input: '▲▼◆◇○●◐◑◒◓',
      expected: '过滤',
    ),
    _TestCase(
      name: '🔣 箭头符号 (U+2190-U+21FF)',
      input: '→←↑↓↔⇒⇐⇑⇓',
      expected: '过滤',
    ),
    _TestCase(
      name: '🔣 数学运算符 (U+2200-U+22FF)',
      input: '±×÷≈≠≤≥∞√',
      expected: '过滤',
    ),
    _TestCase(
      name: '🔣 装饰符号 (U+2700-U+27BF)',
      input: '✓✔✗✘★☆♠♣♥♦',
      expected: '过滤',
    ),
    _TestCase(
      name: '🔣 Dingbats (U+2700-U+27BF)',
      input: '❤❥❦❧❝❞❟❰❱',
      expected: '过滤',
    ),

    // Simulated real-world scenarios.
    _TestCase(
      name: '🎯 场景1: displayIntro含PUA',
      input: '美味家常菜\uE000\uE001\uE002\uE003',
      expected: '保留"美味家常菜"',
    ),
    _TestCase(
      name: '🎯 场景2: 全是乱码',
      input: '\uE000\uE001\uE002\uE003\uE004\uE005\uE006\uE007\uE008\uE009',
      expected: '空字符串',
    ),
    _TestCase(
      name: '🎯 场景3: 含控制字符',
      input: '好吃的菜\x01\x02\x03\x04\x05',
      expected: '保留"好吃的菜"',
    ),
    _TestCase(
      name: '🎯 场景4: CJK扩展区汉字',
      input: '\u3400\u3401\u3402\u4E00\u4E01', // CJK Extension A + Unified Ideographs
      expected: '保留(CJK扩展区)',
    ),
  ];

  // ========== Run the cases ==========
  int passed = 0;
  int failed = 0;

  for (var i = 0; i < testCases.length; i++) {
    final tc = testCases[i];
    print('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
    print("测试 ${i + 1}/${testCases.length}: ${tc.name}");
    print('输入: "${tc.input}"');

    // Per-character breakdown. Iterating runes (code points) instead of
    // UTF-16 code units keeps characters outside the BMP in one piece.
    print('字符分析:');
    var index = 0;
    for (final cp in tc.input.runes) {
      final char = String.fromCharCode(cp);
      final cpHex = 'U+${cp.toRadixString(16).toUpperCase().padLeft(4, '0')}';
      final category = _describeCodePoint(cp, char);
      print(' [$index] "$char" $cpHex → $category');
      index++;
    }

    // Run the cleaner.
    final result = _cleanPdfText(tc.input);
    print('输出: "$result"');
    print('长度: ${result.length}');

    // Decide pass/fail from the keywords in the expectation text.
    if (_meetsExpectation(tc.expected, result)) {
      print('结果: ✅ 通过 (预期: ${tc.expected})');
      passed++;
    } else {
      print('结果: ❌ 失败 (预期: ${tc.expected})');
      failed++;
    }
    print('');
  }

  // ========== Summary ==========
  print('╔══════════════════════════════════════════════════════════════╗');
  print('║ 测试汇总 ║');
  print('╠══════════════════════════════════════════════════════════════╣');
  print(
    '║ 总测试数: ${testCases.length.toString().padLeft(3)} ║',
  );
  print(
    '║ ✅ 通过: ${passed.toString().padLeft(3)} ║',
  );
  print(
    '║ ❌ 失败: ${failed.toString().padLeft(3)} ║',
  );
  print(
    '║ 通过率: ${(passed / testCases.length * 100).toStringAsFixed(1).padLeft(5)}% ║',
  );
  print('╚══════════════════════════════════════════════════════════════╝');

  if (failed > 0) {
    print('\n⚠️ 有 $failed 个测试失败,请检查过滤逻辑!');
  } else {
    print('\n🎉 所有测试通过!_cleanPdfText 工作正常。');
  }
}

/// Returns the category label printed for one code point in the analysis.
///
/// The checks run in a fixed order; the first matching category wins.
String _describeCodePoint(int cp, String char) {
  if (_isCjk(cp)) return 'CJK汉字';
  if (_isAsciiLetter(cp)) return 'ASCII字母';
  if (_isDigit(cp)) return '数字';
  if (_isSpace(cp)) return '空白';
  if (_isPunctuation(cp)) return '标点';
  if (_shouldFilterChar(cp, char)) return '❌ 将被过滤';
  return '⚠️ 未分类';
}

/// Returns whether [result] satisfies the free-text [expected] description.
///
/// A non-empty result passes when the expectation mentions "保留" (keep).
/// An empty result passes when it mentions "过滤" (filter) or "空字符串"
/// (empty string). The kept content itself is not compared.
bool _meetsExpectation(String expected, String result) {
  if (result.isNotEmpty) return expected.contains('保留');
  return expected.contains('过滤') || expected.contains('空字符串');
}

// ========== Core methods (copied from recipe_export_button.dart) ==========
// NOTE(review): this copy now works on code points (runes) rather than UTF-16
// code units, so it differs from the code it was copied from. Apply the same
// change to recipe_export_button.dart, or revert it here, so the diagnostic
// stays representative.

/// Removes unwanted characters from [text] and returns the trimmed remainder.
///
/// Returns an empty string when [text] is empty, when nothing is left after
/// filtering and trimming, or when the remainder is judged garbled by
/// [_isGarbledText].
String _cleanPdfText(String text) {
  if (text.isEmpty) return '';

  final cleaned = StringBuffer();
  for (final cp in text.runes) {
    final char = String.fromCharCode(cp);
    if (_shouldFilterChar(cp, char)) continue;
    cleaned.write(char);
  }

  final result = cleaned.toString().trim();
  if (result.isEmpty) return '';
  if (_isGarbledText(result)) return '';
  return result;
}

/// Returns true when the character must be dropped from PDF text.
///
/// [codeUnit] is the character's numeric value (a full code point when called
/// from [_cleanPdfText]); [char] is the same character as a string.
bool _shouldFilterChar(int codeUnit, String char) {
  // C0 control characters, except tab, line feed and carriage return.
  if (codeUnit < 0x20 &&
      codeUnit != 0x09 &&
      codeUnit != 0x0A &&
      codeUnit != 0x0D) {
    return true;
  }
  // DEL.
  if (codeUnit == 0x7F) return true;
  // C1 control characters.
  if (codeUnit >= 0x80 && codeUnit <= 0x9F) return true;
  // Noncharacters U+nFFFE / U+nFFFF: masking the low bit matches both values
  // at the end of every plane.
  if ((codeUnit & 0xFFFE) == 0xFFFE) return true;
  // Replacement character.
  if (codeUnit == 0xFFFD) return true;
  // Noncharacters U+FDD0-U+FDEF.
  if (codeUnit >= 0xFDD0 && codeUnit <= 0xFDEF) return true;
  // Private Use Area (BMP).
  if (codeUnit >= 0xE000 && codeUnit <= 0xF8FF) return true;
  // Supplementary Private Use Areas A and B (planes 15 and 16).
  if (codeUnit >= 0xF0000 && codeUnit <= 0x10FFFD) return true;
  // Specials block up to U+FFFB.
  if (codeUnit >= 0xFFF0 && codeUnit <= 0xFFFB) return true;
  // Variation selectors.
  if (codeUnit >= 0xFE00 && codeUnit <= 0xFE0F) return true;
  if (_isSpecialSymbol(char)) return true;
  return false;
}

/// Returns true when [char] occurs in the hard-coded list of box-drawing,
/// block, geometric, arrow and dingbat symbols that are always removed.
bool _isSpecialSymbol(String char) {
  const rawSymbols =
      '▯□■◯○●◇◆▪▫◻◼◽◾▱░▒▓█▄▌▐▀▸▂▁▃▅▆▇▉▊▋▎▏▕▖▗▘▙▚▛▜▝▞▟╭╮╯╰╱╲╳╴╵╶╷╸╹╺╻╼╽╾╿┌┐└┘├┤┬┴┼─│┈┉┊┋━┃┅┆┇┍┎┏┐┑▒┓└┕┖┗┘┙┚┛├┝┞┟┠┡┢┣┤┥┦┧┨┩┪┫┬┭┮┯┰┱┲┳┴┵┶┷┸┹┺┻┼┽┾┿╀╁╂╃╄╅╆╇╈╉╊╋☐☑☒✓✔✗✘→←↑↓↔⇒⇐⇑⇓⇔⇕⇖⇗⇘⇙♠♣♥♦★☆▲▼◐◑◒◓◔◕◖◗❤❥❦❧❝❞❟❰❱❲❳❴❵❶❷❸❹❺❻❼❽❾❿➔➘➙➚➛➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➱➲➳➴➵➶➷➸➹➺➻➼➽➾➿⟦⟧⟨⟩⟪⟫⟬⟭⟮⟯⬅⬆⬇⬈⬉⬊⬋⬌⬍⬎⬏⬐⬑⬒⬓⬘⬙⬚⬛⬜⬝⬞⬟⬠⬡⭢⭣⭤⭥⭦⭧⭨⭩⭪⭫⭬⭭⭮⭯⭐⭕⭘⭙⭚⭛⭜⭝⭞⭟⭠⭡⭢⭣⭤⭥';
  return rawSymbols.contains(char);
}

/// Returns true when more than 40% of the code points in [text] are neither
/// CJK ideographs, ASCII letters, digits, whitespace nor punctuation.
///
/// Text with fewer than two code points is never considered garbled.
bool _isGarbledText(String text) {
  // Count code points, not UTF-16 code units, so that one supplementary-plane
  // character is one item and can match the CJK ranges above U+FFFF.
  final codePoints = text.runes.toList(growable: false);
  if (codePoints.length < 2) return false;

  var specialCount = 0;
  for (final cp in codePoints) {
    final recognized = _isCjk(cp) ||
        _isAsciiLetter(cp) ||
        _isDigit(cp) ||
        _isSpace(cp) ||
        _isPunctuation(cp);
    if (!recognized) specialCount++;
  }

  final ratio = specialCount / codePoints.length;
  return ratio > 0.4;
}

// ========== Classification helpers ==========

/// Returns true when [cp] lies in one of the CJK ideograph ranges listed here.
bool _isCjk(int cp) {
  return (cp >= 0x4E00 && cp <= 0x9FFF) ||
      (cp >= 0x3400 && cp <= 0x4DBF) ||
      (cp >= 0x20000 && cp <= 0x2A6DF) ||
      (cp >= 0x2A700 && cp <= 0x2B73F) ||
      (cp >= 0x2B740 && cp <= 0x2B81F) ||
      (cp >= 0x2B820 && cp <= 0x2CEAF) ||
      (cp >= 0xF900 && cp <= 0xFAFF) ||
      (cp >= 0x2F800 && cp <= 0x2FA1F);
}

/// Returns true for A-Z and a-z.
bool _isAsciiLetter(int cp) {
  return (cp >= 0x41 && cp <= 0x5A) || (cp >= 0x61 && cp <= 0x7A);
}

/// Returns true for the ASCII digits 0-9.
bool _isDigit(int cp) {
  return cp >= 0x30 && cp <= 0x39;
}

/// Returns true for space, tab, line feed and carriage return.
bool _isSpace(int cp) {
  return cp == 0x20 || cp == 0x09 || cp == 0x0A || cp == 0x0D;
}

/// Returns true for the punctuation this script accepts: the ranges
/// U+2000-U+206F, U+3000-U+303F and U+FF00-U+FFEF, plus a fixed set of ASCII
/// marks and curly quotes.
bool _isPunctuation(int cp) {
  return (cp >= 0x2000 && cp <= 0x206F) ||
      (cp >= 0x3000 && cp <= 0x303F) ||
      (cp >= 0xFF00 && cp <= 0xFFEF) ||
      cp == 0x2E ||
      cp == 0x2C ||
      cp == 0x3B ||
      cp == 0x3A ||
      cp == 0x21 ||
      cp == 0x3F ||
      cp == 0x28 ||
      cp == 0x29 ||
      cp == 0x5B ||
      cp == 0x5D ||
      cp == 0x7B ||
      cp == 0x7D ||
      cp == 0x201C ||
      cp == 0x201D ||
      cp == 0x2018 ||
      cp == 0x2019;
}

// ========== Test-case data class ==========

/// One diagnostic case: a display [name], the raw [input] passed to
/// [_cleanPdfText], and a free-text [expected] outcome that is matched by
/// keyword in [_meetsExpectation].
class _TestCase {
  final String name;
  final String input;
  final String expected;

  _TestCase({required this.name, required this.input, required this.expected});
}