Files
xianyan/docs/toolsapi/application/api/controller/Check.php
Developer a4b7105999 feat: 新增API响应模型、缓存配置和状态管理
refactor: 优化网络请求和错误处理

fix: 修复颜色引用和UI细节问题

docs: 更新API文档和设计规范

chore: 清理无用文件和脚本

perf: 优化图片导出和压缩逻辑

build: 更新依赖和构建配置

style: 调整代码格式和注释

test: 添加接口验证脚本

ci: 更新CI配置和脚本
2026-04-29 01:39:48 +08:00

812 lines
28 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
namespace app\api\controller;
use app\common\controller\Api;
use think\Db;
use think\Cache;
/**
* @name 查重接口
* @author AI Coder
* @date 2026-04-28
* @desc 提供文本查重功能,支持精确/模糊/相似度查重
* @update v1.0 初始版本支持21种数据源查重
*/
class Check extends Api
{
// Action whitelists; '*' names every action of this controller.
// NOTE(review): presumably read by the parent Api controller to skip the login and
// permission checks for the listed actions - confirm in app\common\controller\Api.
protected $noNeedLogin = ['*'];
protected $noNeedRight = ['*'];
/**
 * Registry of the searchable data sources, keyed by the public `type` identifier.
 *
 * Keys of each entry, as they are used by the methods of this class:
 * - table:         table name handed to Db::name()
 * - name / icon:   display label and emoji echoed back in responses
 * - fields:        columns searched by the exact and fuzzy checks when `field` is 'auto'
 * - title_field:   column returned as `title` ('' when the source has no title column)
 * - content_field: column returned as `content`; the similarity check pre-filters on it
 * - extra_fields:  further columns that are selected and returned under their own names
 * - status_field / status_value: equality filter that restricts which rows are visible.
 *   0 is a legitimate status_value (see `wisdom` and `why`), so its presence has to be
 *   tested with isset() rather than by truthiness.
 */
private static $checkMap = [
'poetry' => [
'table' => 'poetry',
'name' => '古诗词',
'icon' => '📜',
'fields' => ['name', 'content', 'author'],
'title_field' => 'name',
'content_field' => 'content',
'extra_fields' => ['author', 'dynasty', 'tag'],
'status_field' => 'switch',
'status_value' => 1,
],
'story' => [
'table' => 'story',
'name' => '故事大全',
'icon' => '📖',
'fields' => ['title', 'content'],
'title_field' => 'title',
'content_field' => 'content',
'extra_fields' => [],
'status_field' => 'switch',
'status_value' => 1,
],
'composition' => [
'table' => 'composition',
'name' => '作文大全',
'icon' => '✍️',
'fields' => ['title', 'content'],
'title_field' => 'title',
'content_field' => 'content',
'extra_fields' => [],
'status_field' => 'switch',
'status_value' => 1,
],
'lyric' => [
'table' => 'lyric',
'name' => '歌词大全',
'icon' => '🎵',
'fields' => ['title', 'content', 'singer'],
'title_field' => 'title',
'content_field' => 'content',
'extra_fields' => ['singer'],
'status_field' => 'switch',
'status_value' => 1,
],
'wisdom' => [
'table' => 'wisdom',
'name' => '名人名言',
'icon' => '💡',
'fields' => ['content', 'name'],
'title_field' => 'name',
'content_field' => 'content',
'extra_fields' => [],
'status_field' => 'status',
'status_value' => 0,
],
'saying' => [
'table' => 'saying',
'name' => '谚语',
'icon' => '🗣️',
'fields' => ['saying', 'content'],
'title_field' => 'saying',
'content_field' => 'content',
'extra_fields' => [],
'status_field' => 'switch',
'status_value' => 1,
],
'why' => [
'table' => 'why',
'name' => '十万个为什么',
'icon' => '❓',
'fields' => ['title', 'content'],
'title_field' => 'title',
'content_field' => 'content',
'extra_fields' => [],
'status_field' => 'status',
'status_value' => 0,
],
'cs' => [
'table' => 'cs',
'name' => '生活常识',
'icon' => '🏠',
'fields' => ['title', 'content'],
'title_field' => 'title',
'content_field' => 'content',
'extra_fields' => [],
'status_field' => 'switch',
'status_value' => 1,
],
'hitokoto' => [
'table' => 'hitokoto',
'name' => '一言句子',
'icon' => '💬',
'fields' => ['hitokoto'],
'title_field' => '',
'content_field' => 'hitokoto',
'extra_fields' => ['type_name', 'from_source', 'from_who'],
'status_field' => 'switch',
'status_value' => 1,
],
'chengyu' => [
'table' => 'cy',
'name' => '成语大全',
'icon' => '🔤',
'fields' => ['cy', 'cyjs'],
'title_field' => 'cy',
'content_field' => 'cyjs',
'extra_fields' => ['cypy', 'cycc'],
'status_field' => 'switch',
'status_value' => 1,
],
'cidian' => [
'table' => 'zc',
'name' => '词典',
'icon' => '📚',
'fields' => ['zc', 'zcjs'],
'title_field' => 'zc',
'content_field' => 'zcjs',
'extra_fields' => ['zcpy'],
'status_field' => 'switch',
'status_value' => 1,
],
'drug' => [
'table' => 'drug',
'name' => '药品查询',
'icon' => '💊',
'fields' => ['name', 'syz'],
'title_field' => 'name',
'content_field' => 'syz',
'extra_fields' => ['goods_name', 'gg', 'cf'],
'status_field' => 'switch',
'status_value' => 1,
],
'herbal' => [
'table' => 'herbal',
'name' => '中药材',
'icon' => '🌿',
'fields' => ['name', 'effect'],
'title_field' => 'name',
'content_field' => 'effect',
'extra_fields' => ['name_alias', 'spell'],
'status_field' => 'switch',
'status_value' => 1,
],
'prescription' => [
'table' => 'prescription',
'name' => '民间偏方',
'icon' => '🧪',
'fields' => ['title', 'content'],
'title_field' => 'title',
'content_field' => 'content',
'extra_fields' => [],
'status_field' => 'switch',
'status_value' => 1,
],
'tisana' => [
'table' => 'tisana',
'name' => '药茶大全',
'icon' => '🍵',
'fields' => ['name', 'effect', 'recipe'],
'title_field' => 'name',
'content_field' => 'effect',
'extra_fields' => ['recipe', 'source'],
'status_field' => 'switch',
'status_value' => 1,
],
'food' => [
'table' => 'food',
'name' => '食物相克',
'icon' => '🍽️',
'fields' => ['sw', 'yh'],
'title_field' => 'sw',
'content_field' => 'yh',
'extra_fields' => [],
'status_field' => 'switch',
'status_value' => 1,
],
'couplet' => [
'table' => 'couplet',
'name' => '对联大全',
'icon' => '🧧',
'fields' => ['hp', 'sl', 'xl'],
'title_field' => 'hp',
'content_field' => 'sl',
'extra_fields' => ['xl', 'yy'],
'status_field' => 'switch',
'status_value' => 1,
],
'brainteaser' => [
'table' => 'brainteaser',
'name' => '脑筋急转弯',
'icon' => '🧠',
'fields' => ['topic', 'answer'],
'title_field' => 'topic',
'content_field' => 'answer',
'extra_fields' => [],
'status_field' => 'switch',
'status_value' => 1,
],
'riddle' => [
'table' => 'riddle',
'name' => '谜语大全',
'icon' => '🔮',
'fields' => ['riddle', 'miidii'],
'title_field' => 'riddle',
'content_field' => 'miidii',
'extra_fields' => [],
'status_field' => 'switch',
'status_value' => 1,
],
'xiehouyu' => [
'table' => 'efs',
'name' => '歇后语',
'icon' => '😏',
'fields' => ['facet', 'undertone'],
'title_field' => 'facet',
'content_field' => 'undertone',
'extra_fields' => [],
'status_field' => 'switch',
'status_value' => 1,
],
'jiufang' => [
'table' => 'jiufang',
'name' => '酒方大全',
'icon' => '🍷',
'fields' => ['name', 'ingredients', 'usage'],
'title_field' => 'name',
'content_field' => 'usage',
'extra_fields' => ['ingredients', 'source', 'method', 'categories'],
'status_field' => 'switch',
'status_value' => 1,
],
];
/**
 * @name List of duplicate-check data sources
 * @desc Returns every configured data source with its row count and field metadata.
 *
 * Responds through $this->success() with:
 * - total_sources: number of configured sources
 * - sources: one entry per source (key, name, icon, table, total, check_fields,
 *   title_field, content_field, extra_fields)
 */
public function sources()
{
    $sources = [];
    foreach (self::$checkMap as $key => $config) {
        $count = 0;
        try {
            $query = Db::name($config['table']);
            // isset(), not truthiness: sources such as `wisdom` and `why` filter on
            // status = 0, and a truthiness test would count their rows unfiltered.
            if ($config['status_field'] && isset($config['status_value'])) {
                $query->where($config['status_field'], $config['status_value']);
            }
            $count = $query->count();
        } catch (\Exception $e) {
            // Deliberate best effort: a source whose table cannot be counted is still
            // listed, with a total of 0, instead of failing the whole listing.
        }
        $sources[] = [
            'key' => $key,
            'name' => $config['name'],
            'icon' => $config['icon'],
            'table' => 'tool_' . $config['table'],
            'total' => $count,
            'check_fields' => $config['fields'],
            'title_field' => $config['title_field'],
            'content_field' => $config['content_field'],
            'extra_fields' => $config['extra_fields'],
        ];
    }
    $this->success('成功', [
        'total_sources' => count($sources),
        'sources' => $sources,
    ]);
}
/**
 * @name Exact duplicate check
 * @desc Finds rows in which one of the searched columns equals the submitted text.
 *
 * POST parameters:
 * - text  (required) text to check, at most 5000 characters
 * - type  a key of self::$checkMap, or 'all' (default)
 * - field column to compare, or 'auto' (default) for the source's configured fields
 * - limit maximum number of matches over all sources, clamped to 1..50 (default 20)
 *
 * The outcome is delivered through $this->success() / $this->error().
 * NOTE(review): the validation below relies on error() ending the request - confirm in Api.
 */
public function exact()
{
    $rawPost = $this->request->post(false);
    // Non-scalar values (e.g. text[]=x) count as absent instead of being passed to trim().
    $param = static function ($key, $default) use ($rawPost) {
        return (isset($rawPost[$key]) && is_scalar($rawPost[$key])) ? trim((string) $rawPost[$key]) : $default;
    };
    $text = $param('text', '');
    $type = $param('type', 'all');
    $field = $param('field', 'auto');
    $limit = min(50, max(1, intval($param('limit', 20))));

    // Compare with '' explicitly: empty() would also reject the valid text "0".
    if ($text === '') {
        $this->error('请输入查重文本');
    }
    if (mb_strlen($text) > 5000) {
        $this->error('文本长度不能超过5000字符');
    }
    // Reject unknown sources up front (as report() does) rather than answering "no matches".
    if ($type !== 'all' && !isset(self::$checkMap[$type])) {
        $this->error('不支持的查重类型: ' . $type);
    }

    $types = ($type === 'all') ? array_keys(self::$checkMap) : [$type];
    $results = [];
    $total_matched = 0;
    foreach ($types as $t) {
        $config = self::$checkMap[$t];
        // Ask each source only for what is still missing, so the total never exceeds $limit.
        $matched = $this->_exactCheck($config, $text, $field, $limit - $total_matched);
        if (!empty($matched)) {
            $total_matched += count($matched);
            $results[] = [
                'type' => $t,
                'name' => $config['name'],
                'icon' => $config['icon'],
                'count' => count($matched),
                'matches' => $matched,
            ];
        }
        if ($total_matched >= $limit) {
            break;
        }
    }

    $this->success('查重完成', [
        'text' => mb_substr($text, 0, 100),
        'text_length' => mb_strlen($text),
        'mode' => 'exact',
        'types_checked' => count($types),
        'total_matched' => $total_matched,
        'results' => $results,
    ]);
}
/**
 * @name Fuzzy duplicate check
 * @desc Extracts keywords from the text and matches them with a single LIKE pattern.
 *
 * POST parameters:
 * - text       (required) text to check, at most 5000 characters
 * - type       a key of self::$checkMap, or 'all' (default)
 * - field      column to search, or 'auto' (default) for the source's configured fields
 * - limit      maximum number of matches over all sources, clamped to 1..50 (default 20)
 * - min_length minimum text/keyword length, at least 1 (default 4)
 *
 * The outcome is delivered through $this->success() / $this->error().
 * NOTE(review): the validation below relies on error() ending the request - confirm in Api.
 */
public function fuzzy()
{
    $rawPost = $this->request->post(false);
    // Non-scalar values (e.g. text[]=x) count as absent instead of being passed to trim().
    $param = static function ($key, $default) use ($rawPost) {
        return (isset($rawPost[$key]) && is_scalar($rawPost[$key])) ? trim((string) $rawPost[$key]) : $default;
    };
    $text = $param('text', '');
    $type = $param('type', 'all');
    $field = $param('field', 'auto');
    $limit = min(50, max(1, intval($param('limit', 20))));
    // A minimum below 1 would admit empty keywords, and an empty keyword turns the
    // LIKE pattern into '%%', which matches every row.
    $min_len = max(1, intval($param('min_length', 4)));

    // Compare with '' explicitly: empty() would also reject the valid text "0".
    if ($text === '') {
        $this->error('请输入查重文本');
    }
    if (mb_strlen($text) < $min_len) {
        $this->error("查重文本至少需要{$min_len}个字符");
    }
    if (mb_strlen($text) > 5000) {
        $this->error('文本长度不能超过5000字符');
    }
    // Reject unknown sources up front (as report() does) rather than answering "no matches".
    if ($type !== 'all' && !isset(self::$checkMap[$type])) {
        $this->error('不支持的查重类型: ' . $type);
    }

    // Reindexed so the list is serialised as a JSON array whatever keys the helper returns.
    $keywords = array_values($this->_extractKeywords($text, $min_len));
    if (empty($keywords)) {
        $this->error('未能提取有效关键词,请增加文本长度');
    }

    $types = ($type === 'all') ? array_keys(self::$checkMap) : [$type];
    $results = [];
    $total_matched = 0;
    foreach ($types as $t) {
        $config = self::$checkMap[$t];
        // Ask each source only for what is still missing, so the total never exceeds $limit.
        $matched = $this->_fuzzyCheck($config, $keywords, $field, $limit - $total_matched);
        if (!empty($matched)) {
            $total_matched += count($matched);
            $results[] = [
                'type' => $t,
                'name' => $config['name'],
                'icon' => $config['icon'],
                'count' => count($matched),
                'matches' => $matched,
            ];
        }
        if ($total_matched >= $limit) {
            break;
        }
    }

    $this->success('查重完成', [
        'text' => mb_substr($text, 0, 100),
        'text_length' => mb_strlen($text),
        'mode' => 'fuzzy',
        'keywords' => $keywords,
        'types_checked' => count($types),
        'total_matched' => $total_matched,
        'results' => $results,
    ]);
}
/**
 * @name Similarity duplicate check
 * @desc Scores candidate rows by cosine similarity of character 2-gram counts.
 *
 * POST parameters:
 * - text      (required) text to check, at most 5000 characters
 * - type      a key of self::$checkMap, or 'all' (default)
 * - threshold minimum similarity, clamped to 0.1..1.0 (default 0.3)
 * - limit     maximum number of matches over all sources, clamped to 1..50 (default 20)
 *
 * Result groups are ordered by their best similarity, highest first.
 * The outcome is delivered through $this->success() / $this->error().
 * NOTE(review): the validation below relies on error() ending the request - confirm in Api.
 */
public function similar()
{
    $rawPost = $this->request->post(false);
    // Non-scalar values (e.g. text[]=x) count as absent instead of being passed to trim().
    $param = static function ($key, $default) use ($rawPost) {
        return (isset($rawPost[$key]) && is_scalar($rawPost[$key])) ? trim((string) $rawPost[$key]) : $default;
    };
    $text = $param('text', '');
    $type = $param('type', 'all');
    $threshold = max(0.1, min(1.0, floatval($param('threshold', 0.3))));
    $limit = min(50, max(1, intval($param('limit', 20))));

    // Compare with '' explicitly: empty() would also reject the valid text "0".
    if ($text === '') {
        $this->error('请输入查重文本');
    }
    if (mb_strlen($text) > 5000) {
        $this->error('文本长度不能超过5000字符');
    }
    // Reject unknown sources up front (as report() does) rather than answering "no matches".
    if ($type !== 'all' && !isset(self::$checkMap[$type])) {
        $this->error('不支持的查重类型: ' . $type);
    }

    $textNgrams = $this->_ngram($text, 2);
    $types = ($type === 'all') ? array_keys(self::$checkMap) : [$type];
    $results = [];
    $total_matched = 0;
    foreach ($types as $t) {
        $config = self::$checkMap[$t];
        // Ask each source only for what is still missing, so the total never exceeds $limit.
        $matched = $this->_similarCheck($config, $text, $textNgrams, $threshold, $limit - $total_matched);
        if (!empty($matched)) {
            $total_matched += count($matched);
            $results[] = [
                'type' => $t,
                'name' => $config['name'],
                'icon' => $config['icon'],
                'count' => count($matched),
                'matches' => $matched,
            ];
        }
        if ($total_matched >= $limit) {
            break;
        }
    }

    // Every stored group has at least one match, so max() always receives a non-empty list.
    usort($results, static function ($a, $b) {
        $maxA = !empty($a['matches']) ? max(array_column($a['matches'], 'similarity')) : 0;
        $maxB = !empty($b['matches']) ? max(array_column($b['matches'], 'similarity')) : 0;
        return $maxB <=> $maxA;
    });

    $this->success('查重完成', [
        'text' => mb_substr($text, 0, 100),
        'text_length' => mb_strlen($text),
        'mode' => 'similar',
        'threshold' => $threshold,
        'types_checked' => count($types),
        'total_matched' => $total_matched,
        'results' => $results,
    ]);
}
/**
 * @name Combined duplicate report
 * @desc Runs the exact, fuzzy and similarity checks in one request and derives a risk rating.
 *
 * POST parameters:
 * - text  (required) text to check, at most 5000 characters
 * - type  a key of self::$checkMap, or 'all' (default)
 * - limit maximum matches per source and per check, clamped to 1..20 (default 10)
 *
 * Risk rating:
 * - high:   any exact match, or a best similarity of at least 0.6
 * - medium: any fuzzy or similarity match
 * - low:    nothing matched
 *
 * The outcome is delivered through $this->success() / $this->error().
 * NOTE(review): the validation below relies on error() ending the request - confirm in Api.
 */
public function report()
{
    $rawPost = $this->request->post(false);
    // Non-scalar values (e.g. text[]=x) count as absent instead of being passed to trim().
    $param = static function ($key, $default) use ($rawPost) {
        return (isset($rawPost[$key]) && is_scalar($rawPost[$key])) ? trim((string) $rawPost[$key]) : $default;
    };
    $text = $param('text', '');
    $type = $param('type', 'all');
    $limit = min(20, max(1, intval($param('limit', 10))));

    // Compare with '' explicitly: empty() would also reject the valid text "0".
    if ($text === '') {
        $this->error('请输入查重文本');
    }
    if (mb_strlen($text) > 5000) {
        $this->error('文本长度不能超过5000字符');
    }
    if ($type !== 'all' && !isset(self::$checkMap[$type])) {
        $this->error('不支持的查重类型: ' . $type);
    }

    $startTime = microtime(true);
    $types = ($type === 'all') ? array_keys(self::$checkMap) : [$type];

    // Both depend on the text only, so they are computed once rather than per source.
    $keywords = array_values($this->_extractKeywords($text, 4));
    $textNgrams = $this->_ngram($text, 2);

    $exactResults = [];
    $fuzzyResults = [];
    $similarResults = [];
    foreach ($types as $t) {
        $config = self::$checkMap[$t];
        $group = ['type' => $t, 'name' => $config['name'], 'icon' => $config['icon']];

        $exactMatch = $this->_exactCheck($config, $text, 'auto', $limit);
        if (!empty($exactMatch)) {
            $exactResults[] = $group + ['count' => count($exactMatch), 'matches' => $exactMatch];
        }

        // Without keywords the LIKE pattern would be '%%', which matches every row and
        // would flag short texts as duplicates of everything; skip the fuzzy pass instead.
        $fuzzyMatch = empty($keywords) ? [] : $this->_fuzzyCheck($config, $keywords, 'auto', $limit);
        if (!empty($fuzzyMatch)) {
            $fuzzyResults[] = $group + ['count' => count($fuzzyMatch), 'matches' => $fuzzyMatch];
        }

        $similarMatch = $this->_similarCheck($config, $text, $textNgrams, 0.3, $limit);
        if (!empty($similarMatch)) {
            $similarResults[] = $group + ['count' => count($similarMatch), 'matches' => $similarMatch];
        }
    }

    $exactTotal = array_sum(array_column($exactResults, 'count'));
    $fuzzyTotal = array_sum(array_column($fuzzyResults, 'count'));
    $similarTotal = array_sum(array_column($similarResults, 'count'));

    // Best similarity over all sources, together with the source it came from.
    $maxSimilarity = 0;
    $maxSimilaritySource = '';
    foreach ($similarResults as $sr) {
        foreach ($sr['matches'] as $m) {
            if ($m['similarity'] > $maxSimilarity) {
                $maxSimilarity = $m['similarity'];
                $maxSimilaritySource = $sr['name'];
            }
        }
    }
    $elapsed = round(microtime(true) - $startTime, 3);

    $riskLevel = 'low';
    $riskScore = 0;
    if ($exactTotal > 0) {
        $riskLevel = 'high';
        $riskScore = min(100, 60 + $exactTotal * 10);
    } elseif ($similarTotal > 0 && $maxSimilarity >= 0.6) {
        $riskLevel = 'high';
        $riskScore = min(100, 40 + intval($maxSimilarity * 50));
    } elseif ($fuzzyTotal > 0 || $similarTotal > 0) {
        $riskLevel = 'medium';
        $riskScore = min(100, 20 + $fuzzyTotal * 5 + intval($maxSimilarity * 30));
    }

    $this->success('查重报告生成完成', [
        'text' => mb_substr($text, 0, 100),
        'text_length' => mb_strlen($text),
        'types_checked' => count($types),
        'elapsed_time' => $elapsed . 's',
        'risk_level' => $riskLevel,
        'risk_score' => $riskScore,
        'max_similarity' => round($maxSimilarity * 100, 1) . '%',
        'max_similarity_source' => $maxSimilaritySource,
        'summary' => [
            'exact_matches' => $exactTotal,
            'fuzzy_matches' => $fuzzyTotal,
            'similar_matches' => $similarTotal,
        ],
        'exact_results' => $exactResults,
        'fuzzy_results' => $fuzzyResults,
        'similar_results' => $similarResults,
    ]);
}
/**
 * @name Exact check against one data source
 *
 * @param array  $config one entry of self::$checkMap
 * @param string $text   text that a searched column must equal
 * @param string $field  'auto' to search $config['fields'], otherwise one configured column
 * @param int    $limit  maximum number of rows to return
 * @return array formatted matches, each with similarity 1.0; [] when nothing matches,
 *               when $field is not a configured column, or when the query fails
 */
private function _exactCheck($config, $text, $field, $limit)
{
    if ($field === 'auto') {
        $searchFields = $config['fields'];
    } else {
        // $field comes from the request and is used as a column identifier, so it must
        // be one of the columns configured for this source.
        $allowed = array_merge(
            $config['fields'],
            $config['extra_fields'],
            [$config['title_field'], $config['content_field']]
        );
        if ($field === '' || !in_array($field, $allowed, true)) {
            return [];
        }
        $searchFields = [$field];
    }

    try {
        $query = Db::name($config['table']);
        if ($config['status_field'] && isset($config['status_value'])) {
            $query->where($config['status_field'], $config['status_value']);
        }
        // Grouped in a closure so the OR conditions stay separate from the status filter.
        $query->where(function ($q) use ($searchFields, $text) {
            foreach ($searchFields as $f) {
                $q->whereOr($f, '=', $text);
            }
        });

        $selectFields = ['id'];
        if ($config['title_field']) {
            $selectFields[] = $config['title_field'];
        }
        if ($config['content_field']) {
            $selectFields[] = $config['content_field'];
        }
        foreach ($config['extra_fields'] as $ef) {
            if (!in_array($ef, $selectFields, true)) {
                $selectFields[] = $ef;
            }
        }

        $rows = $query->field($selectFields)->limit($limit)->select();
        return $this->_formatMatches($config, $rows, 1.0);
    } catch (\Exception $e) {
        // Best effort: a source that cannot be queried contributes no matches.
        return [];
    }
}
/**
 * @name Fuzzy check against one data source
 *
 * Builds one LIKE pattern '%k1%k2%...%' from the keywords, so a row matches only when
 * it contains all keywords in the given order.
 *
 * @param array  $config   one entry of self::$checkMap
 * @param array  $keywords keywords to look for; empty strings are ignored
 * @param string $field    'auto' to search $config['fields'], otherwise one configured column
 * @param int    $limit    maximum number of rows to return
 * @return array formatted matches without a similarity value; [] when there is no usable
 *               keyword, when $field is not a configured column, or when the query fails
 */
private function _fuzzyCheck($config, $keywords, $field, $limit)
{
    // Without a keyword the pattern would collapse to '%%' and match every row.
    $keywords = array_values(array_filter((array) $keywords, static function ($keyword) {
        return (string) $keyword !== '';
    }));
    if (empty($keywords)) {
        return [];
    }

    if ($field === 'auto') {
        $searchFields = $config['fields'];
    } else {
        // $field comes from the request and is used as a column identifier, so it must
        // be one of the columns configured for this source.
        $allowed = array_merge(
            $config['fields'],
            $config['extra_fields'],
            [$config['title_field'], $config['content_field']]
        );
        if ($field === '' || !in_array($field, $allowed, true)) {
            return [];
        }
        $searchFields = [$field];
    }

    try {
        $query = Db::name($config['table']);
        if ($config['status_field'] && isset($config['status_value'])) {
            $query->where($config['status_field'], $config['status_value']);
        }
        $kw = '%' . implode('%', $keywords) . '%';
        // Grouped in a closure so the OR conditions stay separate from the status filter.
        $query->where(function ($q) use ($searchFields, $kw) {
            foreach ($searchFields as $f) {
                $q->whereOr($f, 'like', $kw);
            }
        });

        $selectFields = ['id'];
        if ($config['title_field']) {
            $selectFields[] = $config['title_field'];
        }
        if ($config['content_field']) {
            $selectFields[] = $config['content_field'];
        }
        foreach ($config['extra_fields'] as $ef) {
            if (!in_array($ef, $selectFields, true)) {
                $selectFields[] = $ef;
            }
        }

        $rows = $query->field($selectFields)->limit($limit)->select();
        return $this->_formatMatches($config, $rows, null);
    } catch (\Exception $e) {
        // Best effort: a source that cannot be queried contributes no matches.
        return [];
    }
}
/**
 * @name Similarity check against one data source
 *
 * Pre-filters candidate rows with a LIKE on the first one or two keywords of the text,
 * then scores each candidate by cosine similarity of its 2-gram counts.
 *
 * @param array  $config     one entry of self::$checkMap
 * @param string $text       text being checked
 * @param array  $textNgrams 2-gram counts of $text, as produced by _ngram()
 * @param float  $threshold  minimum similarity for a row to be reported
 * @param int    $limit      maximum number of matches to return
 * @return array matches ordered by similarity, highest first; [] when no keyword can be
 *               extracted or when the query fails
 */
private function _similarCheck($config, $text, $textNgrams, $threshold, $limit)
{
    try {
        // Reindexed so that "the first two keywords" never depends on the key layout of
        // the list the helper returns; a gap at index 1 would otherwise abort the check.
        $keywords = array_values($this->_extractKeywords($text, 2));
        if (empty($keywords)) {
            return [];
        }

        $query = Db::name($config['table']);
        if ($config['status_field'] && isset($config['status_value'])) {
            $query->where($config['status_field'], $config['status_value']);
        }
        $primaryField = $config['content_field'] ?: $config['title_field'];
        // '%k1%' for a single keyword, '%k1%k2%' when there are two or more.
        $kw = '%' . implode('%', array_slice($keywords, 0, 2)) . '%';
        $query->where($primaryField, 'like', $kw);

        $selectFields = ['id'];
        if ($config['title_field']) {
            $selectFields[] = $config['title_field'];
        }
        if ($config['content_field']) {
            $selectFields[] = $config['content_field'];
        }
        foreach ($config['extra_fields'] as $ef) {
            if (!in_array($ef, $selectFields, true)) {
                $selectFields[] = $ef;
            }
        }

        // Scoring happens in PHP, so only the first 100 pre-filtered candidates are examined.
        $rows = $query->field($selectFields)->limit(100)->select();

        $matches = [];
        foreach ($rows as $row) {
            // Compare against "title content" with markup removed.
            $rowText = '';
            if ($config['title_field'] && isset($row[$config['title_field']])) {
                $rowText .= $row[$config['title_field']] . ' ';
            }
            if ($config['content_field'] && isset($row[$config['content_field']])) {
                $rowText .= $row[$config['content_field']];
            }
            $rowNgrams = $this->_ngram(strip_tags($rowText), 2);
            $similarity = $this->_cosineSimilarity($textNgrams, $rowNgrams);
            if ($similarity >= $threshold) {
                $match = $this->_formatSingleMatch($config, $row);
                $match['similarity'] = round($similarity, 4);
                $match['similarity_percent'] = round($similarity * 100, 1) . '%';
                $matches[] = $match;
            }
        }

        usort($matches, static function ($a, $b) {
            return $b['similarity'] <=> $a['similarity'];
        });
        return array_slice($matches, 0, $limit);
    } catch (\Exception $e) {
        // Best effort: a source that cannot be queried contributes no matches.
        return [];
    }
}
/**
 * @name Format a list of matched rows
 *
 * @param array      $config     one entry of self::$checkMap
 * @param iterable   $rows       rows returned by the query
 * @param float|null $similarity fixed similarity to attach to every match, or null for none
 * @return array one formatted match per row, in row order
 */
private function _formatMatches($config, $rows, $similarity = null)
{
    $withScore = ($similarity !== null);
    // The percentage label is the same for every row, so it is built once.
    $percentLabel = $withScore ? round($similarity * 100, 1) . '%' : null;

    $formatted = [];
    foreach ($rows as $record) {
        $entry = $this->_formatSingleMatch($config, $record);
        if ($withScore) {
            $entry['similarity'] = $similarity;
            $entry['similarity_percent'] = $percentLabel;
        }
        $formatted[] = $entry;
    }
    return $formatted;
}
/**
 * @name Format a single matched row
 *
 * Maps the configured title/content columns to `title` (at most 200 characters) and
 * `content` (at most 300), copies non-empty extra columns under their own names
 * (at most 100 characters each), and strips markup from every value.
 *
 * @param array $config one entry of self::$checkMap
 * @param array $row    database row; must contain `id`
 * @return array formatted match
 */
private function _formatSingleMatch($config, $row)
{
    // Removes markup, then cuts the value to the given number of characters.
    $clip = static function ($value, $max) {
        return mb_substr(strip_tags($value), 0, $max);
    };

    $match = ['id' => $row['id']];

    // config key => [output key, maximum length]
    $primary = [
        'title_field' => ['title', 200],
        'content_field' => ['content', 300],
    ];
    foreach ($primary as $configKey => $spec) {
        $column = $config[$configKey];
        if ($column && isset($row[$column])) {
            $match[$spec[0]] = $clip($row[$column], $spec[1]);
        }
    }

    foreach ($config['extra_fields'] as $extra) {
        // !empty() covers the isset() test as well; empty values are left out.
        if (!empty($row[$extra])) {
            $match[$extra] = $clip($row[$extra], 100);
        }
    }
    return $match;
}
/**
 * @name Extract keywords
 *
 * Replaces every character outside U+4E00..U+9FA5, a-z, A-Z and 0-9 with a space and
 * keeps the remaining runs that are at least $minLen characters long. When no run is
 * long enough but the cleaned text as a whole is, overlapping chunks of up to 8
 * characters are used instead.
 *
 * @param string $text   raw text; markup is stripped first
 * @param int    $minLen minimum keyword length; values below 1 are treated as 1
 * @return array list of at most 10 distinct keywords, indexed 0..n-1
 */
private function _extractKeywords($text, $minLen = 2)
{
    // A minimum below 1 would let the empty string through as a keyword.
    $minLen = max(1, (int) $minLen);

    $text = preg_replace('/[^\x{4e00}-\x{9fa5}a-zA-Z0-9]/u', ' ', strip_tags((string) $text));
    if ($text === null) {
        // preg_replace() with the u modifier returns null for invalid UTF-8 input.
        return [];
    }
    // Only plain spaces can be left as separators here, so a byte-wise \s is not needed.
    $text = preg_replace('/ +/', ' ', trim($text));

    $keywords = [];
    foreach (explode(' ', $text) as $word) {
        if (mb_strlen($word) >= $minLen) {
            $keywords[] = $word;
        }
    }

    if (empty($keywords) && mb_strlen($text) >= $minLen) {
        // Every run is too short: slide a window over the text, advancing by half its size.
        $len = mb_strlen($text);
        $chunkSize = min(8, $len);
        $step = max(1, intval($chunkSize / 2));
        for ($i = 0; $i <= $len - $chunkSize; $i += $step) {
            $keywords[] = mb_substr($text, $i, $chunkSize);
        }
    }

    // array_unique() keeps the original keys; reindex so callers can rely on [0], [1], ...
    return array_values(array_unique(array_slice($keywords, 0, 10)));
}
/**
 * @name Build n-gram counts
 *
 * Strips markup and all whitespace, then counts every run of $n consecutive characters.
 *
 * @param string $text source text
 * @param int    $n    gram length in characters; values below 1 yield no grams
 * @return array map of gram => number of occurrences, in order of first appearance
 */
private function _ngram($text, $n = 2)
{
    $n = (int) $n;
    if ($n < 1) {
        return [];
    }

    $text = strip_tags((string) $text);
    // The u modifier makes \s Unicode-aware, so full-width (U+3000) and no-break spaces
    // are removed too and multi-byte characters are never inspected byte by byte.
    $compact = preg_replace('/\s+/u', '', $text);
    if ($compact === null) {
        // Invalid UTF-8: fall back to removing the ASCII whitespace characters only.
        $compact = str_replace([' ', "\t", "\n", "\r", "\f", "\x0B"], '', $text);
    }

    $ngrams = [];
    $last = mb_strlen($compact) - $n;
    for ($i = 0; $i <= $last; $i++) {
        $gram = mb_substr($compact, $i, $n);
        $ngrams[$gram] = ($ngrams[$gram] ?? 0) + 1;
    }
    return $ngrams;
}
/**
 * @name Cosine similarity of two n-gram count maps
 *
 * @param array $ngrams1 map of gram => count
 * @param array $ngrams2 map of gram => count
 * @return int|float 0 when either map is empty or has a zero norm, otherwise
 *                   dot(a, b) / (|a| * |b|)
 */
private function _cosineSimilarity($ngrams1, $ngrams2)
{
    if (empty($ngrams1) || empty($ngrams2)) {
        return 0;
    }

    // Squared length of the first vector.
    $squaredLeft = 0;
    foreach ($ngrams1 as $weight) {
        $squaredLeft += $weight * $weight;
    }

    // Squared length of the second vector.
    $squaredRight = 0;
    foreach ($ngrams2 as $weight) {
        $squaredRight += $weight * $weight;
    }

    if (!($squaredLeft != 0 && $squaredRight != 0)) {
        return 0;
    }

    // Only grams present in both maps contribute to the dot product.
    $shared = 0;
    foreach ($ngrams1 as $gram => $weight) {
        if (isset($ngrams2[$gram])) {
            $shared += $weight * $ngrams2[$gram];
        }
    }

    return $shared / (sqrt($squaredLeft) * sqrt($squaredRight));
}
}