文章目录[隐藏]
网络传媒柔性内容智能去重WordPress插件应用教程
引言:内容重复的挑战与解决方案
在当今网络传媒领域,内容重复是困扰许多网站运营者的普遍问题。无论是新闻聚合、多平台分发还是用户生成内容,重复内容不仅影响搜索引擎排名,还会降低用户体验。传统的内容去重方法往往过于刚性,容易误删有价值的内容变体。
本教程将介绍一款创新的WordPress插件——柔性内容智能去重系统,它结合了先进的算法和灵活的配置选项,帮助网络传媒从业者高效管理网站内容。
插件安装与基础配置
1. 插件安装步骤
首先,我们需要在WordPress后台安装并激活插件:
/**
 * Plugin activation callback.
 *
 * Creates (or upgrades) the deduplication log table and seeds the default
 * plugin options. Registered below as the activation hook for this file.
 *
 * @return void
 */
function smart_content_deduplicator_install() {
    global $wpdb;

    $table_name      = $wpdb->prefix . 'content_deduplication_logs';
    $charset_collate = $wpdb->get_charset_collate();

    // dbDelta() parses this statement itself and is strict about its format:
    //  - it must start with a plain "CREATE TABLE <name>" (an "IF NOT EXISTS"
    //    clause makes it misread the table name, so schema upgrades never run);
    //  - each column sits on its own line;
    //  - there must be TWO spaces between "PRIMARY KEY" and its definition.
    $sql = "CREATE TABLE $table_name (
        id bigint(20) NOT NULL AUTO_INCREMENT,
        original_post_id bigint(20) NOT NULL,
        duplicate_post_id bigint(20) NOT NULL,
        similarity_score float NOT NULL,
        deduplication_type varchar(50) NOT NULL,
        action_taken varchar(50) NOT NULL,
        processed_at datetime DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY  (id),
        KEY original_post_id (original_post_id),
        KEY duplicate_post_id (duplicate_post_id)
    ) $charset_collate;";

    require_once ABSPATH . 'wp-admin/includes/upgrade.php';
    dbDelta($sql);

    // add_option() leaves an existing value untouched, so re-activating the
    // plugin does not reset settings the administrator already changed.
    add_option('scd_similarity_threshold', 0.75);
    add_option('scd_auto_deduplication', 0);
    add_option('scd_check_categories', 1);
}
register_activation_hook(__FILE__, 'smart_content_deduplicator_install');
2. 基本配置界面
安装完成后,进入「设置」→「内容去重」进行基本配置:
/**
 * Registers the plugin's settings screen as a sub-page of the WordPress
 * "Settings" menu. Hooked to `admin_menu` below.
 */
function scd_create_settings_page() {
    $page_title = '柔性内容智能去重设置';
    $menu_title = '内容去重';
    $capability = 'manage_options';
    $menu_slug  = 'smart-content-deduplicator';
    $renderer   = 'scd_settings_page_html';

    add_options_page($page_title, $menu_title, $capability, $menu_slug, $renderer);
}
add_action('admin_menu', 'scd_create_settings_page');
/**
 * Renders the settings screen and persists a submitted form.
 *
 * Saving is protected by a nonce so that a forged cross-site request cannot
 * change the plugin configuration. The similarity threshold is clamped to the
 * range the slider offers (0.1 - 1.0) before it is stored.
 *
 * @return void
 */
function scd_settings_page_html() {
    // Only administrators may view or change these settings.
    if (!current_user_can('manage_options')) {
        return;
    }

    // Persist the form when it was submitted.
    if (isset($_POST['submit'])) {
        // Terminates the request when the nonce is missing or invalid.
        check_admin_referer('scd_save_settings', 'scd_settings_nonce');

        $submitted_threshold = 0.75;
        if (isset($_POST['threshold']) && is_scalar($_POST['threshold'])) {
            $submitted_threshold = floatval(wp_unslash($_POST['threshold']));
        }
        // Never trust the client-side min/max attributes of the slider.
        $submitted_threshold = min(1.0, max(0.1, $submitted_threshold));

        update_option('scd_similarity_threshold', $submitted_threshold);
        update_option('scd_auto_deduplication', isset($_POST['auto_deduplicate']) ? 1 : 0);
        update_option('scd_check_categories', isset($_POST['check_categories']) ? 1 : 0);
        echo '<div class="notice notice-success"><p>设置已保存!</p></div>';
    }

    // Current values, read after a possible save so the form shows fresh data.
    $threshold        = get_option('scd_similarity_threshold', 0.75);
    $auto_deduplicate = get_option('scd_auto_deduplication', 0);
    $check_categories = get_option('scd_check_categories', 1);
    ?>
    <div class="wrap">
        <h1>柔性内容智能去重设置</h1>
        <form method="post">
            <?php wp_nonce_field('scd_save_settings', 'scd_settings_nonce'); ?>
            <table class="form-table">
                <tr>
                    <th scope="row">相似度阈值</th>
                    <td>
                        <input type="range" name="threshold" min="0.1" max="1.0" step="0.05"
                            value="<?php echo esc_attr($threshold); ?>"
                            oninput="thresholdValue.value = this.value">
                        <output name="thresholdValue"><?php echo esc_html($threshold); ?></output>
                        <p class="description">设置内容被视为重复的相似度阈值(0.1-1.0)</p>
                    </td>
                </tr>
                <tr>
                    <th scope="row">自动去重</th>
                    <td>
                        <input type="checkbox" name="auto_deduplicate" value="1"
                            <?php checked($auto_deduplicate, 1); ?>>
                        <label>启用自动去重功能</label>
                    </td>
                </tr>
                <tr>
                    <th scope="row">检查分类</th>
                    <td>
                        <input type="checkbox" name="check_categories" value="1"
                            <?php checked($check_categories, 1); ?>>
                        <label>仅在同一分类内检查重复内容</label>
                    </td>
                </tr>
            </table>
            <?php submit_button(); ?>
        </form>
    </div>
    <?php
}
核心算法:柔性内容相似度检测
1. 文本预处理与特征提取
/**
 * Normalises raw post content into a list of comparable tokens.
 *
 * Steps: lower-case, strip HTML, replace punctuation with spaces, tokenise,
 * drop stop words, optionally stem, and finally drop empty/falsy tokens.
 *
 * @param string $text Raw text, may contain HTML.
 * @return array Tokens; keys are not re-indexed after filtering.
 */
function preprocess_text($text) {
    // Lower-case first so stop-word matching is case-insensitive.
    $text = mb_strtolower($text, 'UTF-8');

    // Remove HTML markup.
    $text = wp_strip_all_tags($text);

    // Replace everything that is not a letter, digit or whitespace.
    // The backslashes are essential: without them the class would match the
    // literal characters "p{L}N s" instead of the Unicode properties.
    $without_punctuation = preg_replace('/[^\p{L}\p{N}\s]/u', ' ', $text);
    if ($without_punctuation !== null) {
        // preg_replace() returns null on malformed UTF-8; keep the text then.
        $text = $without_punctuation;
    }

    // Use a dedicated word segmenter when one is installed; otherwise fall
    // back to whitespace splitting, which is adequate for space-delimited
    // languages.
    if (function_exists('wp_text_split')) {
        $words = (array) wp_text_split($text);
    } else {
        $words = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            $words = [];
        }
    }

    // Remove stop words.
    $stop_words = ['的', '了', '在', '是', '我', '有', '和', '就',
        'the', 'and', 'a', 'an', 'in', 'on', 'at', 'to'];
    $words = array_diff($words, $stop_words);

    // Stemming is optional: stem_word() is not defined in this part of the
    // plugin, so only call it when it is actually available.
    if (function_exists('stem_word')) {
        $words = array_map('stem_word', $words);
    }

    return array_filter($words);
}
/**
 * Returns the cosine similarity of two texts based on their term
 * frequencies after preprocess_text() has tokenised them.
 *
 * @param string $text1 First text.
 * @param string $text2 Second text.
 * @return int|float 0 when either text yields no tokens, otherwise the
 *                   cosine of the angle between the two frequency vectors.
 */
function calculate_similarity($text1, $text2) {
    $tokens_a = preprocess_text($text1);
    $tokens_b = preprocess_text($text2);

    // Sparse term-frequency maps: token => number of occurrences.
    $freq_a = [];
    foreach ($tokens_a as $token) {
        $freq_a[$token] = ($freq_a[$token] ?? 0) + 1;
    }
    $freq_b = [];
    foreach ($tokens_b as $token) {
        $freq_b[$token] = ($freq_b[$token] ?? 0) + 1;
    }

    // Only tokens present in both texts contribute to the dot product.
    $dot = 0;
    foreach ($freq_a as $token => $count) {
        if (isset($freq_b[$token])) {
            $dot += $count * $freq_b[$token];
        }
    }

    $squares_a = 0;
    foreach ($freq_a as $count) {
        $squares_a += $count * $count;
    }
    $squares_b = 0;
    foreach ($freq_b as $count) {
        $squares_b += $count * $count;
    }

    $norm_a = sqrt($squares_a);
    $norm_b = sqrt($squares_b);

    // A text without tokens has no direction; treat it as dissimilar.
    if ($norm_a == 0 || $norm_b == 0) {
        return 0;
    }

    return $dot / ($norm_a * $norm_b);
}
2. 柔性匹配策略实现
/**
 * Finds published posts whose content is similar to the given post.
 *
 * Compares the post against up to 50 published posts of the same type
 * (optionally restricted to the same categories) and reports every one whose
 * similarity reaches the configured threshold.
 *
 * @param int $new_post_id ID of the post to check.
 * @return array List of matches, each with `post_id`, `title`, `similarity`
 *               (percentage rounded to two decimals) and `url`. Empty when
 *               the post does not exist or nothing matches.
 */
function flexible_content_deduplication($new_post_id) {
    $new_post = get_post($new_post_id);
    if (!$new_post) {
        // Nothing to compare; avoids dereferencing null below.
        return [];
    }

    // Options may come back from the database as strings.
    $threshold = (float) get_option('scd_similarity_threshold', 0.75);

    $query_args = [
        'post_type' => $new_post->post_type,
        'post_status' => 'publish',
        'posts_per_page' => 50, // cap the comparison set for performance
        'post__not_in' => [$new_post_id],
    ];

    // Optionally limit the comparison to posts sharing a category.
    if (get_option('scd_check_categories', 1)) {
        $categories = wp_get_post_categories($new_post_id);
        if (!empty($categories)) {
            $query_args['category__in'] = $categories;
        }
    }

    $duplicates = [];
    foreach (get_posts($query_args) as $existing_post) {
        $similarity = calculate_similarity(
            $new_post->post_content,
            $existing_post->post_content
        );

        if ($similarity >= $threshold) {
            $duplicates[] = [
                'post_id' => $existing_post->ID,
                'title' => $existing_post->post_title,
                'similarity' => round($similarity * 100, 2),
                'url' => get_permalink($existing_post->ID)
            ];
        }
    }

    return $duplicates;
}
实战应用:批量处理与自动化
1. 批量内容去重工具
/**
 * Scans every published post for earlier near-duplicates.
 *
 * For each post, up to 20 posts published on or before it are compared; the
 * first one that reaches the similarity threshold is logged, acted upon and
 * reported. Progress is echoed as HTML while the scan runs, and a summary is
 * returned so programmatic callers get usable data.
 *
 * @return array Summary with `processed` (int), `duplicates_found` (int) and
 *               `duplicates` (list of `post_id`, `original_post_id`,
 *               `similarity`, `action`).
 */
function batch_deduplication_process() {
    $all_posts = get_posts([
        'post_type' => 'post',
        'post_status' => 'publish',
        'posts_per_page' => -1,
        'orderby' => 'date',
        'order' => 'DESC'
    ]);

    // Both settings are loop-invariant, so read them once instead of once per
    // comparison.
    $threshold = (float) get_option('scd_similarity_threshold', 0.75);
    $action    = get_option('scd_auto_deduplication', 0) ? 'trash' : 'flag';

    $processed        = 0;
    $duplicates_found = 0;
    $matches          = [];

    echo '<div class="batch-process-results">';
    echo '<h3>批量去重处理中...</h3>';

    foreach ($all_posts as $index => $post) {
        $processed++;

        // Exclude the current post and up to ten of the newest posts that
        // precede it in the (newest-first) list from the candidate set.
        $recent_posts = array_slice($all_posts, 0, min($index, 10));
        $recent_ids   = array_map(function($p) { return $p->ID; }, $recent_posts);

        $previous_posts = get_posts([
            'post_type' => 'post',
            'post_status' => 'publish',
            'posts_per_page' => 20,
            'post__not_in' => array_merge([$post->ID], $recent_ids),
            'date_query' => [
                [
                    'before' => $post->post_date,
                    'inclusive' => true,
                ]
            ]
        ]);

        foreach ($previous_posts as $previous_post) {
            $similarity = calculate_similarity(
                $post->post_content,
                $previous_post->post_content
            );

            if ($similarity < $threshold) {
                continue;
            }

            $duplicates_found++;

            // NOTE(review): the other call site of log_deduplication_action()
            // passes a fifth "action taken" argument; its signature is not
            // visible here, so this call is left as it was.
            log_deduplication_action(
                $previous_post->ID,
                $post->ID,
                $similarity,
                'batch_processing'
            );

            take_deduplication_action($post->ID, $action);

            $matches[] = [
                'post_id' => $post->ID,
                'original_post_id' => $previous_post->ID,
                'similarity' => $similarity,
                'action' => $action,
            ];

            printf(
                '<p>发现重复:文章 #%d "%s" 与文章 #%d 相似度 %.1f%%</p>',
                $post->ID,
                esc_html($post->post_title),
                $previous_post->ID,
                $similarity * 100
            );

            // One match is enough to classify this post as a duplicate.
            break;
        }

        // Periodic progress line.
        if ($processed % 10 == 0) {
            printf('<p>已处理 %d 篇文章,发现 %d 个重复</p>',
                $processed, $duplicates_found);
        }
    }

    printf('<h4>处理完成!共检查 %d 篇文章,发现 %d 个重复内容。</h4>',
        $processed, $duplicates_found);
    echo '</div>';

    return [
        'processed' => $processed,
        'duplicates_found' => $duplicates_found,
        'duplicates' => $matches,
    ];
}
2. 自动化去重工作流
/**
 * Checks a saved post for duplicates and reacts according to similarity.
 *
 * Runs on `save_post`. Only published, non-revision posts are checked, and
 * only when automatic deduplication is enabled. The most similar match
 * decides the outcome: >= 90 % moves the post to the trash, >= 75 % sets it
 * to "pending" and notifies the administrator, anything lower is only logged.
 *
 * @param int $post_id ID of the post being saved.
 * @return void
 */
function auto_check_on_publish($post_id) {
    // Revisions and autosaves are not real publications.
    if (wp_is_post_revision($post_id) || wp_is_post_autosave($post_id)) {
        return;
    }

    // Only published posts are checked.
    if (get_post_status($post_id) != 'publish') {
        return;
    }

    // Respect the "automatic deduplication" switch.
    if (!get_option('scd_auto_deduplication', 0)) {
        return;
    }

    // wp_trash_post()/wp_update_post() fire save_post again; unhook while we
    // work to prevent recursion. This happens only AFTER the early returns so
    // that a skipped post never leaves the hook detached, and the finally
    // block re-attaches it even when something below throws.
    remove_action('save_post', 'auto_check_on_publish', 10);

    try {
        $duplicates = flexible_content_deduplication($post_id);

        if (!empty($duplicates)) {
            // Highest similarity first.
            usort($duplicates, function($a, $b) {
                return $b['similarity'] <=> $a['similarity'];
            });
            $most_similar = $duplicates[0];

            if ($most_similar['similarity'] >= 90) {
                // Near-identical: move to the trash.
                wp_trash_post($post_id);
                $action = 'trashed';
            } elseif ($most_similar['similarity'] >= 75) {
                // Moderately similar: hold for manual review.
                wp_update_post([
                    'ID' => $post_id,
                    'post_status' => 'pending'
                ]);
                $action = 'pending_review';
                send_duplicate_notification($post_id, $most_similar);
            } else {
                // Slightly similar: record only.
                $action = 'logged';
            }

            // The log stores similarity as a 0-1 fraction.
            log_deduplication_action(
                $most_similar['post_id'],
                $post_id,
                $most_similar['similarity'] / 100,
                'auto_check',
                $action
            );
        }
    } finally {
        add_action('save_post', 'auto_check_on_publish', 10, 1);
    }
}
add_action('save_post', 'auto_check_on_publish', 10, 1);
高级功能:自定义规则与API集成
1. 自定义去重规则引擎
/**
 * Rule engine that decides whether duplicate findings for a post should be
 * kept or discarded.
 *
 * Each rule is an array with a `condition` callable (receives the post) and
 * an `action` string. Only the `ignore` action is acted upon by
 * apply_rules(); other actions (e.g. `protect`) are carried along for
 * consumers that are not part of this class.
 */
class DeduplicationRuleEngine {
    /** @var array Rule definitions keyed by rule name. */
    private $rules = [];

    public function __construct() {
        $this->load_rules();
    }

    /**
     * Builds the default rule set and lets other code extend it through the
     * `scd_custom_rules` filter.
     */
    private function load_rules() {
        $this->rules = [
            'ignore_short_posts' => [
                'condition' => function($post) {
                    // Count characters, not bytes: a CJK character takes
                    // three bytes in UTF-8, so strlen() would over-count.
                    return mb_strlen($post->post_content, 'UTF-8') < 300;
                },
                'action' => 'ignore'
            ],
            'whitelist_authors' => [
                'condition' => function($post) {
                    // Normalise both sides to integers so the strict
                    // comparison works whether IDs were stored as strings or
                    // as numbers, and tolerate a non-array option value.
                    $whitelist = array_map('intval', (array) get_option('scd_whitelist_authors', []));
                    return in_array((int) $post->post_author, $whitelist, true);
                },
                'action' => 'ignore'
            ],
            'important_keywords' => [
                'condition' => function($post) {
                    $keywords = ['独家', '首发', '原创'];
                    foreach ($keywords as $keyword) {
                        if (strpos($post->post_title, $keyword) !== false) {
                            return true;
                        }
                    }
                    return false;
                },
                'action' => 'protect'
            ]
        ];

        // Allow developers to add or replace rules.
        $filtered = apply_filters('scd_custom_rules', $this->rules);
        if (is_array($filtered)) {
            $this->rules = $filtered;
        }
    }

    /**
     * Filters the duplicate findings of a post through the rule set.
     *
     * Every rule condition looks only at the post itself, so the verdict is
     * the same for all findings: it is computed once rather than once per
     * finding.
     *
     * @param int   $post_id    ID of the post that was checked.
     * @param array $duplicates Findings for that post.
     * @return array The findings (re-indexed) when no `ignore` rule matches,
     *               otherwise an empty array. Also empty when the post does
     *               not exist.
     */
    public function apply_rules($post_id, $duplicates) {
        if (empty($duplicates)) {
            return [];
        }

        $post = get_post($post_id);
        if (!$post) {
            return [];
        }

        foreach ($this->rules as $rule) {
            // Skip malformed rules that a filter callback may have injected.
            if (!isset($rule['condition'], $rule['action']) || !is_callable($rule['condition'])) {
                continue;
            }
            if ($rule['action'] === 'ignore' && $rule['condition']($post)) {
                return [];
            }
        }

        return array_values($duplicates);
    }
}
2. REST API 集成
/**
 * Registers the plugin's REST routes under `smart-deduplicator/v1`.
 *
 * - POST /check-duplicate: requires `edit_posts`; `content` is mandatory.
 * - POST /batch-process:   requires `manage_options`.
 *
 * Hooked to `rest_api_init` below.
 */
function register_deduplication_api() {
    register_rest_route('smart-deduplicator/v1', '/check-duplicate', [
        'methods' => 'POST',
        'callback' => 'api_check_duplicate',
        'permission_callback' => function() {
            return current_user_can('edit_posts');
        },
        'args' => [
            'content' => [
                'required' => true,
                'validate_callback' => function($param) {
                    // JSON bodies can carry arrays or numbers; trim() on a
                    // non-string would raise a TypeError on PHP 8.
                    return is_string($param) && trim($param) !== '';
                }
            ],
            'title' => [
                'required' => false,
                'default' => ''
            ]
        ]
    ]);

    register_rest_route('smart-deduplicator/v1', '/batch-process', [
        'methods' => 'POST',
        'callback' => 'api_batch_process',
        'permission_callback' => function() {
            return current_user_can('manage_options');
        }
    ]);
}
add_action('rest_api_init', 'register_deduplication_api');
2. REST API 集成(续)
/**
 * REST callback: checks submitted content against recent published posts.
 *
 * Compares the `content` request parameter with the 20 most recent published
 * posts and returns those that reach the configured similarity threshold,
 * most similar first.
 *
 * @param WP_REST_Request $request Request carrying the `content` parameter.
 * @return WP_REST_Response|WP_Error Response with the check result.
 */
function api_check_duplicate($request) {
    $content = $request->get_param('content');

    // The 20 most recent published posts form the comparison set.
    $existing_posts = get_posts([
        'post_type' => 'post',
        'post_status' => 'publish',
        'posts_per_page' => 20,
        'orderby' => 'date',
        'order' => 'DESC'
    ]);

    // Options may come back from the database as strings.
    $threshold = (float) get_option('scd_similarity_threshold', 0.75);

    $results = [];
    foreach ($existing_posts as $existing_post) {
        $similarity = calculate_similarity($content, $existing_post->post_content);
        if ($similarity >= $threshold) {
            $results[] = [
                'post_id' => $existing_post->ID,
                'title' => $existing_post->post_title,
                'similarity' => round($similarity * 100, 2),
                'url' => get_permalink($existing_post->ID),
                'publish_date' => $existing_post->post_date
            ];
        }
    }

    // Most similar first.
    usort($results, function($a, $b) {
        return $b['similarity'] <=> $a['similarity'];
    });

    return rest_ensure_response([
        'success' => true,
        'data' => [
            // Byte length of the submitted content (strlen is byte-wise).
            'checked_content_length' => strlen($content),
            'threshold' => $threshold * 100,
            'duplicates_found' => count($results),
            'duplicates' => $results,
            'recommendation' => count($results) > 0 ?
                '建议修改或合并内容' : '内容独特,可以发布'
        ]
    ]);
}
/**
 * REST callback: runs the batch deduplication scan.
 *
 * batch_deduplication_process() echoes HTML progress markup. Inside a REST
 * request that output would be sent ahead of the JSON body and corrupt the
 * response, so it is captured in an output buffer and returned as the
 * `output` field instead.
 *
 * @param WP_REST_Request $request The incoming request (not inspected).
 * @return WP_REST_Response|WP_Error Response with `success`, `message`,
 *         `data` (whatever the batch routine returns) and `output`.
 */
function api_batch_process($request) {
    ob_start();
    try {
        $result = batch_deduplication_process();
    } finally {
        // Always close the buffer, even if the batch routine throws.
        $progress_html = ob_get_clean();
    }

    return rest_ensure_response([
        'success' => true,
        'message' => '批量去重处理已完成',
        'data' => $result,
        'output' => $progress_html,
    ]);
}
性能优化与最佳实践
1. 数据库索引优化
/**
 * Adds supporting database indexes that do not exist yet.
 *
 * @return array One entry per index that was attempted, with `index`,
 *               `table` (the short label `posts` / `deduplication_logs`) and
 *               `success`. Indexes that already exist are not listed.
 */
function optimize_deduplication_queries() {
    global $wpdb;

    $logs_table = $wpdb->prefix . 'content_deduplication_logs';

    // Label => real table name + the indexes wanted on it. The existence
    // check must run against the real (prefixed) table name; the label is
    // only used for reporting.
    $targets = [
        'posts' => [
            'table' => $wpdb->posts,
            'indexes' => [
                'post_content_hash' => 'ALTER TABLE ' . $wpdb->posts . ' ADD INDEX post_content_hash (post_content(100))',
                'post_date_status' => 'ALTER TABLE ' . $wpdb->posts . ' ADD INDEX post_date_status (post_date, post_status)'
            ]
        ],
        'deduplication_logs' => [
            'table' => $logs_table,
            'indexes' => [
                'processed_date' => 'ALTER TABLE ' . $wpdb->prefix . 'content_deduplication_logs ADD INDEX processed_date (processed_at)'
            ]
        ]
    ];

    $results = [];
    foreach ($targets as $label => $target) {
        foreach ($target['indexes'] as $index_name => $sql) {
            // Skip indexes that are already present; adding one twice is an
            // SQL error.
            $existing = $wpdb->get_var(
                $wpdb->prepare(
                    "SHOW INDEX FROM `{$target['table']}` WHERE Key_name = %s",
                    $index_name
                )
            );
            if ($existing) {
                continue;
            }

            $result = $wpdb->query($sql);
            $results[] = [
                'index' => $index_name,
                'table' => $label,
                'success' => $result !== false
            ];
        }
    }

    return $results;
}
/**
 * Object-cache backed lookup of content fingerprints.
 *
 * The fingerprint of a text is the MD5 of its preprocessed tokens joined by
 * single spaces; it is cached under a key derived from the MD5 of the raw
 * text.
 */
class ContentHashCache {
    private static $cache_group = 'content_hashes';
    private static $cache_expire = 3600; // one hour

    /**
     * Returns the fingerprint of a single text, computing and caching it on
     * a cache miss.
     */
    public static function get_content_hash($content) {
        $cache_key = 'hash_' . md5($content);

        $fingerprint = wp_cache_get($cache_key, self::$cache_group);
        if (false === $fingerprint) {
            // Miss: derive the fingerprint from the normalised tokens.
            $tokens      = preprocess_text($content);
            $fingerprint = md5(implode(' ', $tokens));
            wp_cache_set($cache_key, $fingerprint, self::$cache_group, self::$cache_expire);
        }

        return $fingerprint;
    }

    /**
     * Returns fingerprints for several texts, keyed like the input.
     *
     * Cached entries are collected first; the remaining texts are computed
     * afterwards, so cached keys precede computed ones in the result.
     */
    public static function get_batch_hashes($contents) {
        $resolved = [];
        $pending  = [];

        foreach ($contents as $position => $body) {
            $hit = wp_cache_get('hash_' . md5($body), self::$cache_group);
            if (false === $hit) {
                $pending[$position] = $body;
                continue;
            }
            $resolved[$position] = $hit;
        }

        // Second pass: compute whatever the cache could not supply.
        foreach ($pending as $position => $body) {
            $resolved[$position] = self::get_content_hash($body);
        }

        return $resolved;
    }
}
2. 智能分批处理
/**
 * Processes a large list of posts in fixed-size batches.
 *
 * Echoes a progress bar plus inline scripts that update it after each batch,
 * hands every batch to process_batch() and merges the results.
 *
 * @param array $post_ids   IDs of the posts to process.
 * @param int   $batch_size Posts per batch; values below 1 are treated as 1.
 * @return array Merged results of all batches.
 */
function intelligent_batch_processing($post_ids, $batch_size = 100) {
    // array_chunk() throws for a size below 1.
    $batch_size = max(1, (int) $batch_size);

    $total       = count($post_ids);
    $batches     = array_chunk($post_ids, $batch_size);
    $batch_count = count($batches);
    $results     = [];

    echo '<div class="processing-progress">';
    echo '<div class="progress-bar"><div class="progress"></div></div>';
    echo '<p class="status">准备开始处理...</p>';
    echo '</div>';

    foreach ($batches as $batch_index => $batch) {
        // Number of posts covered once this batch is done. $total is never 0
        // here because an empty list produces no batches.
        $end = min(($batch_index + 1) * $batch_size, $total);

        // Update the progress display.
        echo "<script>
document.querySelector('.progress').style.width = '" . (($end / $total) * 100) . "%';
document.querySelector('.status').textContent = '处理中: $end/$total';
</script>";
        flush();

        $results = array_merge($results, process_batch($batch));

        // Pause between batches to ease the load, but not after the last one.
        if ($batch_index < $batch_count - 1) {
            sleep(1);
        }
    }

    echo "<script>
document.querySelector('.progress').style.width = '100%';
document.querySelector('.status').textContent = '处理完成!共检查 $total 篇文章';
</script>";

    return $results;
}
/**
 * Processes one batch of posts.
 *
 * Loads the published posts of the batch in a single query, groups them by
 * content fingerprint and runs the detailed comparison only inside groups
 * that contain more than one post.
 *
 * @param array $post_ids IDs of the posts in this batch.
 * @return array Merged results of compare_within_group() for every group.
 */
function process_batch($post_ids) {
    global $wpdb;

    // Normalise to distinct positive integers. An empty list would otherwise
    // produce "IN ()", which is a SQL syntax error.
    $post_ids = array_values(array_unique(array_filter(array_map('intval', (array) $post_ids))));
    if (empty($post_ids)) {
        return [];
    }

    $placeholders = implode(',', array_fill(0, count($post_ids), '%d'));

    // Fetch all posts of the batch at once.
    $query = $wpdb->prepare(
        "SELECT ID, post_content, post_title, post_date
        FROM {$wpdb->posts}
        WHERE ID IN ($placeholders)
        AND post_status = 'publish'",
        $post_ids
    );
    $posts = $wpdb->get_results($query);

    // Fingerprints keyed by post ID.
    $contents = array_column($posts, 'post_content', 'ID');
    $hashes   = ContentHashCache::get_batch_hashes($contents);

    // Posts sharing a fingerprint are duplicate candidates.
    $hash_groups = [];
    foreach ($hashes as $post_id => $hash) {
        $hash_groups[$hash][] = $post_id;
    }

    $results = [];
    foreach ($hash_groups as $group_post_ids) {
        if (count($group_post_ids) > 1) {
            $group_results = compare_within_group($group_post_ids, $posts);
            $results = array_merge($results, $group_results);
        }
    }

    return $results;
}
数据分析与报告系统
1. 去重统计与分析
/**
 * Builds the deduplication statistics for a time period.
 *
 * @param string $period One of `day`, `week`, `month`, `year`, `all`; any
 *                       other value is treated as `month`.
 * @return array Report with `period` (the period actually used), `summary`
 *               (aggregate row), `daily_trends` (up to 30 per-day rows),
 *               `top_duplicates` (up to 10 most duplicated originals, each
 *               with a `title`) and `generated_at`.
 */
function generate_deduplication_report($period = 'month') {
    global $wpdb;
    $table_name = $wpdb->prefix . 'content_deduplication_logs';

    // Fixed whitelist: only these SQL fragments are ever interpolated into
    // the queries below, never the caller's input itself.
    $date_conditions = [
        'day' => 'processed_at >= DATE_SUB(NOW(), INTERVAL 1 DAY)',
        'week' => 'processed_at >= DATE_SUB(NOW(), INTERVAL 1 WEEK)',
        'month' => 'processed_at >= DATE_SUB(NOW(), INTERVAL 1 MONTH)',
        'year' => 'processed_at >= DATE_SUB(NOW(), INTERVAL 1 YEAR)',
        'all' => '1=1'
    ];

    // Fall back to "month" for unknown input so the reported period always
    // matches the data that was queried.
    if (!is_string($period) || !isset($date_conditions[$period])) {
        $period = 'month';
    }
    $date_condition = $date_conditions[$period];

    // Aggregate figures.
    $stats = $wpdb->get_row("
        SELECT
        COUNT(*) as total_actions,
        SUM(CASE WHEN action_taken = 'trashed' THEN 1 ELSE 0 END) as trashed_count,
        SUM(CASE WHEN action_taken = 'pending_review' THEN 1 ELSE 0 END) as pending_count,
        AVG(similarity_score) as avg_similarity,
        MIN(similarity_score) as min_similarity,
        MAX(similarity_score) as max_similarity
        FROM $table_name
        WHERE $date_condition
    ");

    // Per-day figures, newest first.
    $daily_stats = $wpdb->get_results("
        SELECT
        DATE(processed_at) as date,
        COUNT(*) as count,
        AVG(similarity_score) as avg_similarity
        FROM $table_name
        WHERE $date_condition
        GROUP BY DATE(processed_at)
        ORDER BY date DESC
        LIMIT 30
    ");

    // Originals that were duplicated most often.
    $top_duplicates = $wpdb->get_results("
        SELECT
        original_post_id,
        COUNT(*) as duplicate_count,
        GROUP_CONCAT(duplicate_post_id) as duplicate_ids
        FROM $table_name
        WHERE $date_condition
        GROUP BY original_post_id
        ORDER BY duplicate_count DESC
        LIMIT 10
    ");

    // Attach titles. The rows are objects, so they can be modified without
    // iterating by reference (which would leave a dangling reference).
    foreach ((array) $top_duplicates as $duplicate) {
        $post = get_post($duplicate->original_post_id);
        $duplicate->title = $post ? $post->post_title : '文章已删除';
    }

    return [
        'period' => $period,
        'summary' => $stats,
        'daily_trends' => $daily_stats,
        'top_duplicates' => $top_duplicates,
        'generated_at' => current_time('mysql')
    ];
}
/**
 * Renders the deduplication statistics page.
 *
 * Reads the requested period from the query string, builds the report and
 * prints the period selector, the summary cards, the table of most duplicated
 * posts and a Chart.js trend chart. All dynamic values are escaped for the
 * context they are printed in.
 *
 * @return void
 */
function display_deduplication_report() {
    // Selectable periods and their button labels.
    $period_labels = [
        'day' => '今日',
        'week' => '本周',
        'month' => '本月',
        'year' => '今年',
        'all' => '全部',
    ];

    // Accept only known period keys from the request.
    $period = 'month';
    if (isset($_GET['period']) && is_string($_GET['period'])) {
        $requested = sanitize_key(wp_unslash($_GET['period']));
        if (isset($period_labels[$requested])) {
            $period = $requested;
        }
    }

    $report = generate_deduplication_report($period);

    // The aggregate row is null when the query fails, and SUM()/AVG() are
    // NULL when there are no rows; normalise everything to numbers.
    $summary        = $report['summary'];
    $total_actions  = $summary ? (int) $summary->total_actions : 0;
    $trashed_count  = $summary ? (int) $summary->trashed_count : 0;
    $pending_count  = $summary ? (int) $summary->pending_count : 0;
    $avg_similarity = $summary ? round((float) $summary->avg_similarity * 100, 1) : 0;

    $top_duplicates = (array) $report['top_duplicates'];
    $daily_trends   = (array) $report['daily_trends'];

    // Chart series.
    $chart_labels = array_column($daily_trends, 'date');
    $chart_counts = array_map('intval', array_column($daily_trends, 'count'));
    $chart_avgs   = array_map(function($item) {
        return $item->avg_similarity * 100;
    }, $daily_trends);
    ?>
    <div class="wrap">
        <h1>内容去重统计报告</h1>
        <div class="period-selector">
            <?php foreach ($period_labels as $key => $label): ?>
            <a href="<?php echo esc_url('?page=deduplication-report&period=' . $key); ?>" class="button <?php echo $period === $key ? 'button-primary' : ''; ?>"><?php echo esc_html($label); ?></a>
            <?php endforeach; ?>
        </div>
        <div class="report-summary">
            <h2>统计概览</h2>
            <div class="stats-grid">
                <div class="stat-card">
                    <h3>总处理次数</h3>
                    <p class="stat-number"><?php echo esc_html($total_actions); ?></p>
                </div>
                <div class="stat-card">
                    <h3>平均相似度</h3>
                    <p class="stat-number"><?php echo esc_html($avg_similarity); ?>%</p>
                </div>
                <div class="stat-card">
                    <h3>已删除文章</h3>
                    <p class="stat-number"><?php echo esc_html($trashed_count); ?></p>
                </div>
                <div class="stat-card">
                    <h3>待审核文章</h3>
                    <p class="stat-number"><?php echo esc_html($pending_count); ?></p>
                </div>
            </div>
        </div>
        <div class="top-duplicates">
            <h2>最常被重复的内容</h2>
            <table class="wp-list-table widefat fixed striped">
                <thead>
                    <tr>
                        <th>文章标题</th>
                        <th>被重复次数</th>
                        <th>操作</th>
                    </tr>
                </thead>
                <tbody>
                    <?php foreach ($top_duplicates as $item): ?>
                    <?php $edit_link = get_edit_post_link($item->original_post_id); ?>
                    <tr>
                        <td><?php echo esc_html($item->title); ?></td>
                        <td><?php echo esc_html((int) $item->duplicate_count); ?></td>
                        <td>
                            <?php if ($edit_link): // empty for deleted posts or missing permission ?>
                            <a href="<?php echo esc_url($edit_link); ?>" class="button">查看</a>
                            <?php endif; ?>
                        </td>
                    </tr>
                    <?php endforeach; ?>
                </tbody>
            </table>
        </div>
        <div class="trend-chart">
            <h2>趋势分析</h2>
            <canvas id="deduplicationTrendChart" width="800" height="300"></canvas>
        </div>
        <script>
        // Draw the trend chart with Chart.js.
        document.addEventListener('DOMContentLoaded', function() {
            // Chart.js is loaded elsewhere; do nothing when it is missing.
            if (typeof Chart === 'undefined') {
                return;
            }
            var ctx = document.getElementById('deduplicationTrendChart').getContext('2d');
            var chart = new Chart(ctx, {
                type: 'line',
                data: {
                    labels: <?php echo wp_json_encode($chart_labels); ?>,
                    datasets: [{
                        label: '重复内容数量',
                        data: <?php echo wp_json_encode($chart_counts); ?>,
                        borderColor: 'rgb(255, 99, 132)',
                        backgroundColor: 'rgba(255, 99, 132, 0.2)',
                        fill: true
                    }, {
                        label: '平均相似度',
                        data: <?php echo wp_json_encode($chart_avgs); ?>,
                        borderColor: 'rgb(54, 162, 235)',
                        backgroundColor: 'rgba(54, 162, 235, 0.2)',
                        fill: true,
                        yAxisID: 'y1'
                    }]
                },
                options: {
                    responsive: true,
                    scales: {
                        y: {
                            beginAtZero: true,
                            title: {
                                display: true,
                                text: '数量'
                            }
                        },
                        y1: {
                            position: 'right',
                            beginAtZero: true,
                            max: 100,
                            title: {
                                display: true,
                                text: '相似度 (%)'
                            }
                        }
                    }
                }
            });
        });
        </script>
    </div>
    <?php
}
故障排除与维护
1. 常见问题解决
/**
* 诊断工具:检查插件状态
*/
function diagnose_plugin_issues() {
$issues = [];
// 检查数据库表
global $wpdb;
$table_name = $wpdb->prefix . 'content_deduplication_logs';
$table_exists = $wpdb->get_var("SHOW TABLES LIKE '$table_name'") === $table_name;
if (!$table_exists) {
$issues[] = [
'level' => 'error',
'message' => '数据库表不存在,请重新激活插件',
'solution' => 'deactivate_and_reactivate'
];
}
// 检查必要的WordPress函数
$required_functions = ['mb_strlen', 'preg_split', 'wp_cache_set'];
foreach ($required_functions as $function) {
if (!function_exists($function)) {
$issues[] = [
'level' => 'warning',
'message' => "缺少必要函数: $function",
'solution' => 'check_php_extensions'
];
}
}
// 检查内存限制
$memory_limit = ini_get('memory_limit');
$memory_bytes = wp_convert_hr_to_bytes($memory_limit);
if ($memory_bytes < 64 * 1024 * 1024) { // 64MB
$issues[] = [
'level' => 'warning',
'message' => "内存限制较低: $memory_limit",
'solution' => 'increase_memory_limit'
];
}
