首页 / 教程文章 / 网络传媒WordPress站点柔性内容智能标签系统的构建教程

网络传媒WordPress站点柔性内容智能标签系统的构建教程

网络传媒WordPress站点柔性内容智能标签系统的构建教程

引言:为什么需要智能标签系统

在当今信息爆炸的时代,网络传媒站点每天产生大量内容,如何让这些内容被准确分类、高效检索并智能推荐给用户,成为提升用户体验和站点粘性的关键。传统的手动标签方式不仅效率低下,而且难以保持一致性。本文将详细介绍如何为WordPress站点构建一个柔性内容智能标签系统,该系统能够自动分析内容语义,智能生成标签,并随着内容变化自适应调整。

系统架构设计

整体架构概述

我们的智能标签系统将采用模块化设计,包含以下核心组件:

  1. 内容分析模块:使用自然语言处理技术提取内容关键词
  2. 标签生成模块:基于关键词和现有标签库生成新标签
  3. 标签管理模块:管理标签的增删改查和权重计算
  4. API接口模块:为其他插件和主题提供标签数据接口

技术栈选择

  • PHP 7.4+ 作为后端主要语言
  • MySQL 5.7+ 用于数据存储
  • Python 3.8+ 用于自然语言处理(可选)
  • WordPress REST API 用于前后端通信
  • JavaScript/jQuery 用于前端交互

数据库设计

核心数据表结构

-- Core table of the intelligent tag system: one row per tag.
-- tag_slug is unique; tag_weight and usage_count are indexed for ordering.
-- related_tags is declared as text and described as holding JSON.
-- NOTE(review): the PHP listings in this article keep weight / usage count in
-- term meta (update_term_meta) and never read or write this table — confirm
-- whether it is still needed before deploying it.
CREATE TABLE IF NOT EXISTS `wp_intelligent_tags` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `tag_name` varchar(100) NOT NULL COMMENT '标签名称',
  `tag_slug` varchar(100) NOT NULL COMMENT '标签别名',
  `tag_description` text COMMENT '标签描述',
  `tag_weight` float DEFAULT 1.0 COMMENT '标签权重,用于排序和推荐',
  `related_tags` text COMMENT '相关标签ID,JSON格式',
  `usage_count` int(11) DEFAULT 0 COMMENT '使用次数',
  `auto_generated` tinyint(1) DEFAULT 0 COMMENT '是否自动生成:0-手动,1-自动',
  `created_at` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  `updated_at` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  PRIMARY KEY (`id`),
  UNIQUE KEY `tag_slug` (`tag_slug`),
  KEY `tag_weight` (`tag_weight`),
  KEY `usage_count` (`usage_count`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='智能标签主表';

-- Relation table linking a piece of content (post/page) to a tag.
-- The ID columns use bigint(20) so this reference DDL matches the table the
-- plugin's activation hook creates; with int(11) the two definitions of the
-- same table would disagree on column width.
-- (content_id, tag_id) is unique, so a tag can be attached to a given piece of
-- content at most once.
CREATE TABLE IF NOT EXISTS `wp_content_tag_relations` (
  `id` bigint(20) NOT NULL AUTO_INCREMENT,
  `content_id` bigint(20) NOT NULL COMMENT '内容ID(文章/页面)',
  `tag_id` bigint(20) NOT NULL COMMENT '标签ID',
  `relevance_score` float DEFAULT 1.0 COMMENT '关联度分数',
  `created_at` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  PRIMARY KEY (`id`),
  UNIQUE KEY `content_tag_unique` (`content_id`, `tag_id`),
  KEY `content_id` (`content_id`),
  KEY `tag_id` (`tag_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='内容与标签关联表';

核心功能实现

1. 内容分析模块

<?php
/**
 * Content analyzer - extracts candidate keywords from post content.
 *
 * Text is stripped of markup, reduced to letters and whitespace, and then
 * routed to a Chinese (character bigram) or a whitespace-delimited word
 * extractor depending on the share of Han characters it contains.
 */
class ContentAnalyzer {

    /** Character-class body for the basic CJK Unified Ideographs block. */
    private const HAN_RANGE = '\x{4e00}-\x{9fa5}';

    /**
     * Extract keywords from post content.
     *
     * @param string $content      Raw post content (may contain HTML).
     * @param int    $max_keywords Maximum number of keywords to return.
     * @return array Keywords ordered by descending frequency.
     */
    public function extractKeywords($content, $max_keywords = 10) {
        // Drop markup, then turn entities such as &nbsp; into real characters
        // so that entity names do not end up being counted as words.
        $clean_content = strip_tags((string) $content);
        $clean_content = html_entity_decode($clean_content, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        // Replace everything that is neither a letter/combining mark nor
        // whitespace (digits, punctuation, symbols) with a space. The /u flag
        // is required: without it multi-byte characters would be destroyed.
        $clean_content = preg_replace('/[^\p{L}\p{M}\s]+/u', ' ', $clean_content);

        // preg_replace() yields null on malformed UTF-8 input.
        if (!is_string($clean_content) || trim($clean_content) === '') {
            return [];
        }

        if ($this->isChineseContent($clean_content)) {
            return $this->extractChineseKeywords($clean_content, $max_keywords);
        }

        return $this->extractEnglishKeywords($clean_content, $max_keywords);
    }

    /**
     * Extract keywords from whitespace-delimited (e.g. English) text.
     *
     * @param string $content      Text containing only letters and whitespace.
     * @param int    $max_keywords Maximum number of keywords to return.
     * @return array Most frequent non-stop-words, most frequent first.
     */
    private function extractEnglishKeywords($content, $max_keywords) {
        // Multibyte-safe lowercasing and splitting, so accented words stay intact.
        $content = mb_strtolower($content, 'UTF-8');
        $words = preg_split('/\s+/u', $content, -1, PREG_SPLIT_NO_EMPTY);
        if (!is_array($words)) {
            return [];
        }

        // Remove stop words, then rank the remaining words by frequency.
        $words = array_diff($words, $this->getEnglishStopWords());
        $word_counts = array_count_values($words);
        arsort($word_counts);

        return array_slice(array_keys($word_counts), 0, $max_keywords);
    }

    /**
     * Extract keywords from Chinese text (simplified approach).
     *
     * Uses overlapping two-character windows as "words". A real project
     * should use a proper segmentation library instead.
     *
     * @param string $content      Text containing letters and whitespace.
     * @param int    $max_keywords Maximum number of keywords to return.
     * @return array Most frequent bigrams, most frequent first.
     */
    private function extractChineseKeywords($content, $max_keywords) {
        // Split into runs of consecutive Han characters. Bigrams are built
        // inside each run only, so no "word" spans a punctuation or
        // whitespace boundary of the original text.
        $segments = preg_split(
            '/[^' . self::HAN_RANGE . ']+/u',
            $content,
            -1,
            PREG_SPLIT_NO_EMPTY
        );
        if (!is_array($segments)) {
            return [];
        }

        $words = [];
        foreach ($segments as $segment) {
            $len = mb_strlen($segment, 'UTF-8');
            for ($i = 0; $i + 1 < $len; $i++) {
                $words[] = mb_substr($segment, $i, 2, 'UTF-8');
            }
        }

        $word_counts = array_count_values($words);
        arsort($word_counts);

        return array_slice(array_keys($word_counts), 0, $max_keywords);
    }

    /**
     * English stop-word list.
     *
     * @return array Lower-case words that are never returned as keywords.
     */
    private function getEnglishStopWords() {
        return ['a', 'an', 'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were'];
    }

    /**
     * Decide whether the text should be treated as Chinese.
     *
     * @param string $content Cleaned text.
     * @return bool True when more than 30% of the characters are Han characters.
     */
    private function isChineseContent($content) {
        $chinese_chars = preg_match_all('/[' . self::HAN_RANGE . ']/u', $content);
        if (!$chinese_chars) {
            return false;
        }

        // Count characters, not bytes: a Han character is three bytes in UTF-8.
        $total_chars = mb_strlen($content, 'UTF-8');

        return ($chinese_chars / max($total_chars, 1)) > 0.3;
    }
}
?>

2. 智能标签生成器

<?php
/**
 * Intelligent tag generator.
 *
 * Turns the keywords of a post into `post_tag` terms, records the post/tag
 * relation in the `{prefix}content_tag_relations` table and maintains the
 * `usage_count` / `tag_weight` term meta.
 */
class IntelligentTagGenerator {

    private $db;
    private $analyzer;

    public function __construct() {
        global $wpdb;
        $this->db = $wpdb;
        $this->analyzer = new ContentAnalyzer();
    }

    /**
     * Generate intelligent tags for a post.
     *
     * Safe to call repeatedly for the same post: a tag's usage count only
     * grows when a new post/tag relation row is actually created, so
     * re-saving a post does not inflate counts or weights.
     *
     * @param int $post_id Post ID.
     * @return array IDs of the tags matched or created for the post.
     */
    public function generateTagsForPost($post_id) {
        $post = get_post($post_id);
        if (!$post) {
            return [];
        }

        // Keywords come from the title plus the body.
        $content = $post->post_title . ' ' . $post->post_content;
        $keywords = $this->analyzer->extractKeywords($content, 15);

        $generated_tag_ids = [];

        foreach ($keywords as $keyword) {
            $tag_id = $this->findOrCreateTag($keyword, $post_id);
            if (!$tag_id) {
                continue;
            }

            $generated_tag_ids[] = $tag_id;

            // Count the usage once per (post, tag) pair.
            if ($this->createTagRelation($post_id, $tag_id)) {
                $this->incrementTagUsage($tag_id);
            }
        }

        $this->updateTagWeights($generated_tag_ids);

        return $generated_tag_ids;
    }

    /**
     * Find an existing `post_tag` term by slug or create a new one.
     *
     * @param string $tag_name       Keyword to use as the tag name.
     * @param int    $source_post_id Post that produced the keyword.
     * @return int|false Term ID, or false when the tag could not be created.
     */
    private function findOrCreateTag($tag_name, $source_post_id) {
        $tag_slug = sanitize_title($tag_name);
        if ($tag_slug === '') {
            // Nothing usable is left after sanitising.
            return false;
        }

        $existing_tag = get_term_by('slug', $tag_slug, 'post_tag');
        if ($existing_tag) {
            return (int) $existing_tag->term_id;
        }

        $new_tag = wp_insert_term(
            $tag_name,
            'post_tag',
            [
                'slug' => $tag_slug,
                'description' => '智能生成标签 - 来源文章ID: ' . $source_post_id
            ]
        );

        if (is_wp_error($new_tag)) {
            error_log('创建标签失败: ' . $new_tag->get_error_message());
            return false;
        }

        $term_id = (int) $new_tag['term_id'];

        // Mark as auto-generated. The usage count starts at 0 and is raised
        // by the caller once the relation to the source post is stored.
        update_term_meta($term_id, 'auto_generated', 1);
        update_term_meta($term_id, 'usage_count', 0);
        update_term_meta($term_id, 'tag_weight', 1.0);

        return $term_id;
    }

    /**
     * Store the post/tag relation if it does not exist yet.
     *
     * @param int $post_id Post ID.
     * @param int $tag_id  Term ID.
     * @return bool True when a new relation row was inserted, false when the
     *              relation already existed or the insert failed.
     */
    private function createTagRelation($post_id, $tag_id) {
        $table_name = $this->db->prefix . 'content_tag_relations';

        $existing = $this->db->get_var($this->db->prepare(
            "SELECT id FROM $table_name WHERE content_id = %d AND tag_id = %d",
            $post_id, $tag_id
        ));

        if ($existing) {
            return false;
        }

        $inserted = $this->db->insert(
            $table_name,
            [
                'content_id' => $post_id,
                'tag_id' => $tag_id,
                'relevance_score' => 1.0
            ],
            ['%d', '%d', '%f']
        );

        return (bool) $inserted;
    }

    /**
     * Increase the `usage_count` term meta of a tag by one.
     *
     * @param int $tag_id Term ID.
     */
    private function incrementTagUsage($tag_id) {
        $usage_count = get_term_meta($tag_id, 'usage_count', true);
        $usage_count = $usage_count ? intval($usage_count) + 1 : 1;
        update_term_meta($tag_id, 'usage_count', $usage_count);
    }

    /**
     * Recalculate the `tag_weight` term meta for the given tags.
     *
     * @param array $tag_ids Term IDs.
     */
    private function updateTagWeights($tag_ids) {
        foreach ($tag_ids as $tag_id) {
            $usage_count = get_term_meta($tag_id, 'usage_count', true);
            $usage_count = $usage_count ? intval($usage_count) : 1;

            // Logarithm of the usage count keeps the weight from growing
            // too fast for frequently used tags.
            $weight = log($usage_count + 1);
            update_term_meta($tag_id, 'tag_weight', $weight);
        }
    }
}
?>

前端展示组件

智能标签云展示

/**
 * Intelligent tag cloud widget.
 *
 * Loads tag data from the REST endpoint and renders it into the element with
 * the given id, scaling each tag's font size by its weight.
 */
class IntelligentTagCloud {
    /**
     * @param {string} containerId - id of the element to render into.
     * @param {Object} [options] - overrides for maxTags, minFontSize,
     *     maxFontSize and colors.
     */
    constructor(containerId, options = {}) {
        this.containerId = containerId;
        this.container = document.getElementById(containerId);
        this.options = Object.assign({
            maxTags: 50,
            minFontSize: 12,
            maxFontSize: 36,
            colors: ['#3B82F6', '#10B981', '#EF4444', '#F59E0B', '#8B5CF6']
        }, options);

        this.init();
    }

    /** Fetch the tag data and render it; a no-op when the container is absent. */
    async init() {
        // The widget may be instantiated on pages that do not contain the
        // container element; skip the network request in that case.
        if (!this.container) {
            console.warn(`IntelligentTagCloud: container #${this.containerId} not found`);
            return;
        }

        const tags = await this.fetchTagData();
        this.renderTagCloud(tags);
    }

    /**
     * Load the tag list from the REST API.
     * @returns {Promise<Array>} the tags, or an empty array on any failure.
     */
    async fetchTagData() {
        try {
            const response = await fetch('/wp-json/intelligent-tags/v1/cloud');
            if (!response.ok) {
                // Error responses carry no tag list; report them like network errors.
                throw new Error(`HTTP ${response.status}`);
            }
            const data = await response.json();
            return Array.isArray(data.tags) ? data.tags : [];
        } catch (error) {
            console.error('获取标签数据失败:', error);
            return [];
        }
    }

    /**
     * Render the given tags into the container.
     * @param {Array} tags - objects with name, link, count and weight.
     */
    renderTagCloud(tags) {
        if (!this.container) {
            return;
        }

        if (!tags.length) {
            this.container.innerHTML = '<p>暂无标签数据</p>';
            return;
        }

        // Sort a copy by descending weight so the caller's array is untouched,
        // then keep only the configured number of tags.
        const ranked = [...tags]
            .sort((a, b) => b.weight - a.weight)
            .slice(0, this.options.maxTags);

        const weights = ranked.map(tag => tag.weight);
        const minWeight = Math.min(...weights);
        const weightRange = Math.max(...weights) - minWeight;

        this.container.innerHTML = '';

        ranked.forEach(tag => {
            this.container.appendChild(this.createTagElement(tag, minWeight, weightRange));
        });
    }

    /**
     * Build the anchor element for a single tag.
     * @returns {HTMLAnchorElement}
     */
    createTagElement(tag, minWeight, weightRange) {
        const a = document.createElement('a');
        a.href = tag.link;
        a.textContent = tag.name;
        a.title = `相关文章: ${tag.count}篇`;

        const fontSize = this.calculateFontSize(tag.weight, minWeight, weightRange);
        a.style.fontSize = `${fontSize}px`;

        // Each tag gets a random colour from the palette.
        const colorIndex = Math.floor(Math.random() * this.options.colors.length);
        a.style.color = this.options.colors[colorIndex];

        // Hover effect.
        a.style.transition = 'all 0.3s ease';
        a.addEventListener('mouseenter', () => {
            a.style.transform = 'scale(1.1)';
            a.style.textShadow = '2px 2px 4px rgba(0,0,0,0.2)';
        });

        a.addEventListener('mouseleave', () => {
            a.style.transform = 'scale(1)';
            a.style.textShadow = 'none';
        });

        return a;
    }

    /**
     * Map a weight linearly onto [minFontSize, maxFontSize].
     * @returns {number} font size in pixels; the midpoint when all weights are equal.
     */
    calculateFontSize(weight, minWeight, weightRange) {
        const { minFontSize, maxFontSize } = this.options;

        if (weightRange === 0) {
            return (minFontSize + maxFontSize) / 2;
        }

        const ratio = (weight - minWeight) / weightRange;
        return minFontSize + ratio * (maxFontSize - minFontSize);
    }
}

// Create the tag cloud once the DOM has been parsed.
document.addEventListener('DOMContentLoaded', function () {
    const cloudOptions = { maxTags: 30, minFontSize: 14, maxFontSize: 32 };
    new IntelligentTagCloud('intelligent-tag-cloud', cloudOptions);
});

系统集成与优化

WordPress插件集成

<?php
/**
 * Plugin Name: 智能标签系统
 * Description: 为WordPress站点提供智能内容标签功能
 * Version: 1.0.0
 * Author: 网络传媒技术团队
 */

/**
 * Main plugin class (singleton).
 *
 * Wires the tag generator into WordPress: generates tags when posts are
 * saved, adds a settings page, exposes two REST endpoints and provides the
 * [intelligent_tag_cloud] shortcode.
 */
class IntelligentTagsPlugin {

    private static $instance = null;
    private $tag_generator;

    /**
     * Return the shared plugin instance, creating it on first use.
     *
     * @return IntelligentTagsPlugin
     */
    public static function getInstance() {
        if (null === self::$instance) {
            self::$instance = new self();
        }
        return self::$instance;
    }

    private function __construct() {
        // Create the generator first so it exists before any hook can fire.
        $this->tag_generator = new IntelligentTagGenerator();
        $this->init_hooks();
    }

    /** Register all WordPress hooks used by the plugin. */
    private function init_hooks() {
        // Generate tags automatically when a post is saved.
        add_action('save_post', [$this, 'on_post_save'], 10, 3);

        // Settings page.
        add_action('admin_menu', [$this, 'add_admin_menu']);

        // REST API routes.
        add_action('rest_api_init', [$this, 'register_rest_routes']);

        // Shortcode.
        add_shortcode('intelligent_tag_cloud', [$this, 'tag_cloud_shortcode']);
    }

    /**
     * save_post callback: generate tags for regular posts.
     *
     * @param int     $post_id Post ID.
     * @param WP_Post $post    Post object.
     * @param bool    $update  Whether this is an update of an existing post.
     */
    public function on_post_save($post_id, $post, $update) {
        // Skip autosaves and revisions.
        if (wp_is_post_autosave($post_id) || wp_is_post_revision($post_id)) {
            return;
        }

        // Only the "post" post type is handled.
        if ($post->post_type !== 'post') {
            return;
        }

        if (!current_user_can('edit_post', $post_id)) {
            return;
        }

        // Honour the option stored on activation; it defaults to "yes", so
        // generation stays enabled when the option was never set.
        if (get_option('intelligent_tags_auto_generate', 'yes') !== 'yes') {
            return;
        }

        $this->tag_generator->generateTagsForPost($post_id);
    }

    /** Add the settings page below "Settings". */
    public function add_admin_menu() {
        add_options_page(
            '智能标签设置',
            '智能标签',
            'manage_options',
            'intelligent-tags-settings',
            [$this, 'render_settings_page']
        );
    }

    /** Output the settings page, including a small status card. */
    public function render_settings_page() {
        ?>
        <div class="wrap">
            <h1>智能标签系统设置</h1>
            <form method="post" action="options.php">
                <?php
                settings_fields('intelligent_tags_options');
                do_settings_sections('intelligent_tags_settings');
                submit_button();
                ?>
            </form>
            
            <div class="card">
                <h2>系统状态</h2>
                <p>总标签数: <?php echo (int) $this->get_total_tags_count(); ?></p>
                <p>智能生成标签数: <?php echo (int) $this->get_auto_generated_tags_count(); ?></p>
                <p>标签关联总数: <?php echo (int) $this->get_total_relations_count(); ?></p>
            </div>
        </div>
        <?php
    }

    /** Register the public REST routes of the plugin. */
    public function register_rest_routes() {
        register_rest_route('intelligent-tags/v1', '/cloud', [
            'methods' => 'GET',
            'callback' => [$this, 'get_tag_cloud_data'],
            'permission_callback' => '__return_true'
        ]);

        // The id segment must be numeric (\d+).
        register_rest_route('intelligent-tags/v1', '/related/(?P<id>\d+)', [
            'methods' => 'GET',
            'callback' => [$this, 'get_related_tags'],
            'permission_callback' => '__return_true'
        ]);
    }

    /**
     * REST callback: the 100 heaviest tags that are attached to content.
     *
     * @return array Response with `success`, `count` and `tags`.
     */
    public function get_tag_cloud_data() {
        global $wpdb;

        // meta_value is a text column, so it is cast to a number to get a
        // numeric sort. The weight is aggregated and every other selected
        // column is grouped, which keeps the query valid under
        // ONLY_FULL_GROUP_BY. COUNT(DISTINCT ...) prevents duplicate meta
        // rows from inflating the post count.
        $tags = $wpdb->get_results("
            SELECT t.term_id AS id, t.name, t.slug,
                   COUNT(DISTINCT tr.object_id) AS count,
                   MAX(CAST(tm.meta_value AS DECIMAL(12,6))) AS weight
            FROM {$wpdb->terms} t
            INNER JOIN {$wpdb->term_taxonomy} tt ON t.term_id = tt.term_id
            INNER JOIN {$wpdb->term_relationships} tr ON tt.term_taxonomy_id = tr.term_taxonomy_id
            LEFT JOIN {$wpdb->termmeta} tm ON t.term_id = tm.term_id AND tm.meta_key = 'tag_weight'
            WHERE tt.taxonomy = 'post_tag'
            GROUP BY t.term_id, t.name, t.slug
            ORDER BY weight DESC
            LIMIT 100
        ");

        // get_results() returns null when the query fails.
        if (!is_array($tags)) {
            $tags = [];
        }

        $formatted_tags = array_map(function($tag) {
            $link = get_term_link((int)$tag->id, 'post_tag');

            return [
                'id' => (int)$tag->id,
                'name' => $tag->name,
                'slug' => $tag->slug,
                'count' => (int)$tag->count,
                // Tags without a stored weight fall back to 1.0.
                'weight' => $tag->weight !== null ? (float)$tag->weight : 1.0,
                'link' => is_wp_error($link) ? '' : $link
            ];
        }, $tags);

        return [
            'success' => true,
            'count' => count($formatted_tags),
            'tags' => $formatted_tags
        ];
    }

    /**
     * REST callback: tags that co-occur with the given tag.
     *
     * @param WP_REST_Request $request Request carrying the `id` URL parameter.
     * @return array Response with `success` and, on success, `current_tag`
     *               and up to ten `related_tags`.
     */
    public function get_related_tags($request) {
        $tag_id = (int) $request['id'];

        // get_term() may return null as well as a WP_Error.
        $tag = get_term($tag_id, 'post_tag');
        if (!$tag || is_wp_error($tag)) {
            return ['success' => false, 'message' => '标签不存在'];
        }

        // Posts carrying the tag.
        $related_posts = get_posts([
            'tag_id' => $tag_id,
            'posts_per_page' => 20,
            'fields' => 'ids'
        ]);

        // Count how often every other tag appears on those posts.
        $related_tags = [];
        foreach ($related_posts as $post_id) {
            $post_tags = wp_get_post_tags($post_id);
            if (!is_array($post_tags)) {
                continue;
            }

            foreach ($post_tags as $post_tag) {
                if ((int) $post_tag->term_id === $tag_id) {
                    continue;
                }

                if (!isset($related_tags[$post_tag->term_id])) {
                    $related_tags[$post_tag->term_id] = [
                        'tag' => $post_tag,
                        'count' => 0
                    ];
                }
                $related_tags[$post_tag->term_id]['count']++;
            }
        }

        // Most frequently co-occurring tags first.
        usort($related_tags, function($a, $b) {
            return $b['count'] <=> $a['count'];
        });

        $formatted_tags = array_map(function($item) {
            $link = get_term_link($item['tag']->term_id, 'post_tag');

            return [
                'id' => $item['tag']->term_id,
                'name' => $item['tag']->name,
                'count' => $item['count'],
                'link' => is_wp_error($link) ? '' : $link
            ];
        }, array_slice($related_tags, 0, 10));

        return [
            'success' => true,
            'current_tag' => $tag->name,
            'related_tags' => $formatted_tags
        ];
    }

    /**
     * Shortcode handler for [intelligent_tag_cloud].
     *
     * @param array|string $atts Shortcode attributes: max_tags, min_size, max_size.
     * @return string Container markup plus the inline script and styles.
     */
    public function tag_cloud_shortcode($atts) {
        $atts = shortcode_atts([
            'max_tags' => 30,
            'min_size' => 14,
            'max_size' => 32
        ], $atts);

        // Unique id so that several clouds can live on one page.
        $container_id = 'tag-cloud-' . uniqid();

        ob_start();
        ?>
        <div id="<?php echo esc_attr($container_id); ?>" class="intelligent-tag-cloud"></div>
        
        <script>
        document.addEventListener('DOMContentLoaded', function() {
            // The widget class is loaded separately; bail out if it is missing.
            if (typeof IntelligentTagCloud === 'undefined') {
                console.error('IntelligentTagCloud 类未定义');
                return;
            }
            
            new IntelligentTagCloud('<?php echo esc_js($container_id); ?>', {
                maxTags: <?php echo intval($atts['max_tags']); ?>,
                minFontSize: <?php echo intval($atts['min_size']); ?>,
                maxFontSize: <?php echo intval($atts['max_size']); ?>
            });
        });
        </script>
        
        <style>
        .intelligent-tag-cloud {
            text-align: center;
            padding: 20px;
            line-height: 2.5;
        }
        
        .intelligent-tag-cloud a {
            margin: 0 8px;
            text-decoration: none;
            display: inline-block;
            padding: 4px 8px;
            border-radius: 4px;
            transition: all 0.3s ease;
        }
        
        .intelligent-tag-cloud a:hover {
            background-color: rgba(59, 130, 246, 0.1);
            transform: translateY(-2px);
        }
        </style>
        <?php
        return ob_get_clean();
    }

    /** Number of terms in the post_tag taxonomy. */
    private function get_total_tags_count() {
        global $wpdb;
        return $wpdb->get_var("SELECT COUNT(*) FROM {$wpdb->terms} t 
                              INNER JOIN {$wpdb->term_taxonomy} tt ON t.term_id = tt.term_id 
                              WHERE tt.taxonomy = 'post_tag'");
    }

    /** Number of terms flagged with the auto_generated term meta. */
    private function get_auto_generated_tags_count() {
        global $wpdb;
        return $wpdb->get_var($wpdb->prepare(
            "SELECT COUNT(DISTINCT term_id) FROM {$wpdb->termmeta} 
             WHERE meta_key = 'auto_generated' AND meta_value = %d",
            1
        ));
    }

    /**
     * Number of rows in the term relationships table.
     *
     * NOTE(review): this counts relationships of every taxonomy, not only
     * tag relations - confirm that this is the figure the status card
     * is meant to show.
     */
    private function get_total_relations_count() {
        global $wpdb;
        return $wpdb->get_var("SELECT COUNT(*) FROM {$wpdb->term_relationships}");
    }
}

// Boot the plugin singleton once all plugins have been loaded.
add_action('plugins_loaded', ['IntelligentTagsPlugin', 'getInstance']);

// Create the custom table and default options when the plugin is activated.
register_activation_hook(__FILE__, 'intelligent_tags_activate');

/**
 * Activation callback: creates the content/tag relation table via dbDelta()
 * and registers the plugin's default options.
 */
function intelligent_tags_activate() {
    global $wpdb;

    require_once ABSPATH . 'wp-admin/includes/upgrade.php';

    $table_name = $wpdb->prefix . 'content_tag_relations';
    $charset_collate = $wpdb->get_charset_collate();

    // dbDelta() parses this statement itself and is strict about its format:
    // a plain "CREATE TABLE <name>" (no IF NOT EXISTS), one column per line,
    // and two spaces between PRIMARY KEY and its column list. It creates the
    // table when missing and alters it when the definition changes.
    $sql = "CREATE TABLE {$table_name} (
        id bigint(20) NOT NULL AUTO_INCREMENT,
        content_id bigint(20) NOT NULL,
        tag_id bigint(20) NOT NULL,
        relevance_score float DEFAULT 1.0,
        created_at datetime DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY  (id),
        UNIQUE KEY content_tag_unique (content_id,tag_id),
        KEY content_id (content_id),
        KEY tag_id (tag_id)
    ) {$charset_collate};";

    dbDelta($sql);

    // add_option() leaves values that already exist untouched.
    add_option('intelligent_tags_version', '1.0.0');
    add_option('intelligent_tags_auto_generate', 'yes');
    add_option('intelligent_tags_min_word_length', 2);
}
?>

高级功能扩展

机器学习标签优化

# ml_tag_optimizer.py
# 使用Python进行标签优化(需要WordPress REST API支持)

import requests
import json
from collections import Counter
import jieba  # 中文分词库
import jieba.analyse
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import numpy as np

class MLTagOptimizer:
    """Keyword and tag analysis for a WordPress site via its REST API.

    Posts are fetched with HTTP basic auth from ``<site>/wp-json/wp/v2`` and
    analysed with jieba (segmentation, TextRank) and scikit-learn (TF-IDF,
    K-means).
    """

    def __init__(self, wordpress_url, username, password, timeout=30):
        """Store connection settings.

        Args:
            wordpress_url: Base URL of the site, e.g. ``https://example.com``.
            username: User name for HTTP basic auth.
            password: Password for HTTP basic auth.
            timeout: Seconds to wait for each HTTP request before giving up;
                without a timeout a stalled server would block forever.
        """
        self.wp_url = wordpress_url
        self.auth = (username, password)
        self.timeout = timeout
        # Tolerate a trailing slash in the configured URL.
        self.api_base = f"{wordpress_url.rstrip('/')}/wp-json/wp/v2"

    def fetch_posts(self, per_page=100):
        """Fetch posts from WordPress, following pagination.

        Paging stops at the first non-200 response or empty page; whatever
        was collected up to that point is returned.

        Args:
            per_page: Number of posts requested per page.

        Returns:
            List of post dicts with id, title, content, excerpt and tags.
        """
        posts = []
        page = 1

        while True:
            response = requests.get(
                f"{self.api_base}/posts",
                params={
                    'per_page': per_page,
                    'page': page,
                    '_fields': 'id,title,content,excerpt,tags'
                },
                auth=self.auth,
                timeout=self.timeout
            )

            if response.status_code != 200:
                break

            batch = response.json()
            if not batch:
                break

            posts.extend(batch)
            page += 1

        return posts

    def extract_keywords_tfidf(self, posts, max_keywords=20):
        """Extract keywords from post titles and excerpts using TF-IDF.

        Args:
            posts: Post dicts as returned by ``fetch_posts``.
            max_keywords: Keywords taken from each individual post.

        Returns:
            Up to 50 ``(keyword, post_count)`` tuples, most common first.
            Empty when there are no posts or no usable terms.
        """
        if not posts:
            # The vectorizer cannot be fitted on an empty corpus.
            return []

        # Title plus excerpt of every post, without markup.
        texts = [
            self.clean_html(f"{post['title']['rendered']} {post['excerpt']['rendered']}")
            for post in posts
        ]

        # Segment with jieba and re-join with spaces so the vectorizer can
        # tokenise the Chinese text.
        chinese_texts = [' '.join(jieba.cut(text)) for text in texts]

        vectorizer = TfidfVectorizer(max_features=1000)
        try:
            tfidf_matrix = vectorizer.fit_transform(chinese_texts)
        except ValueError:
            # Raised when no document contains a usable term.
            return []

        feature_names = vectorizer.get_feature_names_out()

        # Collect the top-scoring terms of every post.
        all_keywords = []
        for i in range(len(posts)):
            tfidf_scores = tfidf_matrix[i].toarray()[0]
            top_indices = tfidf_scores.argsort()[-max_keywords:][::-1]
            all_keywords.extend(
                feature_names[idx] for idx in top_indices if tfidf_scores[idx] > 0
            )

        # Rank keywords by the number of posts they were selected for.
        return Counter(all_keywords).most_common(50)

    def cluster_tags(self, tags, n_clusters=10):
        """Group tags into clusters with K-means.

        Only the first 100 tags are considered, and only those for which the
        posts endpoint answers with HTTP 200.

        Args:
            tags: Tag dicts with at least ``id`` and ``name``.
            n_clusters: Number of clusters to form.

        Returns:
            Dict mapping cluster id to a list of tag names; empty when fewer
            tags than clusters are available.
        """
        tag_vectors = []
        tag_names = []

        for tag in tags[:100]:
            response = requests.get(
                f"{self.api_base}/posts",
                params={'tags': tag['id'], 'per_page': 1},
                auth=self.auth,
                timeout=self.timeout
            )

            if response.status_code == 200:
                # Simplification: the tag ID is the only feature.
                # NOTE(review): the response body is not used, so clusters
                # reflect ID proximity rather than content - confirm intent.
                tag_vectors.append([tag['id']])
                tag_names.append(tag['name'])

        if len(tag_vectors) < n_clusters:
            return {}

        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        clusters = kmeans.fit_predict(tag_vectors)

        clustered_tags = {}
        for name, cluster_id in zip(tag_names, clusters):
            clustered_tags.setdefault(cluster_id, []).append(name)

        return clustered_tags

    def clean_html(self, text):
        """Return ``text`` with everything between ``<`` and ``>`` removed."""
        import re
        return re.sub(r'<.*?>', '', text)

    def optimize_tags_for_post(self, post_id):
        """Suggest keywords for a single post with jieba's TextRank.

        Args:
            post_id: ID of the post to analyse.

        Returns:
            Up to ten ``(keyword, weight)`` pairs, or an empty list when the
            post cannot be fetched.
        """
        response = requests.get(
            f"{self.api_base}/posts/{post_id}",
            auth=self.auth,
            timeout=self.timeout
        )

        if response.status_code != 200:
            return []

        post = response.json()
        content = f"{post['title']['rendered']} {post['content']['rendered']}"
        content = self.clean_html(content)

        keywords = jieba.analyse.textrank(
            content,
            topK=10,
            withWeight=True,
            allowPOS=('n', 'vn', 'v', 'ns', 'nr')  # nouns, verbs and similar only
        )

        return keywords

# Usage example
if __name__ == "__main__":
    credentials = {
        "wordpress_url": "https://your-site.com",
        "username": "your_username",
        "password": "your_password",
    }
    optimizer = MLTagOptimizer(**credentials)

    # Fetch the posts and analyse them
    fetched_posts = optimizer.fetch_posts(per_page=50)
    ranked_keywords = optimizer.extract_keywords_tfidf(fetched_posts)

    print("Top 20关键词:")
    for entry in ranked_keywords[:20]:
        keyword, count = entry
        print(f"{keyword}: {count}")

性能优化与缓存策略

<?php
/**
 * Cache layer for intelligent tag data.
 *
 * Stores the tag cloud in the WordPress object cache (group
 * `intelligent_tags`) and offers helpers to clear the cache and to report
 * simple statistics.
 */
class IntelligentTagCache {

    private $cache_group = 'intelligent_tags';
    private $cache_expiration = 3600; // one hour

    /**
     * Return the tag cloud data, served from the cache when possible.
     *
     * @param bool $force_refresh Skip the cache and rebuild the data.
     * @return array List of tag arrays (id, name, slug, count, weight,
     *               auto_generated, link).
     */
    public function get_tag_cloud($force_refresh = false) {
        $cache_key = 'tag_cloud_data';

        if (!$force_refresh) {
            $cached = wp_cache_get($cache_key, $this->cache_group);
            if ($cached !== false) {
                return $cached;
            }
        }

        $data = $this->generate_tag_cloud_data();

        wp_cache_set($cache_key, $data, $this->cache_group, $this->cache_expiration);

        return $data;
    }

    /**
     * Build the tag cloud data from the database.
     *
     * @return array The 100 heaviest post tags that are attached to content.
     */
    private function generate_tag_cloud_data() {
        global $wpdb;

        // meta_value is a text column; casting it gives a numeric ORDER BY
        // instead of a string comparison. Tags without a stored weight are
        // treated as 1.0.
        $tags = $wpdb->get_results("
            SELECT t.term_id, t.name, t.slug, 
                   tt.count as post_count,
                   COALESCE(CAST(tm.meta_value AS DECIMAL(12,6)), 1.0) as weight,
                   tm2.meta_value as is_auto
            FROM {$wpdb->terms} t
            INNER JOIN {$wpdb->term_taxonomy} tt 
                ON t.term_id = tt.term_id 
                AND tt.taxonomy = 'post_tag'
            LEFT JOIN {$wpdb->termmeta} tm 
                ON t.term_id = tm.term_id 
                AND tm.meta_key = 'tag_weight'
            LEFT JOIN {$wpdb->termmeta} tm2 
                ON t.term_id = tm2.term_id 
                AND tm2.meta_key = 'auto_generated'
            WHERE tt.count > 0
            ORDER BY weight DESC
            LIMIT 100
        ");

        // get_results() returns null when the query fails.
        if (!is_array($tags)) {
            return [];
        }

        return array_map(function($tag) {
            $link = get_term_link((int)$tag->term_id, 'post_tag');

            return [
                'id' => (int)$tag->term_id,
                'name' => $tag->name,
                'slug' => $tag->slug,
                'count' => (int)$tag->post_count,
                'weight' => (float)$tag->weight,
                'auto_generated' => (bool)$tag->is_auto,
                'link' => is_wp_error($link) ? '' : $link
            ];
        }, $tags);
    }

    /**
     * Clear every cache maintained for the tag system.
     */
    public function clear_all_cache() {
        // Group flushing is only available on WordPress 6.1+ and only on
        // object-cache backends that declare support for it; otherwise the
        // keys this class is known to use are deleted one by one.
        if (function_exists('wp_cache_supports') && wp_cache_supports('flush_group')) {
            wp_cache_flush_group($this->cache_group);
        } else {
            wp_cache_delete('tag_cloud_data', $this->cache_group);
            wp_cache_delete('total_tags_count', $this->cache_group);
        }

        $this->clear_object_cache();

        // Remember when the cache was cleared; get_cache_stats() reports it.
        update_option('intelligent_tags_last_cleared', current_time('mysql'), false);

        $this->log_cache_clear();
    }

    /**
     * Remove the related transients and the term cache of post tags.
     */
    private function clear_object_cache() {
        delete_transient('intelligent_tags_cloud');
        delete_transient('intelligent_tags_stats');

        clean_term_cache([], 'post_tag');
    }

    /**
     * Append an entry to the cache-clear log, keeping the latest 50 entries.
     */
    private function log_cache_clear() {
        $log_entry = [
            'time' => current_time('mysql'),
            'action' => 'cache_clear',
            'user' => get_current_user_id()
        ];

        $logs = get_option('intelligent_tags_cache_logs', []);
        if (!is_array($logs)) {
            $logs = [];
        }
        $logs[] = $log_entry;

        if (count($logs) > 50) {
            $logs = array_slice($logs, -50);
        }

        // Not autoloaded: the log is only needed on demand.
        update_option('intelligent_tags_cache_logs', $logs, false);
    }

    /**
     * Return cache statistics.
     *
     * @return array total_tags, cache_hits, cache_misses, last_cleared and
     *               hit_rate (percentage with two decimals).
     */
    public function get_cache_stats() {
        $stats = [
            'total_tags' => (int)wp_cache_get('total_tags_count', $this->cache_group),
            'cache_hits' => (int)get_option('intelligent_tags_cache_hits', 0),
            'cache_misses' => (int)get_option('intelligent_tags_cache_misses', 0),
            'last_cleared' => get_option('intelligent_tags_last_cleared', '从未清理')
        ];

        // NOTE(review): nothing in this class writes the hit/miss options -
        // confirm that they are maintained elsewhere.
        $total_requests = $stats['cache_hits'] + $stats['cache_misses'];
        $stats['hit_rate'] = $total_requests > 0 
            ? round(($stats['cache_hits'] / $total_requests) * 100, 2)
            : 0;

        return $stats;
    }
}

/**
 * 数据库查询优化类
 */
class TagQueryOptimizer {
    
    /**
     * 优化标签查询
     */
    public static function optimize_queries() {
        global $wpdb;
        
本文来自网络,不代表柔性供应链服务中心立场,转载请注明出处:https://mall.org.cn/6395.html

EXCHANGES®作者

上一篇
下一篇

为您推荐

发表回复

联系我们

联系我们

18559313275

在线咨询: QQ交谈

邮箱: vip@exchanges.center

工作时间:周一至周五,9:00-17:30,节假日休息
返回顶部