PHP 自动提取 tag

使用外部API提取

<?php


/**
 * Retrieve only the body from the raw response.
 *
 * Anything that cannot be indexed like an array (null, scalars, plain
 * objects) is treated as an "incorrect parameter" and yields an empty string
 * instead of raising an error.
 *
 * @since 2.7.0
 *
 * @param array|WP_Error $response HTTP response.
 * @return string The body of the response. Empty string if no body or incorrect parameter given.
 */
function wp_remote_retrieve_body( $response ) {
    // isset() with an array offset on an object that does not implement
    // ArrayAccess throws "Cannot use object of type ... as array", so such
    // values have to be rejected before the offset lookup.
    if ( ! is_array( $response ) && ! ( $response instanceof ArrayAccess ) ) {
        return '';
    }

    return isset( $response['body'] ) ? $response['body'] : '';
}


/**
 * Send an HTTP POST request with cURL and return the raw response body.
 *
 * The payload is always encoded with http_build_query(), so it goes out as
 * URL-encoded form data regardless of any Content-Type header supplied.
 *
 * NOTE(review): TLS peer/host verification stays disabled by default because
 * that is what existing callers get today. This leaves the request open to
 * man-in-the-middle attacks; pass $verify_ssl = true wherever the remote
 * certificate is expected to validate.
 *
 * @param  string       $url        Address to post to.
 * @param  array        $post_data  Fields to send.
 * @param  integer      $timeout    Timeout in seconds, used for both the connect phase and the
 *                                  whole transfer. Non-numeric or non-positive values leave
 *                                  cURL's own defaults in place.
 * @param  string|array $header     A single header line, or a list of header lines.
 * @param  boolean      $verify_ssl Whether to verify the server certificate and host name.
 * @return string|false Response body, or false when the transfer failed.
 */
function curlPost($url, $post_data=array(), $timeout=100, $header="", $verify_ssl=false) {
    // Normalise the header argument to a list and drop blank entries so that
    // an empty header line is never handed to cURL.
    $headers = array();
    foreach ((array) $header as $line) {
        if (is_string($line) && trim($line) !== '') {
            $headers[] = $line;
        }
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post_data));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    // VERIFYHOST takes 2 (check the certificate's host name) or 0 (skip).
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool) $verify_ssl);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $verify_ssl ? 2 : 0);

    // Only apply a timeout that is a usable number; anything else (for
    // example an array passed by mistake) must not turn into a tiny timeout.
    if (is_numeric($timeout) && $timeout > 0) {
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, (int) $timeout);
        curl_setopt($ch, CURLOPT_TIMEOUT, (int) $timeout);
    }

    // HTTP_HOST is not set when running from the command line.
    if (!empty($_SERVER['HTTP_HOST'])) {
        curl_setopt($ch, CURLOPT_REFERER, $_SERVER['HTTP_HOST']);
    }

    if ($headers) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    }

    $result = curl_exec($ch);
    if ($result === false) {
        // Report failures to the error log instead of echoing into the page.
        error_log(sprintf(
            'curlPost: request to %s failed (cURL error %d: %s)',
            $url,
            curl_errno($ch),
            curl_error($ch)
        ));
    }
    curl_close($ch);

    return $result;
}


/**
 * Convert an HTML fragment to plain text.
 *
 * Removes <script> elements and all remaining tags, collapses whitespace that
 * follows a line break, and decodes a fixed set of HTML entities. Decoded
 * characters are emitted as UTF-8 so the result stays valid UTF-8 alongside
 * the umlaut literals used below.
 *
 * @param  string $ep HTML input.
 * @return string Plain-text version of the input.
 */
function wp_aatags_html2text($ep) {
    // Ordered map of pattern => replacement; preg_replace() applies the
    // entries one after another to the running result.
    $rules = array(
        "'<script[^>]*?>.*?</script>'si" => "",
        "'<[\/\!]*?[^<>]*?>'si"          => "",
        "'([\r\n])[\s]+'"                => "\\1",
        "'&(quot|#34|#034|#x22);'i"      => "\"",
        "'&(lt|#60|#060|#x3c);'i"        => "<",
        "'&(gt|#62|#062|#x3e);'i"        => ">",
        "'&(nbsp|#160|#xa0);'i"          => " ",
        "'&(iexcl|#161);'i"              => "\xC2\xA1",     // U+00A1
        "'&(cent|#162);'i"               => "\xC2\xA2",     // U+00A2
        "'&(pound|#163);'i"              => "\xC2\xA3",     // U+00A3
        "'&(copy|#169);'i"               => "\xC2\xA9",     // U+00A9
        "'&(reg|#174);'i"                => "\xC2\xAE",     // U+00AE
        "'&(deg|#176);'i"                => "\xC2\xB0",     // U+00B0
        "'&(#39|#039|#x27);'"            => "'",
        "'&(euro|#8364);'i"              => "\xE2\x82\xAC", // U+20AC
        "'&a(uml|UML);'"                 => "ä",
        "'&o(uml|UML);'"                 => "ö",
        "'&u(uml|UML);'"                 => "ü",
        "'&A(uml|UML);'"                 => "Ä",
        "'&O(uml|UML);'"                 => "Ö",
        "'&U(uml|UML);'"                 => "Ü",
        "'&szlig;'i"                     => "ß",
        // Decode "&amp;" last: doing it earlier would turn an escaped entity
        // such as "&amp;lt;" into "&lt;" and then decode it a second time.
        "'&(amp|#38|#038|#x26);'i"       => "&",
    );

    return preg_replace(array_keys($rules), array_values($rules), $ep);
}

/**
 * Clean up a tag string.
 *
 * Steps, in order: non-breaking spaces become ordinary spaces, a fixed list
 * of punctuation characters is removed, all ASCII digits are removed, runs of
 * whitespace and hyphens collapse to one hyphen, and leading/trailing commas,
 * hyphens and underscores are trimmed.
 *
 * @param  string $taglist Raw tag text.
 * @return string Sanitized tag text.
 */
function wp_aatags_sanitize($taglist) {
    // Characters removed outright. '=' is listed on its own: a '\=' entry can
    // never match because backslashes are stripped earlier in the same pass.
    // '%' and '+' are stripped here too, so "%20" ends up empty once the
    // digit removal below has run.
    $special_chars = array(
        '?', '、', '。', '“', '”', '《', '》', '!', ',', ':', '.',
        '[', ']', '/', '\\', '=', '<', '>', ';', '\'', '"', '&', '$', '#',
        '*', '(', ')', '|', '~', '`', '{', '}', '%', '+', chr(0),
    );

    // The /u pattern yields null for input that is not valid UTF-8; keep the
    // original text in that case instead of discarding every tag.
    $normalized = preg_replace("#\x{00a0}#siu", ' ', $taglist);
    if ($normalized !== null) {
        $taglist = $normalized;
    }

    $taglist = str_replace($special_chars, '', $taglist);
    $taglist = preg_replace('/[\d]+/', '', $taglist);
    $taglist = preg_replace('/[\r\n\t -]+/', '-', $taglist);

    return trim($taglist, ',-_');
}

/**
 * Ask the remote extraction service for keywords of a text.
 *
 * Posts the text and the requested count ("topk") to the service through
 * curlPost() and hands back the raw response body. curlPost() does not expose
 * the HTTP status code, so a failed transfer or an empty body is the only
 * error condition that can be detected here.
 *
 * @param  string  $keys Text to extract keywords from.
 * @param  integer $num  Value sent as the "topk" field.
 * @return string Raw response body, or the sentinel 'rEr' on failure.
 */
function wp_aatags_keycontents($keys, $num) {
    // curlPost() form-encodes the payload, so the declared content type has
    // to match it. Headers belong in the fourth argument; the third one is
    // the timeout in seconds.
    $request = curlPost(
        'https://cws.9sep.org/extract/json',
        array('text' => $keys, 'topk' => $num),
        100,
        'Content-Type: application/x-www-form-urlencoded'
    );

    // curl_exec() reports a failed transfer as false.
    if (!is_string($request) || $request === '') {
        return 'rEr';
    }

    return $request;
}

// Sample input for the demo calls below.
$content = '内容';

// First request: strip markup from the sample text, then ask for topk = 1.
$body = wp_aatags_keycontents(
    wp_aatags_html2text($content),
    1
);

// Second request: send the unprocessed text with topk = 3 (result not kept).
wp_aatags_keycontents(
    $content,
    3
);

?>

 

posted @ 2021-10-11 09:58  周小黑  阅读(209)  评论(0)  编辑  收藏  举报