[coreseek/sphinx学习笔记6]--实例代码

我的梦境 提交于 2019-12-01 21:26:15
[这是本人工作中实现的coreseek/sphinx搜索,稍做了些修改,希望大家提出优化意见]


1.环境说明
安装目录:/var/coreseek/coreseek
配置文件:(./etc/sphinx.conf)

# searchd daemon settings
searchd
{
        # API port; search.php connects its SphinxClient to 127.0.0.1:9312
        port    = 9312
        # Daemon log and per-query log
        log    = /var/coreseek/coreseek/var/log/searchd.log
        query_log    = /var/coreseek/coreseek/var/log/query.log
        # Client request read timeout (seconds)
        read_timeout    = 5
        # Upper bound on concurrently running search children
        max_children    = 30
        pid_file    = /var/coreseek/coreseek/var/log/searchd.pid
        # Maximum number of matches kept per query; paging cannot go past this
        max_matches    = 1000
        # Rotation behaviour when the indexer is run with --rotate
        seamless_rotate    = 1
        preopen_indexes    = 0
        unlink_old    = 1
        # Additional listener speaking the MySQL 4.1 wire protocol on port 9306
        listen    = localhost:9306:mysql41
}

# Data source: the XML document stream produced by sphinxindex.php
source ztm_sheet_xml_source
{
        # Must be xmlpipe2, not the legacy xmlpipe: the xmlpipe_field /
        # xmlpipe_attr_* directives below and the <sphinx:docset> stream that
        # the export script generates are only understood by xmlpipe2.
        type    = xmlpipe2
        # NOTE(review): sphinxindex.php writes sphinxforsheet.pipe.xml relative to
        # its working directory - confirm the file really ends up at this path.
        xmlpipe_command    = cat /var/www/htdocs/myweb/application/data/sphinxforsheet.pipe.xml
        # Full-text fields
        xmlpipe_field     = sh_title
        xmlpipe_field     = sh_intro
        # Attributes usable for filtering and sorting
        xmlpipe_attr_uint    = sh_id
        xmlpipe_attr_timestamp  = sh_time
        xmlpipe_attr_uint               = sh_uid
        xmlpipe_attr_uint               = sh_subject
        xmlpipe_attr_uint               = sh_question_count
        xmlpipe_attr_uint               = sh_note_count
        xmlpipe_attr_uint               = sh_view_count
        xmlpipe_attr_uint               = sh_share_count
        xmlpipe_attr_uint               = sh_praise_count
        xmlpipe_attr_uint               = sh_download_count
        xmlpipe_attr_uint               = sh_admin_note
        xmlpipe_attr_uint               = sh_year
        xmlpipe_attr_uint               = sh_type
        xmlpipe_attr_uint               = sh_grade
        xmlpipe_attr_uint               = sh_city
}

# Index definition
index ztm_sheet_index
{
        # Documents come from the xmlpipe source declared above
        source      = ztm_sheet_xml_source
        # Base path of the index files
        path        = /var/coreseek/coreseek/var/data/sheet
        docinfo     = extern
        # coreseek-specific: directory of the mmseg dictionary used for Chinese
        # word segmentation (presumably containing uni.lib - verify on the host)
        charset_dictpath     =  /var/coreseek/mmseg3/etc
        charset_type    = zh_cn.utf-8
        # 0 turns n-gram indexing off
        ngram_len     = 0
}

2.建立索引的php代码(sphinxindex.php)
<?php
/**
 * Exports rows of the `sheet` table as an xmlpipe2 document stream
 * (sphinxforsheet.pipe.xml) for the Sphinx/coreseek indexer.
 *
 * The file is produced in three steps: init() writes the XML declaration and
 * the schema, generateIndex() appends one group of <sphinx:document> elements
 * per id batch, and finally the closing </sphinx:docset> tag is appended.
 */
class script_sphinxindexforsheet {
    /**
     * Output file, relative to the current working directory.
     * NOTE(review): sphinx.conf reads .../application/data/sphinxforsheet.pipe.xml,
     * so the script presumably has to be started from that directory - confirm.
     */
    const PIPE_FILE = "sphinxforsheet.pipe.xml";

    /** Full-text fields: xmlpipe element name => column of `sheet`. */
    private static $fields = array(
        'sh_title' => 'title',
        'sh_intro' => 'intro',
    );

    /** Attributes: xmlpipe element name => array(column of `sheet`, schema type). */
    private static $attributes = array(
        'sh_id'             => array('id', 'int'),
        'sh_uid'            => array('uid', 'int'),
        'sh_time'           => array('create_time', 'timestamp'),
        'sh_subject'        => array('subject', 'int'),
        'sh_question_count' => array('question_count', 'int'),
        'sh_note_count'     => array('note_count', 'int'),
        'sh_view_count'     => array('view_count', 'int'),
        'sh_share_count'    => array('share_count', 'int'),
        'sh_praise_count'   => array('praise_count', 'int'),
        'sh_download_count' => array('download_count', 'int'),
        'sh_admin_note'     => array('admin_note', 'int'),
        'sh_year'           => array('year', 'int'),
        'sh_type'           => array('type', 'int'),
        'sh_grade'          => array('grade', 'int'),
        'sh_city'           => array('city', 'int'),
    );

    /**
     * Creates (or truncates) the pipe file and writes the XML head and schema.
     *
     * @throws RuntimeException when the file cannot be opened or written
     */
    public static function init() {
        self::writePipeFile(self::getXmlPipeHead(), "w");
    }

    /**
     * Builds the complete pipe file by exporting the table in id batches.
     *
     * @param int $min_id smallest id to export (ideally read from the database)
     * @param int $max_id largest id to export (ideally read from the database)
     * @param int $limit  number of ids covered by one batch
     * @throws InvalidArgumentException when $limit is not positive
     * @throws RuntimeException         when the pipe file cannot be written
     */
    public static function generateIndex($min_id = 1, $max_id = 100000, $limit = 3000) {
        $min_id = (int) $min_id;
        $max_id = (int) $max_id;
        $limit  = (int) $limit;
        // A non-positive step would never advance the batch window.
        if ($limit < 1) {
            throw new InvalidArgumentException('$limit must be a positive integer');
        }

        self::init();
        // Each batch covers the ids [$begin_id, $begin_id + $limit - 1]; consecutive
        // batches neither overlap nor leave gaps, and no batch starts past $max_id.
        for ($begin_id = $min_id; $begin_id <= $max_id; $begin_id += $limit) {
            self::getSheets($begin_id - 1, $begin_id + $limit);
            // Short pause (300 microseconds) between two batches.
            usleep(300);
        }
        self::appendXmlPipe(self::getXmlPipeFooter());
    }

    /**
     * Appends a chunk of XML to the pipe file.
     *
     * @param string $content XML fragment to append
     * @throws RuntimeException when the file cannot be opened or written
     */
    protected static function appendXmlPipe($content){
        self::writePipeFile($content, "a");
    }

    /**
     * @return string closing tag of the document set
     */
    protected static function getXmlPipeFooter(){
        return "</sphinx:docset>\n";
    }

    /**
     * Exports all rows with $offset < id < $length (both bounds exclusive) and
     * appends them to the pipe file.
     *
     * @param int $offset exclusive lower id bound
     * @param int $length exclusive upper id bound
     */
    private static function getSheets($offset, $length) {
        $host = 'localhost';
        $port = 3306;
        $user = 'root';
        $pwd = 'abc123';
        $dbname = 'test';

        // mysqli replaces the mysql_* extension, which no longer exists in PHP 7+.
        // NOTE(review): the connection charset is left at the server default while
        // the XML is declared as utf-8 - confirm the two agree.
        $mysql = mysqli_connect($host, $user, $pwd, $dbname, $port);
        if (!$mysql) {
            die('Could not connect: ' . mysqli_connect_error());
        }

        $sql = sprintf(
            "select id,uid,create_time,title,intro,subject,question_count,note_count,view_count,share_count,praise_count,download_count,admin_note,type,year,city,grade from sheet where id>%d and id<%d",
            $offset,
            $length
        );
        $ret = mysqli_query($mysql, $sql);
        if ($ret === false) {
            $error = mysqli_error($mysql);
            mysqli_close($mysql);
            die('Query failed: ' . $error);
        }

        $xml = '';
        while ($row = mysqli_fetch_assoc($ret)) {
            $xml .= self::buildDocumentXml($row);
        }
        mysqli_free_result($ret);
        mysqli_close($mysql);

        // Nothing to append for an id range without rows.
        if ($xml !== '') {
            self::appendXmlPipe($xml);
        }
    }

    /**
     * Renders one table row as a <sphinx:document> element.
     *
     * @param array $row associative row of the `sheet` table
     * @return string XML fragment, terminated by a newline
     */
    private static function buildDocumentXml($row) {
        $xml = "<sphinx:document id=\"" . (isset($row['id']) ? $row['id'] : '') . "\">\n";
        foreach (self::$fields as $name => $column) {
            $text = isset($row[$column]) ? $row[$column] : '';
            $xml .= "<" . $name . "><![CDATA[" . self::escapeCdata($text) . "]]></" . $name . ">\n";
        }
        foreach (self::$attributes as $name => $definition) {
            $column = $definition[0];
            $value = isset($row[$column]) ? $row[$column] : '';
            $xml .= "<" . $name . ">" . $value . "</" . $name . ">\n";
        }
        $xml .= "</sphinx:document>\n";
        return $xml;
    }

    /**
     * Makes arbitrary text safe for a CDATA section.
     *
     * @param string $text raw column value
     * @return string text without characters that are illegal in XML 1.0 and
     *                with every "]]>" split across two CDATA sections
     */
    private static function escapeCdata($text) {
        // Control characters other than tab, LF and CR are not allowed in XML 1.0.
        $text = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', (string) $text);
        // "]]>" would terminate the CDATA section early.
        return str_replace(']]>', ']]]]><![CDATA[>', $text);
    }

    /**
     * Opens the pipe file in the given mode, writes $content and closes it.
     *
     * @param string $content data to write
     * @param string $mode    fopen() mode: "w" to start over, "a" to append
     * @throws RuntimeException when the file cannot be opened or written
     */
    private static function writePipeFile($content, $mode) {
        $f = fopen(self::PIPE_FILE, $mode);
        if ($f === false) {
            throw new RuntimeException('Cannot open ' . self::PIPE_FILE . ' (mode ' . $mode . ')');
        }
        $written = fwrite($f, (string) $content);
        fclose($f);
        if ($written === false) {
            throw new RuntimeException('Cannot write to ' . self::PIPE_FILE);
        }
    }

    /**
     * @return string XML declaration, opening <sphinx:docset> tag and the schema
     */
    private static function getXmlPipeHead(){
        $xml  = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
        $xml .= "<sphinx:docset>\n";
        $xml .= "<sphinx:schema>\n";
        foreach (self::$fields as $name => $column) {
            $xml .= "<sphinx:field name=\"" . $name . "\"/>\n";
        }
        foreach (self::$attributes as $name => $definition) {
            $xml .= "<sphinx:attr name=\"" . $name . "\" type=\"" . $definition[1] . "\"/>\n";
        }
        $xml .= "</sphinx:schema>\n";
        return $xml;
    }
}
// Script entry point: the export is built as soon as this file is executed.
script_sphinxindexforsheet::generateIndex();
?>

3.运行代码,建立索引

# sphinxindex.php writes sphinxforsheet.pipe.xml into the current directory and
# sphinx.conf reads it from application/data, so run the export from there.
# The export runs in the foreground: the indexer must not start before the
# file is complete, and is skipped entirely if the export fails.
cd /var/www/htdocs/myweb/application/data &&
php ../script/sphinxindex.php &&
# Rebuild the index and let the running searchd switch over to it.
cd /var/coreseek/coreseek &&
./bin/indexer -c etc/sphinx.conf --rotate ztm_sheet_index

4.查询(search.php)
<?php
require_once '/var/www/htdocs/myweb/application/lib/sphinx/sphinxapi.php';

/**
 * Search front-end for the ztm_sheet_index Sphinx index.
 *
 * query() reads the search parameters from the request, search() shapes the
 * result for the caller, and searchd() talks to the Sphinx daemon.
 */
class application_biz_searchsheet {
    /** Sort attribute used when the request does not name a valid one. */
    const DEFAULT_SORT_ATTR = 'sh_download_count';

    /** Page size used when none (or a non-positive one) is given. */
    const DEFAULT_PAGENUM = 10;

    /** Attributes of ztm_sheet_index that a request may sort by. */
    protected static $sortable_attrs = array(
        'sh_id', 'sh_time', 'sh_uid', 'sh_subject', 'sh_question_count',
        'sh_note_count', 'sh_view_count', 'sh_share_count', 'sh_praise_count',
        'sh_download_count', 'sh_admin_note', 'sh_year', 'sh_type', 'sh_grade',
        'sh_city',
    );

    /**
     * Runs a search described by the current request.
     *
     * Request parameters: k (keywords), s (subject), ymin/ymax (year range,
     * only used when both are present), c (city), t (sheet types: an array
     * or a comma-separated list), g (grade), p (page, 1-based), desc
     * (attribute to sort by in descending order).
     *
     * @return array array('verify' => $_POST, 'list' => result of search())
     */
    public function query() {
        $verify = $_POST;
        $key = $this->requestString('k');
        $subject = $this->requestString('s');

        // Year range: applied only when both bounds are supplied.
        $year_min = -1;
        $year_max = -1;
        if (isset($_REQUEST['ymin']) && isset($_REQUEST['ymax'])) {
            $year_min = intval($_REQUEST['ymin']);
            $year_max = intval($_REQUEST['ymax']);
        }

        // City
        $city = isset($_REQUEST['c']) ? intval($_REQUEST['c']) : 0;

        // Sheet types
        $arr_shtype = $this->parseTypeFilter(isset($_REQUEST['t']) ? $_REQUEST['t'] : null);

        $grade = isset($_REQUEST['g']) ? intval($_REQUEST['g']) : -1; // grade
        $page = isset($_REQUEST['p']) ? intval($_REQUEST['p']) : 1;   // page number

        // Default order: download count, descending. The request may only pick
        // one of the index attributes; anything else falls back to the default.
        $desc_order_by = self::DEFAULT_SORT_ATTR;
        $requested = $this->requestString('desc');
        if (in_array($requested, self::$sortable_attrs, true)) {
            $desc_order_by = $requested;
        }

        $list = $this->search(array(
            'key' => $key,
            'subject' => $subject,
            'year_min' => $year_min,
            'year_max' => $year_max,
            'city' => $city,
            'arr_shtype' => $arr_shtype,
            'grade' => $grade,
            'page' => $page,
            'pagenum' => 20,
            'desc_order_by' => $desc_order_by,
        ));

        return array(
            "verify" => $verify,
            "list" => $list,
        );
    }

    /**
     * Executes the search and shapes the result.
     *
     * @param array $condition search conditions, see searchd()
     * @param bool  $flag      not used by this method
     * @return array empty when the search failed; otherwise 'other' holds
     *               total, total_found and total_page, and 'sheet' holds one
     *               entry per match (when there are matches)
     */
    public function search($condition, $flag = false) {
        $data = array();
        $res = $this->searchd($condition);
        if (!$res) {
            return $data;
        }

        if (isset($res['matches']) && is_array($res['matches'])) {
            // The detailed rows are meant to be loaded from the database using
            // the matches; this example only stores a placeholder per match.
            foreach ($res['matches'] as $row) {
                $data['sheet'][] = "查询数据库获得的结果";
            }
        }

        $total = isset($res['total']) ? $res['total'] : 0;
        $total_found = isset($res['total_found']) ? $res['total_found'] : 0;

        $pagenum = isset($condition['pagenum']) ? intval($condition['pagenum']) : self::DEFAULT_PAGENUM;
        if ($pagenum < 1) {
            // Guards the division below.
            $pagenum = self::DEFAULT_PAGENUM;
        }

        $data['other']['total'] = $total;
        $data['other']['total_found'] = $total_found;
        $data['other']['total_page'] = (int) ceil(intval($total) / $pagenum);
        return $data;
    }

    /**
     * Sends the query to the Sphinx daemon.
     *
     * Recognised keys of $condition: key, sid, year_min, year_max, city,
     * arr_shtype, grade, page, pagenum, desc_order_by.
     * NOTE(review): the subject filter reads 'sid', while query() only passes
     * 'subject' - the mapping from subject to sid is not part of this file.
     *
     * @param array $condition search conditions
     * @return array|false result of SphinxClient::Query(), false on failure
     */
    public function searchd($condition) {
        // Keywords
        $key = isset($condition['key']) ? strval($condition['key']) : '';
        // Year range
        $year_min = isset($condition['year_min']) ? intval($condition['year_min']) : -1;
        $year_max = isset($condition['year_max']) ? intval($condition['year_max']) : -1;
        // City
        $city = isset($condition['city']) ? intval($condition['city']) : 0;
        // Sheet types
        $arr_shtype = isset($condition['arr_shtype']) ? $condition['arr_shtype'] : null;
        // Grade
        $grade = isset($condition['grade']) ? intval($condition['grade']) : -1;
        // Page number, starting at 1
        $page = isset($condition['page']) ? intval($condition['page']) : 1;
        // Page size
        $pagenum = isset($condition['pagenum']) ? intval($condition['pagenum']) : self::DEFAULT_PAGENUM;
        if ($pagenum < 1) {
            $pagenum = self::DEFAULT_PAGENUM;
        }
        // Attribute to sort by in descending order; relevance when empty
        $desc_order_by = isset($condition['desc_order_by']) ? $condition['desc_order_by'] : null;
        // Subject id
        $sid = isset($condition['sid']) ? intval($condition['sid']) : 0;

        try {
            $client = $this->connect();
            if (!$client) {
                return false;
            }
            $client->SetMatchMode(SPH_MATCH_EXTENDED2);
            $client->SetRankingMode(SPH_RANK_WORDCOUNT);
            if ($desc_order_by) {
                $client->SetSortMode(SPH_SORT_ATTR_DESC, $desc_order_by);
            } else {
                $client->SetSortMode(SPH_SORT_RELEVANCE);
            }
            if ($sid > 0) {
                $client->SetFilter('sh_subject', array($sid));
            }
            if ($year_min > -1 && $year_max > -1) {
                // The range filter expects min <= max.
                $client->SetFilterRange('sh_year', min($year_min, $year_max), max($year_min, $year_max));
            }
            if ($city > 0) {
                $client->SetFilter('sh_city', array($city));
            }
            if (is_array($arr_shtype) && count($arr_shtype) > 0) {
                $client->SetFilter('sh_type', array_values(array_map('intval', $arr_shtype)));
            }
            if ($grade > -1) {
                $client->SetFilter('sh_grade', array($grade));
            }
            // Paging
            if ($page < 1) {
                $page = 1;
            }
            $client->SetLimits(($page - 1) * $pagenum, $pagenum);
            return $client->Query($key, "ztm_sheet_index");
        } catch (Exception $e) {
            return false;
        }
    }

    /**
     * Creates a client pointed at the local searchd.
     *
     * @return SphinxClient|false false when the client could not be created
     */
    protected function connect(){
        try {
            $host = '127.0.0.1';
            $port = 9312;
            $client = new SphinxClient();
            $client->SetServer($host, intval($port));
            return $client;
        } catch (Exception $e) {
            return false;
        }
    }

    /**
     * Reads a request parameter as a string.
     *
     * @param string $name parameter name
     * @return string the value, or '' when it is missing or not a scalar
     */
    protected function requestString($name) {
        if (isset($_REQUEST[$name]) && is_scalar($_REQUEST[$name])) {
            return strval($_REQUEST[$name]);
        }
        return '';
    }

    /**
     * Normalises the sheet-type filter of the request.
     *
     * @param mixed $raw an array of type ids (t[]=1&t[]=2) or a
     *                   comma-separated list ("1,2")
     * @return array|null list of integer type ids, null when none was given
     */
    protected function parseTypeFilter($raw) {
        if (is_string($raw)) {
            $raw = explode(',', $raw);
        }
        if (!is_array($raw)) {
            return null;
        }
        $types = array();
        foreach ($raw as $type) {
            if (is_string($type)) {
                $type = trim($type);
            }
            if (is_numeric($type)) {
                $types[] = intval($type);
            }
        }
        return count($types) > 0 ? $types : null;
    }
}
?>
调用示例(需放在 search.php 的 ?> 结束标记之前才会被执行;query() 不接收参数,直接读取 $_REQUEST):
$obj = new application_biz_searchsheet();
$ret = $obj->query();
$ret 就是搜索结果。

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!