html导入信息平台

mobile-first
Ycc 5 years ago
parent 0239e6d0c7
commit 898edafbb0

@ -1,27 +1,28 @@
<?php <?php
if (!defined('BASEPATH')) if (!defined('BASEPATH')) {
exit('No direct script access allowed'); exit('No direct script access allowed');
}
class Import extends CI_Controller class Import extends CI_Controller
{ {
public function __construct()
function __construct()
{ {
parent::__construct(); parent::__construct();
$this->permission->is_admin(); //$this->permission->is_admin();
//$this->output->enable_profiler(TRUE); //$this->output->enable_profiler(TRUE);
$this->load->model('Area_model'); // $this->load->model('Area_model');
$this->load->model('Import_model'); $this->load->model('Import_model');
$this->load->model('InfoContents_model'); // $this->load->model('InfoContents_model');
$this->load->model('InfoStructures_model'); // $this->load->model('InfoStructures_model');
$this->load->library('simple_html_dom_lib');
} }
public function index() public function index()
{ {
//@set_time_limit(0); //@set_time_limit(0);
ini_set('max_execution_time', '0'); ini_set('max_execution_time', '0');
// $this->Import_model->import_from_other_database(69690,63564); // $this->Import_model->import_from_other_database(69690,63564);
//$this->Import_model->import_clear(); //$this->Import_model->import_clear();
//$this->Import_model->import_info(); //$this->Import_model->import_info();
//$this->Import_model->import_xxoo(); //$this->Import_model->import_xxoo();
@ -36,9 +37,106 @@ class Import extends CI_Controller
echo $item->ic_url_title . '<br/>'; echo $item->ic_url_title . '<br/>';
}*/ }*/
} }
/**
 * Parses the pending HTML files of one site and stores the extracted fields.
 *
 * For every row returned by Import_model::not_done_html_info() the file named
 * in h_file is loaded with simple_html_dom, and the main content block, the
 * public URL, the meta description, the <title> and the first <h1> are
 * extracted and written back through Import_model::update_html_info().
 * A row is saved with h_done = 1 when every field was found, and with
 * h_done = 2 when at least one of them is missing.
 *
 * NOTE(review): rows whose file is missing or cannot be loaded are skipped
 * without being updated, so they will be selected again on the next run.
 *
 * @param mixed $sitecode Site code forwarded to Import_model::not_done_html_info().
 * @return void Progress is echoed as HTML.
 */
public function html_to_database($sitecode)
{
    ini_set('max_execution_time', '0');
    $file_array = $this->Import_model->not_done_html_info($sitecode);
    if (empty($file_array)) {
        echo 'HTML文件都处理完毕!';
        return;
    }

    // Candidate containers of the main content, tried in order; the first match wins.
    $content_selectors = array(
        'div[id=mainContentRight]',
        'div[id=content]',
        'div[id=mainContent]',
        'div[id=contentRight]',
        'div[id=MainhotelContent]',
        'div.col-md-19',
        'div[id=historyLine]',
        'div.container',
    );

    foreach ($file_array as $item) {
        $h_id = $item->h_id;
        $file = $item->h_file;
        $h_done = 1;
        // Reset for every file: otherwise a page without a description would
        // silently inherit the description of the previously processed file.
        $h_description = '';

        //$file='D:\wwwroot\gh-html\chinahighlights\xian\tour-planning-md.htm';
        if (!is_file($file)) {
            echo '文件不存在:'.$file;
            continue;
        }
        $html_object = file_get_html($file);
        if (empty($html_object)) {
            echo '文件加载失败:'.$file;
            continue;
        }

        // Main content: first selector that yields a node.
        $content_node = null;
        foreach ($content_selectors as $selector) {
            $content_node = $html_object->find($selector, 0);
            if (!empty($content_node)) {
                break;
            }
        }
        if (empty($content_node)) {
            $h_content = '';
            $h_done = 2;
        } else {
            $h_content = $content_node->innertext;
        }

        // Map the local file path to the public URL of the page.
        $h_url = str_replace('\\', '/', $file);
        $h_url = str_replace('D:/wwwroot/gh-html/chinahighlights/', 'https://www.chinahighlights.com/', $h_url);
        $h_url = str_replace('D:/wwwroot/gh-html/asiahighlights/', 'https://www.asiahighlights.com/', $h_url);
        $h_url = str_replace('index-md.htm', '', $h_url);
        $h_url = str_replace('-md.htm', '.htm', $h_url);

        // First non-empty <meta name="description">. A dedicated loop variable
        // keeps the outer $item intact; strict comparison avoids matching a
        // valueless name attribute.
        foreach ($html_object->find('meta') as $meta) {
            if ($meta->name === 'description' && !empty($meta->content)) {
                $h_description = $meta->content;
                break;
            }
        }
        if (empty($h_description)) {
            $h_description = '';
            $h_done = 2;
        }

        $title_node = $html_object->find('title', 0);
        if (empty($title_node)) {
            $h_title = '';
            $h_done = 2;
        } else {
            $h_title = $title_node->innertext;
        }

        $h1_node = $html_object->find('h1', 0);
        if (empty($h1_node)) {
            $h_h1 = '';
            $h_done = 2;
        } else {
            $h_h1 = $h1_node->innertext;
        }

        // All values needed are plain strings now. Release the DOM explicitly:
        // the simple_html_dom manual asks for clear() when several documents
        // are parsed in one request, otherwise memory keeps growing.
        $html_object->clear();
        unset($html_object, $content_node, $title_node, $h1_node);

        echo 'file:'.$file.'<br/>';
        echo 'h_title:'.$h_title.'<br/>';
        echo 'h_url:'.$h_url.'<br/>';
        echo 'h_description:'.$h_description.'<br/>';
        echo '<hr/>';
        //echo $h_content;
        $this->Import_model->update_html_info($h_id, $h_h1, $h_title, $h_url, $h_description, $h_content, $h_done);
    }
}
/**
 * Runs Import_model::import_clear_site() and echoes the marker 'clear_site'.
 *
 * The model method is defined outside this view; what exactly it clears
 * cannot be confirmed from here.
 */
public function clear_site()
{
$this->Import_model->import_clear_site();
echo 'clear_site';
} }
public function reset_path() public function reset_path()
@ -51,7 +149,7 @@ class Import extends CI_Controller
public function to_sqlite() public function to_sqlite()
{ {
$this->load->driver('cache', array('adapter' => 'file', 'backup' => 'file')); $this->load->driver('cache', array('adapter' => 'file', 'backup' => 'file'));
$this->HT = $this->load->database('HT', TRUE); $this->HT = $this->load->database('HT', true);
$sql_array = array(); $sql_array = array();
$sql_ht = "SELECT ic.ic_id, \n" $sql_ht = "SELECT ic.ic_id, \n"
. " ic.ic_url, \n" . " ic.ic_url, \n"
@ -91,8 +189,7 @@ class Import extends CI_Controller
. "WHERE ic.ic_ht_area_type = 'c' \n" . "WHERE ic.ic_ht_area_type = 'c' \n"
. " AND ic.ic_ht_area_id IN (177, 54, 1, 3, 7)"; . " AND ic.ic_ht_area_id IN (177, 54, 1, 3, 7)";
$sql_ht_query = $this->HT->query($sql_ht); $sql_ht_query = $this->HT->query($sql_ht);
foreach ($sql_ht_query->result() as $item) foreach ($sql_ht_query->result() as $item) {
{
$sql_content = "INSERT INTO information \n" $sql_content = "INSERT INTO information \n"
. " ( \n" . " ( \n"
. " ic_id, ic_url_title, ic_type,ic_content, ic_title, \n" . " ic_id, ic_url_title, ic_type,ic_content, ic_title, \n"
@ -123,12 +220,10 @@ class Import extends CI_Controller
. $this->HT->escape($item->is_sort) . $this->HT->escape($item->is_sort)
. " );"; . " );";
$sql_array[] = $sql_content; $sql_array[] = $sql_content;
// echo $sql_content.';'; // echo $sql_content.';';
//$this->cache->save(md5($item->ic_id), $item->ic_content, 60 * 60 * 24 * 30); //$this->cache->save(md5($item->ic_id), $item->ic_content, 60 * 60 * 24 * 30);
} }
file_put_contents('import.sql', implode("\n", $sql_array)); file_put_contents('import.sql', implode("\n", $sql_array));
//print_r($sql_array); //print_r($sql_array);
} }
} }

@ -6,9 +6,20 @@ class Import_model extends CI_Model
function __construct() function __construct()
{ {
parent::__construct(); parent::__construct();
$this->HT = $this->load->database('HT', TRUE); $this->HT = $this->load->database('INFO', TRUE);
} }
/**
 * Fetches up to 1000 html_infos rows of one site that have h_done = 2,
 * newest h_id first.
 *
 * @param mixed $h_sitecode Value bound to the h_sitecode placeholder.
 * @return array Result rows as objects (whatever the query's result() yields).
 */
public function not_done_html_info($h_sitecode)
{
    $pending = $this->HT->query(
        "select top 1000 * from html_infos where h_sitecode=? and h_done=2 order by h_id desc",
        $h_sitecode
    );

    return $pending->result();
}
/**
 * Writes the fields extracted from one HTML file back to its html_infos row.
 *
 * @param mixed  $h_id          Value bound to the h_id placeholder in the WHERE clause.
 * @param string $h_h1          Text stored in h_h1.
 * @param string $h_title       Text stored in h_title.
 * @param string $h_url         Text stored in h_url.
 * @param string $h_description Text stored in h_description.
 * @param string $h_content     Text stored in h_content.
 * @param mixed  $h_done        Value stored in h_done.
 * @return mixed Whatever the database query() call returns.
 */
public function update_html_info($h_id, $h_h1, $h_title, $h_url, $h_description, $h_content, $h_done)
{
    // Listed in the same order as the placeholders appear in the statement.
    $bindings = array(
        $h_done,
        $h_url,
        $h_description,
        $h_h1,
        $h_title,
        $h_content,
        $h_id,
    );
    $statement = ' update html_infos set h_done=? ,h_url=N?, h_description=N? ,h_h1=N? ,h_title=N?,h_content=N? where h_id=? ';

    return $this->HT->query($statement, $bindings);
}
function import_clear() function import_clear()
{ {
$this->HT->query('TRUNCATE TABLE infoContents'); $this->HT->query('TRUNCATE TABLE infoContents');

@ -1,5 +1,5 @@
<?php <?php
/** /** https://simplehtmldom.sourceforge.io/manual.htm
* Website: http://sourceforge.net/projects/simplehtmldom/ * Website: http://sourceforge.net/projects/simplehtmldom/
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/) * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
* Contributions by: * Contributions by:

Loading…
Cancel
Save