部署优化后的TA抓取程序

mobile-first
cyc 5 years ago
parent ba23e76e2e
commit 422901f401

@ -268,30 +268,64 @@ class Index extends CI_Controller {
}
}
public function get_destination_reviews($destination = null,$pagenum = null){
//获取目的地
public function get_destination_reviews(){
set_time_limit(0);
$ta_website = $this->config->item('tripadvisor_website');
$destination = $this->input->get_post('destination');
$pagenum = $this->input->get_post('pagenum');
$product_links = $this->input->get_post('product_links');
//根据传入的目的地简码获取TA的相应评论列表
if(isset($ta_website[$destination])){
$url = $ta_website[$destination];
if($product_links != ''){
if($pagenum != ''){
$url = str_replace('{PAGENUM}','-or'.$pagenum,$url);
$product_links = str_replace('{PAGENUM}','-or'.$pagenum,$product_links);
}else{
$url = str_replace('{PAGENUM}','',$url);
$product_links = str_replace('{PAGENUM}','',$product_links);
}
//根据url获取页面内容
$content = GET_HTTP($product_links);
$html_object = str_get_html($content);
$urlList = $html_object->find('._1T1U92WJ ._2cigFICy a');
$data = new stdClass();
$data->urls = array();
foreach ($urlList as $key=>$url){
$data->urls[$key] = 'https://www.tripadvisor.com'.$url->href;
}
print_r(json_encode($data));
}else{
return ;
}
}
//获取产品内评论URL
public function get_reviews_url(){
set_time_limit(0);
$url = $this->input->get_post('url');
$url = 'https://www.tripadvisor.com/AttractionProductReview-g294212-d11463418-Mini_Group_2_Day_Beijing_Highlights_and_Great_Wall_Tour-Beijing.html';
if($url != ''){
$content = GET_HTTP($url);
//进行页面解析
$html_object = str_get_html($content);
$return = new stdClass();
$return->urls = array();
//获取每个页面上的url
foreach ($html_object->find('.reviewSelector .quote a') as $reviews_url){
array_push($return->urls,'https://www.tripadvisor.com'.$reviews_url->href);
$urlList = $html_object->find('._1T1U92WJ ._2cigFICy a');
$data = new stdClass();
$data->urls = array();
foreach ($urlList as $key=>$url){
$data->urls[$key] = 'https://www.tripadvisor.com'.$url->href;
}
print_r(json_encode($return));
print_r(json_encode($data));
}else{
return ;
}
}
@ -312,6 +346,7 @@ class Index extends CI_Controller {
$html_num = $this->input->get_post('html_num');
$group_name = $this->input->get_post('group_name');
$guidename = $this->input->get_post('guidename');
$product_code = $this->input->get_post('product_code');
//$url = 'https://www.tripadvisor.com/ShowUserReviews-g308272-d6222868-r599123490-Shanghai_Trippest_Mini_Group_Tours-Shanghai.html';
//$destination = 'tp_Beijing';
@ -326,6 +361,7 @@ class Index extends CI_Controller {
$detail_data->group_name = $group_name;
$detail_data->links = $url;
$detail_data->guidename = $guidename;
$detail_data->product_code = $product_code;
//提取局部,不做整个页面的寻找元素,提升效率
$meta_inner = $html_object->find('.meta_inner');
@ -413,4 +449,28 @@ class Index extends CI_Controller {
print_r(json_encode($return_data));
}
}
/**
 * Print, as JSON, the configured products of one destination.
 *
 * Reads `destination` from the GET/POST input. When it is empty nothing is
 * printed; otherwise the rows returned by
 * Tripadvisor_Review_model::get_productions_info() are JSON-encoded and
 * written to the response.
 */
public function get_production_code(){
    $site = $this->input->get_post('destination');
    // Nothing to look up without a destination code.
    if($site == ''){
        return;
    }
    $rows = $this->Tripadvisor_Review_model->get_productions_info($site);
    print_r(json_encode($rows));
}
/**
 * Register a product (destination, product code, product link) in the
 * review configuration.
 *
 * Reads `config_destination`, `production_code` and `production_link` from
 * the GET/POST input. The `{PAGENUM}` paging placeholder is appended to the
 * third dash-separated segment of the link before it is stored.
 *
 * Always terminates the request with a JSON body of the form
 * {"status": "...", "reason": "..."}: status "200" once the model has been
 * called, status "400" when a parameter is missing or the link has fewer
 * than three dash-separated segments.
 */
public function add_production(){
    $config_destination = $this->input->get_post('config_destination');
    $production_code = $this->input->get_post('production_code');
    $production_link = $this->input->get_post('production_link');

    // Validate BEFORE rewriting the link: appending the placeholder to an
    // empty link would otherwise turn it into a non-empty string and let
    // it slip past this check.
    if(!$config_destination || !$production_code || !$production_link){
        exit('{"status":"400","reason":"请输入完整参数"}');
    }

    // Add the paging placeholder, unless the link already carries one.
    if(strpos($production_link,'{PAGENUM}') === false){
        $arr = explode('-',$production_link);
        // The placeholder goes on the third segment; refuse links that do
        // not have one instead of storing a malformed URL.
        if(!isset($arr[2])){
            exit('{"status":"400","reason":"产品链接格式不正确"}');
        }
        $arr[2] = $arr[2].'{PAGENUM}';
        $production_link = implode('-',$arr);
    }

    $this->Tripadvisor_Review_model->add_config_production($config_destination,$production_code,$production_link);
    exit('{"status":"200","reason":"添加成功"}');
}
}

@ -182,12 +182,13 @@ class Tripadvisor_Review_model extends CI_Model {
tr_gri_no,
tr_links,
tr_guidename,
tr_product_code,
tr_datetime
)values(
?,?,?,?,?,?,?,?,?,?,?,?,?,GETDATE()
?,?,?,?,?,?,?,?,?,?,?,?,?,?,GETDATE()
)
";
$query = $this->INFO->query($sql, array($detail_data->review_id,$detail_data->destination,$detail_data->review_id,$detail_data->title,$detail_data->content,$detail_data->review_name,$detail_data->user_loc,$detail_data->star_nums,$detail_data->rating_date,$detail_data->experience_date,json_encode($detail_data->pic),$detail_data->group_name,$detail_data->links,$detail_data->guidename));
$query = $this->INFO->query($sql, array($detail_data->review_id,$detail_data->destination,$detail_data->review_id,$detail_data->title,$detail_data->content,$detail_data->review_name,$detail_data->user_loc,$detail_data->star_nums,$detail_data->rating_date,$detail_data->experience_date,json_encode($detail_data->pic),$detail_data->group_name,$detail_data->links,$detail_data->guidename,$detail_data->product_code));
//$result = $query->result();
}
@ -213,4 +214,16 @@ class Tripadvisor_Review_model extends CI_Model {
return $query->result();
}
/**
 * Insert a row into Ta_Reviews_Config unless a row with the same trc_code
 * already exists.
 *
 * @param mixed $config_destination value stored in trc_destination
 * @param mixed $production_code    value stored in trc_code (also used for the existence check)
 * @param mixed $production_link    value stored in trc_links
 */
public function add_config_production($config_destination,$production_code,$production_link){
    // The first binding feeds the existence check; the remaining three
    // feed the insert, in column order.
    $bindings = array(
        $production_code,
        $config_destination,
        $production_code,
        $production_link
    );
    $statement = 'IF NOT EXISTS(
select trc_code from Ta_Reviews_Config where trc_code = ?
)insert into Ta_Reviews_Config (trc_destination,trc_code,trc_links) values (?,?,?)';
    $this->INFO->query($statement,$bindings);
}
/**
 * Fetch every Ta_Reviews_Config row whose trc_destination matches.
 *
 * @param mixed $destination value compared against trc_destination
 * @return mixed the result() of the executed query
 */
public function get_productions_info($destination){
    $bindings = array($destination);
    return $this->INFO
        ->query('select * from Ta_Reviews_Config where trc_destination = ?',$bindings)
        ->result();
}
}

@ -19,6 +19,9 @@
<li role="presentation">
<a href="#list_view" aria-controls="list_view" role="tab" data-toggle="tab">数据预览</a>
</li>
<!-- Tab header that activates the #config_view panel; aria-controls must name that same panel. -->
<li role="presentation">
<a href="#config_view" aria-controls="config_view" role="tab" data-toggle="tab">配置中心</a>
</li>
</ul>
<div class="tab-content">
<div role="tabpanel" class="tab-pane active" id="handinput">
@ -70,7 +73,7 @@
<div role="tabpanel" class="tab-pane" id="listinput">
<div class="row" style="margin-top:10px;">
<div class="col-md-9">
<div class="col-md-7">
<select name="destination" class="form-control" id="destination">
<option value="">选择站点</option>
<option value="tp_Beijing">tp_Beijing</option>
@ -79,15 +82,20 @@
<option value="train">train</option>
</select>
</div>
<div class="col-md-9">
<div class="col-md-7">
<select name="production" class="form-control" id="production">
<option value="">选择产品</option>
</select>
</div>
<div class="col-md-7">
<select name="nums" class="form-control" id="pagenums">
<option value="">选择抓取条数</option>
<option value="">1~10</option>
<option value="10">11~20</option>
<option value="20">21~30</option>
<option value="">1~5</option>
<option value="5">6~10</option>
<option value="10">11~15</option>
</select>
</div>
<div class="col-md-6">
<div class="col-md-3">
<a href="#" id="batchcontents" class="btn btn-info">批量抓取</a>
</div>
</div>
@ -113,6 +121,29 @@
</div>
</div>
<!-- Configuration panel: form for adding a product (destination, product code, product link). -->
<div role="tabpanel" class="tab-pane" id="config_view">
<div class="row" style="margin-top:10px;">
<div class="col-md-5">
<!-- The id "confg_destination" (sic) is the selector the #addproduction click handler reads; keep the spelling in sync with the script. -->
<select name="confg_destination" class="form-control" id="confg_destination">
<option value="">选择站点</option>
<option value="tp_Beijing">tp_Beijing</option>
<option value="tp_Shanghai">tp_Shanghai</option>
<option value="tp_Guilin">tp_Guilin</option>
<option value="train">train</option>
</select>
</div>
<div class="col-md-5">
<!-- Product code; the script looks this input up by its name attribute. -->
<input name="production_code" type="text" class="form-control" placeholder="请输入产品代号" />
</div>
<div class="col-md-11">
<!-- Product link; the script looks this input up by its name attribute. -->
<input name="production_link" type="text" class="form-control" placeholder="请输入产品链接" />
</div>
<div class="col-md-3">
<!-- Submit trigger; a click handler is bound to #addproduction in the page script. -->
<a href="#" id="addproduction" class="btn btn-info">添加产品</a>
</div>
</div>
</div>
</div>
</div>
</div>
@ -133,6 +164,7 @@ $(function(){
$.ajax({
url:'/info.php/apps/tripadvisor_spider/index/get_reviews_detail',
data:{url:ta_url},
type:'POST',
success:function(json,status){
var data = $.parseJSON(json);
console.log(data);
@ -161,21 +193,64 @@ $(function(){
$('#batchcontents').click(function(){
var destination = $('#destination').val();
var pagenums = $('#pagenums').val();
var product_links = $('#production').val();
var product_code = $('#production option:selected').text();
if(destination != ''){
$.ajax({
url:'/info.php/apps/tripadvisor_spider/index/get_destination_reviews/'+destination+'/'+pagenums,
url:'/info.php/apps/tripadvisor_spider/index/get_destination_reviews/',
data : {
destination : destination,
pagenum : pagenums,
product_links : product_links
},
type:'POST',
success:function(json,status){
var data = $.parseJSON(json);
var html = '';
for(var x=0;x<data.urls.length;x++){
$.ajax({
url:'/info.php/apps/tripadvisor_spider/index/get_reviews_detail',
data:{url:data.urls[x],html_num:'list_'+x,destination:destination},
type:'POST',
data:{
url : data.urls[x],
html_num : 'list_'+x,
destination : destination,
product_links : product_links,
product_code : product_code
},
success:function(content,status){
}
});
}
},
error:function (){
alert('获取失败,请重试!');
},
timeout:function(){
alert('请求超时,请重试!');
}
});
}
});
//修改目的地获取产品
$('#destination').on("change",function (){
var destination = $(this).val();
if(destination != ''){
$.ajax({
url : '/info.php/apps/tripadvisor_spider/index/get_production_code',
data : {destination : destination},
type:'POST',
success : function (json,status){
var jsondata = $.parseJSON(json);
var options_html = '<option value="">选择产品</option>';
for (var i=0;i<jsondata.length;i++){
options_html += '<option value="'+jsondata[i].trc_links+'">'+jsondata[i].trc_code+'</option>';
}
$('#production').html(options_html);
}
});
}
@ -192,7 +267,7 @@ $(function(){
$.ajax({
url:'/info.php/apps/tripadvisor_spider/index/analysis_excel',
data: formData,
type: 'post',
type: 'POST',
cache: false,
contentType: false,
processData: false,
@ -224,6 +299,7 @@ $(function(){
$.ajax({
url:'/info.php/apps/tripadvisor_spider/index/get_reviews_detail',
data:{url:ta_url,html_num:num,destination:jsondata[i].list_data[j][0],group_name:jsondata[i].list_data[j][1],guidename:jsondata[i].list_data[j][2]},
type:'POST',
success:function(json_detail,status){
var data = $.parseJSON(json_detail);
$('#excel_title_'+data.html_id).html(data.title);
@ -268,6 +344,7 @@ $(function(){
if(view_destination != ''){
$.ajax({
url:'/info.php/apps/tripadvisor_spider/index/get_all_reviews/'+view_destination,
type:'POST',
success:function(json,status){
var jsondata = $.parseJSON(json);
var html = '';
@ -294,5 +371,36 @@ $(function(){
});
}
});
// Add a product configuration: validate the three form fields, POST them to
// add_production and show the server's reason to the user.
$('#addproduction').click(function (){
    var $button = $(this);
    var config_destination = $('#confg_destination').val();
    var production_code = $('input[name="production_code"]').val();
    var production_link = $('input[name="production_link"]').val();

    // Validate first, so a rejected submission never leaves the button disabled.
    if(!(config_destination && production_code && production_link)){
        alert('请输入完整参数');
        return;
    }

    // Disable only while a request is actually in flight.
    $button.addClass('disabled');
    $.ajax({
        url : '/info.php/apps/tripadvisor_spider/index/add_production/',
        data : {
            config_destination : config_destination,
            production_code : production_code,
            production_link : production_link
        },
        type:'POST',
        success : function (json,status){
            // Re-enable before parsing, so a malformed body cannot lock the button.
            $button.removeClass('disabled');
            var jsondata = json ? $.parseJSON(json) : null;
            if(!jsondata){
                alert('添加失败,请重试!');
                return;
            }
            // Clear the form only when the server reports success, so the
            // user can correct and resubmit a rejected entry.
            if(jsondata.status == '200'){
                $('#confg_destination').val('');
                $('input[name="production_code"]').val('');
                $('input[name="production_link"]').val('');
            }
            alert(jsondata.reason);
        },
        error : function (){
            // A failed request must also release the button.
            $button.removeClass('disabled');
            alert('添加失败,请重试!');
        }
    });
});
});
</script>
Loading…
Cancel
Save