From 422901f40144693e9243a666dec268b19b4eb2fe Mon Sep 17 00:00:00 2001
From: cyc
Date: Mon, 22 Jun 2020 15:16:29 +0800
Subject: [PATCH 1/3] =?UTF-8?q?=E9=83=A8=E7=BD=B2=E4=BC=98=E5=8C=96?=
=?UTF-8?q?=E5=90=8E=E7=9A=84TA=E6=8A=93=E5=8F=96=E7=A8=8B=E5=BA=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../tripadvisor_spider/controllers/index.php | 88 ++++++++++--
.../models/Tripadvisor_Review_model.php | 19 ++-
.../views/third_party_input.php | 126 ++++++++++++++++--
3 files changed, 207 insertions(+), 26 deletions(-)
diff --git a/application/third_party/tripadvisor_spider/controllers/index.php b/application/third_party/tripadvisor_spider/controllers/index.php
index 988b8369..67b0d88e 100644
--- a/application/third_party/tripadvisor_spider/controllers/index.php
+++ b/application/third_party/tripadvisor_spider/controllers/index.php
@@ -268,30 +268,64 @@ class Index extends CI_Controller {
}
}
- public function get_destination_reviews($destination = null,$pagenum = null){
+ //Fetch the product page given by product_links ({PAGENUM} replaced by the paging suffix) and print the links found on it as JSON
+ public function get_destination_reviews(){
set_time_limit(0);
$ta_website = $this->config->item('tripadvisor_website');
+ $destination = $this->input->get_post('destination');
+ $pagenum = $this->input->get_post('pagenum');
+ $product_links = $this->input->get_post('product_links');
- //根据传入的目的地简码获取TA的相应评论列表
- if(isset($ta_website[$destination])){
- $url = $ta_website[$destination];
+ if($product_links != ''){
if($pagenum != ''){
- $url = str_replace('{PAGENUM}','-or'.$pagenum,$url);
+ $product_links = str_replace('{PAGENUM}','-or'.$pagenum,$product_links);
}else{
- $url = str_replace('{PAGENUM}','',$url);
+ $product_links = str_replace('{PAGENUM}','',$product_links);
}
- //根据url获取页面内容
+ $content = GET_HTTP($product_links);
+
+ $html_object = str_get_html($content);
+ //str_get_html() can return false (e.g. empty response); fall back to an empty list instead of calling find() on a non-object
+ $urlList = $html_object ? $html_object->find('._1T1U92WJ ._2cigFICy a') : array();
+
+ $data = new stdClass();
+ $data->urls = array();
+
+ foreach ($urlList as $key=>$url){
+ $data->urls[$key] = 'https://www.tripadvisor.com'.$url->href;
+ }
+
+ print_r(json_encode($data));
+ }else{
+ return ;
+ }
+ }
+
+ //Fetch the page at the posted url and print the links found on it as JSON
+ public function get_reviews_url(){
+ set_time_limit(0);
+ $url = $this->input->get_post('url');
+ //Use the request value only (no hard-coded debug URL); get_post() yields a non-string when the parameter is absent
+ $url = trim((string)$url);
+
+ if($url != ''){
$content = GET_HTTP($url);
- //进行页面解析
+
$html_object = str_get_html($content);
- $return = new stdClass();
- $return->urls = array();
- //获取每个页面上的url
- foreach ($html_object->find('.reviewSelector .quote a') as $reviews_url){
- array_push($return->urls,'https://www.tripadvisor.com'.$reviews_url->href);
+ //str_get_html() can return false (e.g. empty response); fall back to an empty list instead of calling find() on a non-object
+ $urlList = $html_object ? $html_object->find('._1T1U92WJ ._2cigFICy a') : array();
+
+ $data = new stdClass();
+ $data->urls = array();
+
+ foreach ($urlList as $key=>$url){
+ $data->urls[$key] = 'https://www.tripadvisor.com'.$url->href;
}
- print_r(json_encode($return));
+
+ print_r(json_encode($data));
+ }else{
+ return ;
}
}
@@ -312,6 +346,7 @@ class Index extends CI_Controller {
$html_num = $this->input->get_post('html_num');
$group_name = $this->input->get_post('group_name');
$guidename = $this->input->get_post('guidename');
+ $product_code = $this->input->get_post('product_code');
//$url = 'https://www.tripadvisor.com/ShowUserReviews-g308272-d6222868-r599123490-Shanghai_Trippest_Mini_Group_Tours-Shanghai.html';
//$destination = 'tp_Beijing';
@@ -326,6 +361,7 @@ class Index extends CI_Controller {
$detail_data->group_name = $group_name;
$detail_data->links = $url;
$detail_data->guidename = $guidename;
+ $detail_data->product_code = $product_code;
//提取局部,不做整个页面的寻找元素,提升效率
$meta_inner = $html_object->find('.meta_inner');
@@ -413,4 +449,28 @@ class Index extends CI_Controller {
print_r(json_encode($return_data));
}
}
+ //Print, as JSON, the rows get_productions_info() returns for the posted destination; prints nothing when it is empty
+ public function get_production_code(){
+ $destination = $this->input->get_post('destination');
+ if($destination == ''){
+ return;
+ }
+ echo json_encode($this->Tripadvisor_Review_model->get_productions_info($destination));
+ }
+
+ public function add_production(){
+ $config_destination = $this->input->get_post('config_destination');
+ $production_code = $this->input->get_post('production_code');
+ $production_link = $this->input->get_post('production_link');
+
+ //添加换页参数
+ $arr = explode('-',$production_link);
+ $arr['2'] = $arr['2'].'{PAGENUM}';
+ $production_link = implode('-',$arr);
+
+ if($config_destination && $production_code && $production_link){
+ $flag = $this->Tripadvisor_Review_model->add_config_production($config_destination,$production_code,$production_link);
+ exit('{"status":"200","reason":"添加成功"}');
+ }
+ }
}
diff --git a/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php b/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php
index ba12b652..8dca3e9f 100644
--- a/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php
+++ b/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php
@@ -182,12 +182,13 @@ class Tripadvisor_Review_model extends CI_Model {
tr_gri_no,
tr_links,
tr_guidename,
+ tr_product_code,
tr_datetime
)values(
- ?,?,?,?,?,?,?,?,?,?,?,?,?,GETDATE()
+ ?,?,?,?,?,?,?,?,?,?,?,?,?,?,GETDATE()
)
";
- $query = $this->INFO->query($sql, array($detail_data->review_id,$detail_data->destination,$detail_data->review_id,$detail_data->title,$detail_data->content,$detail_data->review_name,$detail_data->user_loc,$detail_data->star_nums,$detail_data->rating_date,$detail_data->experience_date,json_encode($detail_data->pic),$detail_data->group_name,$detail_data->links,$detail_data->guidename));
+ $query = $this->INFO->query($sql, array($detail_data->review_id,$detail_data->destination,$detail_data->review_id,$detail_data->title,$detail_data->content,$detail_data->review_name,$detail_data->user_loc,$detail_data->star_nums,$detail_data->rating_date,$detail_data->experience_date,json_encode($detail_data->pic),$detail_data->group_name,$detail_data->links,$detail_data->guidename,$detail_data->product_code));
//$result = $query->result();
}
@@ -212,5 +213,17 @@ class Tripadvisor_Review_model extends CI_Model {
$query = $this->INFO->query($sql,array($destination));
return $query->result();
}
-
+ //Insert a Ta_Reviews_Config row unless a row with the same trc_code already exists
+ public function add_config_production($config_destination,$production_code,$production_link){
+ $bindings = array($production_code,$config_destination,$production_code,$production_link);
+ $this->INFO->query('IF NOT EXISTS(
+ select trc_code from Ta_Reviews_Config where trc_code = ?
+ )insert into Ta_Reviews_Config (trc_destination,trc_code,trc_links) values (?,?,?)',$bindings);
+ }
+ //Return all Ta_Reviews_Config rows whose trc_destination equals $destination
+ public function get_productions_info($destination){
+ $bindings = array($destination);
+ $rows = $this->INFO->query('select * from Ta_Reviews_Config where trc_destination = ?',$bindings);
+ return $rows->result();
+ }
}
diff --git a/application/third_party/tripadvisor_spider/views/third_party_input.php b/application/third_party/tripadvisor_spider/views/third_party_input.php
index 0a67a39a..8b697d59 100644
--- a/application/third_party/tripadvisor_spider/views/third_party_input.php
+++ b/application/third_party/tripadvisor_spider/views/third_party_input.php
@@ -19,6 +19,9 @@
数据预览
+
+ 配置中心
+
@@ -133,6 +164,7 @@ $(function(){
$.ajax({
url:'/info.php/apps/tripadvisor_spider/index/get_reviews_detail',
data:{url:ta_url},
+ type:'POST',
success:function(json,status){
var data = $.parseJSON(json);
console.log(data);
@@ -161,21 +193,64 @@ $(function(){
$('#batchcontents').click(function(){
var destination = $('#destination').val();
var pagenums = $('#pagenums').val();
+ var product_links = $('#production').val();
+ var product_code = $('#production option:selected').text();
+
if(destination != ''){
$.ajax({
- url:'/info.php/apps/tripadvisor_spider/index/get_destination_reviews/'+destination+'/'+pagenums,
+ url:'/info.php/apps/tripadvisor_spider/index/get_destination_reviews/',
+ data : {
+ destination : destination,
+ pagenum : pagenums,
+ product_links : product_links
+ },
+ type:'POST',
success:function(json,status){
var data = $.parseJSON(json);
var html = '';
for(var x=0;x
'+jsondata[i].trc_code+'';
+ }
+ $('#production').html(options_html);
}
});
}
@@ -192,7 +267,7 @@ $(function(){
$.ajax({
url:'/info.php/apps/tripadvisor_spider/index/analysis_excel',
data: formData,
- type: 'post',
+ type: 'POST',
cache: false,
contentType: false,
processData: false,
@@ -224,6 +299,7 @@ $(function(){
$.ajax({
url:'/info.php/apps/tripadvisor_spider/index/get_reviews_detail',
data:{url:ta_url,html_num:num,destination:jsondata[i].list_data[j][0],group_name:jsondata[i].list_data[j][1],guidename:jsondata[i].list_data[j][2]},
+ type:'POST',
success:function(json_detail,status){
var data = $.parseJSON(json_detail);
$('#excel_title_'+data.html_id).html(data.title);
@@ -268,6 +344,7 @@ $(function(){
if(view_destination != ''){
$.ajax({
url:'/info.php/apps/tripadvisor_spider/index/get_all_reviews/'+view_destination,
+ type:'POST',
success:function(json,status){
var jsondata = $.parseJSON(json);
var html = '';
@@ -294,5 +371,36 @@ $(function(){
});
}
});
+
+ //Add a product configuration entry
+ $('#addproduction').click(function (){
+ var config_destination = $('#confg_destination').val();
+ var production_code = $('input[name="production_code"]').val();
+ var production_link = $('input[name="production_link"]').val();
+
+ //Validate before marking the button disabled so a rejected submit cannot leave it stuck
+ if(!(config_destination && production_code && production_link)){
+ alert('请输入完整参数');
+ return;
+ }
+
+ $(this).addClass('disabled');
+ $.ajax({
+ url : '/info.php/apps/tripadvisor_spider/index/add_production/',
+ data : {
+ config_destination : config_destination,
+ production_code : production_code,
+ production_link : production_link
+ },
+ type:'POST',
+ success : function (json,status){
+ //Re-enable first: $.parseJSON throws on a non-JSON reply
+ $('#addproduction').removeClass('disabled');
+ var jsondata = $.parseJSON(json);
+ $('#confg_destination').val('');
+ $('input[name="production_code"]').val('');
+ $('input[name="production_link"]').val('');
+ alert(jsondata.reason);
+ },
+ error : function (){
+ $('#addproduction').removeClass('disabled');
+ alert('添加失败,请重试!');
+ }
+ });
+ });
});
\ No newline at end of file
From 60c2e7752ceab1512a992e99437b93f2fcbcd4e8 Mon Sep 17 00:00:00 2001
From: cyc
Date: Mon, 22 Jun 2020 15:33:31 +0800
Subject: [PATCH 2/3] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=8E=A5=E5=8F=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../tripadvisor_spider/controllers/index.php | 20 +++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/application/third_party/tripadvisor_spider/controllers/index.php b/application/third_party/tripadvisor_spider/controllers/index.php
index 67b0d88e..869015c9 100644
--- a/application/third_party/tripadvisor_spider/controllers/index.php
+++ b/application/third_party/tripadvisor_spider/controllers/index.php
@@ -463,14 +463,18 @@ class Index extends CI_Controller {
$production_code = $this->input->get_post('production_code');
$production_link = $this->input->get_post('production_link');
- //添加换页参数
- $arr = explode('-',$production_link);
- $arr['2'] = $arr['2'].'{PAGENUM}';
- $production_link = implode('-',$arr);
-
- if($config_destination && $production_code && $production_link){
- $flag = $this->Tripadvisor_Review_model->add_config_production($config_destination,$production_code,$production_link);
- exit('{"status":"200","reason":"添加成功"}');
+ if($config_destination && $production_code && $production_link){
+ //Append the paging placeholder to the third dash-separated segment of the link
+ $arr = explode('-',$production_link);
+ if(count($arr) < 3){
+ exit('{"status":"400","reason":"产品链接格式不正确"}');
+ }
+ $arr[2] .= '{PAGENUM}';
+ $production_link = implode('-',$arr);
+ $this->Tripadvisor_Review_model->add_config_production($config_destination,$production_code,$production_link);
+ exit('{"status":"200","reason":"添加成功"}');
+ }else{ //reply in JSON like the success path: the page runs $.parseJSON on the response
+ exit('{"status":"400","reason":"请输入参数"}');
+ }
}
}
From 8be561f108cdac24ab3e3913717e6aadfb52568f Mon Sep 17 00:00:00 2001
From: cyc
Date: Mon, 22 Jun 2020 23:58:02 +0800
Subject: [PATCH 3/3] =?UTF-8?q?=E4=BC=98=E5=8C=96TA=E6=95=B0=E6=8D=AE?=
=?UTF-8?q?=E9=87=87=E9=9B=86=E7=95=8C=E9=9D=A2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../views/third_party_input.php | 27 ++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)
diff --git a/application/third_party/tripadvisor_spider/views/third_party_input.php b/application/third_party/tripadvisor_spider/views/third_party_input.php
index 8b697d59..23daf969 100644
--- a/application/third_party/tripadvisor_spider/views/third_party_input.php
+++ b/application/third_party/tripadvisor_spider/views/third_party_input.php
@@ -79,6 +79,10 @@
+
+
+
+
@@ -98,6 +102,9 @@
+
+
+
@@ -109,6 +116,10 @@
+
+
+
+
@@ -195,6 +206,8 @@ $(function(){
var pagenums = $('#pagenums').val();
var product_links = $('#production').val();
var product_code = $('#production option:selected').text();
+ $(this).addClass('disabled');
+ $('#console').html('');
if(destination != ''){
$.ajax({
@@ -207,7 +220,9 @@ $(function(){
type:'POST',
success:function(json,status){
var data = $.parseJSON(json);
- var html = '';
+ $('#batchcontents').removeClass('disabled');
+ $('#console').html('5条评论链接获取成功,下面开始抓取具体评论!
');
+
for(var x=0;x标题:'+data.title+' 评论抓取成功!
';
+ $('#console').html(console_html);
}
});
}
},
error:function (){
alert('获取失败,请重试!');
+ $('#batchcontents').removeClass('disabled');
},
timeout:function(){
alert('请求超时,请重试!');
+ $('#batchcontents').removeClass('disabled');
}
});
@@ -363,7 +384,7 @@ $(function(){
}
}
- html += 'Date of experience: '+jsondata.list[y].tr_visited_date+'
';
+ html += 'Date of experience: '+jsondata.list[y].tr_visited_date+'
product code: '+jsondata.list[y].tr_product_code+'
';
}
html += 'total nums : '+jsondata.list.length+'
'
$('#list_view_content').html(html);