diff --git a/application/third_party/tripadvisor_spider/config/config.php b/application/third_party/tripadvisor_spider/config/config.php index 022d61f8..3b452f64 100644 --- a/application/third_party/tripadvisor_spider/config/config.php +++ b/application/third_party/tripadvisor_spider/config/config.php @@ -18,10 +18,10 @@ $config['tripadvisor_website'] = array( 'Zhangjiajie' => 'http://www.tripadvisor.com/Attraction_Review-g494933-d8077695-Reviews{PAGENUM}China_Highlights_Zhangjiajie_Day_Tour-Zhangjiajie_Hunan.html', 'HongKong' => 'https://www.tripadvisor.com/Attraction_Review-g294217-d10243951-Reviews{PAGENUM}China_Highlights_Hong_Kong-Hong_Kong.html', 'Panda' => 'https://www.tripadvisor.com/Attraction_Review-g297463-d11489225-Reviews{PAGENUM}China_Highlights-Chengdu_Sichuan.html', - 'tp_Beijing' => 'https://www.tripadvisor.com/Attraction_Review-g294212-d4006739-Reviews-The_Trippest_Mini_Group_Tours-Beijing.html', - 'tp_Xian' => 'https://www.tripadvisor.com/Attraction_Review-g298557-d10999897-Reviews-Xi_an_Trippest_Mini_Group_Tours-Xi_an_Shaanxi.html', - 'tp_Shanghai' => 'https://www.tripadvisor.com/Attraction_Review-g308272-d6222868-Reviews-Shanghai_Trippest_Mini_Group_Tours-Shanghai.html', - 'tp_Guilin' => 'https://www.tripadvisor.com/Attraction_Review-g298556-d14121459-Reviews-Trippest_Mini_Group_Tours-Guilin_Guangxi.html' + 'tp_Beijing' => 'https://www.tripadvisor.com/Attraction_Review-g294212-d4006739-Reviews{PAGENUM}-The_Trippest_Mini_Group_Tours-Beijing.html', + 'tp_Xian' => 'https://www.tripadvisor.com/Attraction_Review-g298557-d10999897-Reviews{PAGENUM}-Xi_an_Trippest_Mini_Group_Tours-Xi_an_Shaanxi.html', + 'tp_Shanghai' => 'https://www.tripadvisor.com/Attraction_Review-g308272-d6222868-Reviews{PAGENUM}-Shanghai_Trippest_Mini_Group_Tours-Shanghai.html', + 'tp_Guilin' => 'https://www.tripadvisor.com/Attraction_Review-g298556-d14121459-Reviews{PAGENUM}-Trippest_Mini_Group_Tours-Guilin_Guangxi.html' ); diff --git a/application/third_party/tripadvisor_spider/controllers/index.php b/application/third_party/tripadvisor_spider/controllers/index.php index 6b7e266f..2b8124b6 100644 --- a/application/third_party/tripadvisor_spider/controllers/index.php +++ b/application/third_party/tripadvisor_spider/controllers/index.php @@ -267,24 +267,40 @@ class Index extends CI_Controller { } } - public function get_destination_reviews($destination = null){ + public function get_destination_reviews($destination = null,$pagenum = null){ + set_time_limit(0); $ta_website = $this->config->item('tripadvisor_website'); //根据传入的目的地简码获取TA的相应评论列表 if(isset($ta_website[$destination])){ $url = $ta_website[$destination]; + if($pagenum != ''){ + $url = str_replace('{PAGENUM}','-or'.$pagenum,$url); + }else{ + $url = str_replace('{PAGENUM}','',$url); + } //根据url获取页面内容 $content = GET_HTTP($url); - //进行页面解析 $html_object = str_get_html($content); - - //获取第一页列表上的url - foreach ($html_object->find('.reviewSelector .quote a') as $a_info){ - $url = 'https://www.tripadvisor.com'.$a_info->href; - + $return = new stdClass(); + $return->urls = array(); + //获取每个页面上的url + foreach ($html_object->find('.reviewSelector .quote a') as $reviews_url){ + array_push($return->urls,'https://www.tripadvisor.com'.$reviews_url->href); } + print_r(json_encode($return)); + } + } + + //查看抓取到的所有信息 + public function get_all_reviews($destination){ + if($destination != ''){ + $all_reviews = $this->Tripadvisor_Review_model->get_all_reviews($destination); + $return = array(); + $return['list'] = $all_reviews; + print_r(json_encode($return)); } } @@ -294,8 +310,8 @@ class Index extends CI_Controller { $destination = $this->input->get_post('destination'); $html_num = $this->input->get_post('html_num'); - //$url = 'https://www.tripadvisor.com/ShowUserReviews-g294212-d4006739-r666168101-The_Trippest_Mini_Group_Tours-Beijing.html'; - $destination = 'tp_Beijing'; + //$url = 'https://www.tripadvisor.com/ShowUserReviews-g308272-d6222868-r599123490-Shanghai_Trippest_Mini_Group_Tours-Shanghai.html'; + //$destination = 'tp_Beijing'; if($url != ''){ $content = GET_HTTP($url); @@ -307,15 +323,28 @@ class Index extends CI_Controller { //提取局部,不做整个页面的寻找元素,提升效率 $meta_inner = $html_object->find('.meta_inner'); + $detail_data->user_loc = ''; + $detail_data->pic = array(); foreach($meta_inner as $detail_info){ //记录该条记录的id $detail_data->html_id = $html_num; + //获取评论者帐号 foreach($detail_info->find('.info_text') as $review_name){ $detail_data->review_name = $review_name->first_child()->innertext; } + //获取评论者帐号 + foreach($detail_info->find('.info_text .userLoc strong') as $user_loc){ + $detail_data->user_loc = $user_loc->innertext; + } + + //抓取评论时间 + foreach($detail_info->find('.ratingDate') as $ratingDate){ + $detail_data->rating_date = date('Y-m-d',strtotime($ratingDate->title)); + } + //获取评论者ID foreach($detail_info->find('.reviewSelector') as $review_id){ $detail_data->review_id = str_replace('review_','',$review_id->id); @@ -337,13 +366,21 @@ class Index extends CI_Controller { $detail_data->content = $content->innertext; } - //获取评论时间 - foreach($detail_info->find('.prw_reviews_stay_date_hsx') as $review_date){ - $detail_data->review_date = str_replace('Date of experience: ','',$review_date->innertext); + //获取体验时间 + foreach($detail_info->find('.prw_reviews_stay_date_hsx') as $experience_date){ + $detail_data->experience_date = date('Y-m-d',strtotime(str_replace('Date of experience: ','',$experience_date->innertext))); + } + + //抓取图片 + foreach($detail_info->find('.imgWrap .noscript') as $imgWrap){ + $imgWrap->src = str_replace('photo-l','photo-s',$imgWrap->src); + array_push($detail_data->pic,$imgWrap->src); } } //拿到数据后进行入库 + $this->Tripadvisor_Review_model->add_reviews($detail_data); + print_r(json_encode($detail_data)); } } diff --git a/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php b/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php index 25414fc7..d2f8cf21 100644 --- a/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php +++ b/application/third_party/tripadvisor_spider/models/Tripadvisor_Review_model.php @@ -161,5 +161,39 @@ class Tripadvisor_Review_model extends CI_Model { $result = $query->result(); return $result; } + + public function add_reviews($detail_data){ + $sql = " + IF NOT EXISTS( + select tr_review_id from Ta_Reviews where tr_review_id = ? + ) + insert into Ta_Reviews + ( + tr_destination, + tr_review_id, + tr_review_title, + tr_content, + tr_member_name, + tr_member_loc, + tr_member_starts, + tr_review_date, + tr_visited_date, + tr_review_pics, + tr_gri_no, + tr_tgi_sn, + tr_datetime + )values( + ?,?,?,?,?,?,?,?,?,?,?,?,GETDATE() + ) + "; + $query = $this->INFO->query($sql, array($detail_data->review_id,$detail_data->destination,$detail_data->review_id,$detail_data->title,$detail_data->content,$detail_data->review_name,$detail_data->user_loc,$detail_data->star_nums,$detail_data->rating_date,$detail_data->experience_date,json_encode($detail_data->pic),'','')); + //$result = $query->result(); + } + + public function get_all_reviews($destination){ + $sql = 'select * from Ta_Reviews where tr_destination = ? order by tr_review_date desc'; + $query = $this->INFO->query($sql,array($destination)); + return $query->result(); + } } diff --git a/application/third_party/tripadvisor_spider/views/third_party_input.php b/application/third_party/tripadvisor_spider/views/third_party_input.php index 0643046e..eb84da06 100644 --- a/application/third_party/tripadvisor_spider/views/third_party_input.php +++ b/application/third_party/tripadvisor_spider/views/third_party_input.php @@ -10,9 +10,15 @@ +
  • + 列表抓取 +
  • excel导入
  • +
  • + 数据预览 +
  • @@ -30,17 +36,21 @@

    +

    +

    -

    +

    +

    +
    @@ -57,6 +67,50 @@
    + +
    +
    +
    + +
    +
    + +
    + +
    +
    + +
    +
    +
    + +
    + +
    +
    + +
    +
    + @@ -70,6 +124,7 @@ $(function(){ //获取填写的url var ta_url = $('#ta_url').val(); var stars = ''; + var pic_htm = ''; if(ta_url == ''){ alert('请填写需要采集的TA地址'); }else{ @@ -80,8 +135,14 @@ $(function(){ var data = $.parseJSON(json); console.log(data); $('.ta_content').html(data.content); - $('.review_date').html('Date of experience: '+data.review_date); + $('.experience_date').html('Date of experience: '+data.experience_date); $('.review_name').html(data.review_name); + $('.user_loc').html(data.user_loc); + $('.rating_date').html('Reviewed:'+data.rating_date); + for(var i=0;i

    '; + } + $('.review_pic').html(pic_htm); $('.ta_title').html(''+data.title+''); if(data.star_nums){ for(var i=0;i

    '+jsondata.list[y].tr_member_loc+'

    '; + for(var i=0;i

    Reviewed: '+jsondata.list[y].tr_review_date+'

    '+jsondata.list[y].tr_content+'

    '; + html += '

    '; + + if($.parseJSON(jsondata.list[y].tr_review_pics).length > 0){ + for(var j=0;j<$.parseJSON(jsondata.list[y].tr_review_pics).length;j++){ + html += '

    '; + } + } + + html += '

    Date of experience: '+jsondata.list[y].tr_visited_date+'


    '; + } + html += '

    total nums : '+jsondata.list.length+'

    ' + $('#list_view_content').html(html); + } + }); + } + }); }); \ No newline at end of file