output->enable_profiler(TRUE);
        header('Access-Control-Allow-Origin:*');
        header('Access-Control-Allow-Methods:POST, GET');
        header('Access-Control-Max-Age:0');
        header('Access-Control-Allow-Headers:x-requested-with, Content-Type');
        header('Access-Control-Allow-Credentials:true');
        $this->load->model('Tripadvisor_Review_model');
    }

    /**
     * Renders the stored TripAdvisor reviews for one city.
     *
     * Calls the permission check first, then loads up to 200 reviews through
     * the review model and renders them between the bootstrap3 header/footer.
     *
     * @param string $city City key used to filter the review list.
     */
    public function index($city = 'Beijing')
    {
        $this->permission->is_admin();

        $data = array();
        $data['city'] = $city;
        $data['ta_review_list'] = $this->Tripadvisor_Review_model->search(200, $data['city']);

        $this->load->view('bootstrap3/header', $data);
        $this->load->view('welcome');
        $this->load->view('bootstrap3/footer');
    }

    /**
     * Scrapes the review listing pages of one city and stores unseen review IDs.
     *
     * The listing URL template comes from the `tripadvisor_website` config item
     * and contains a `{PAGENUM}` placeholder. Every national site in
     * $nation_mark is visited; the English site is fetched through a proxy and
     * over three pages (oldest page first).
     *
     * @param string $city City key to look up in the `tripadvisor_website` config.
     */
    public function auto_update($city = 'Beijing')
    {
        ini_set('max_execution_time', '180');

        $ta_website = $this->config->item('tripadvisor_website');
        if (empty($ta_website) || !is_array($ta_website)) {
            return;
        }

        $nation_mark = array(
            'www.tripadvisor.com',
            'www.tripadvisor.it',
            'www.tripadvisor.jp',
            'www.tripadvisor.es',
            'www.tripadvisor.fr',
            'www.tripadvisor.de',
        );

        foreach ($ta_website as $key_city => $item_url) {
            if ($key_city != $city) {
                continue;
            }

            // Collect the reviews of every national site.
            foreach ($nation_mark as $nation_item) {
                $page_url = str_replace('www.tripadvisor.com', $nation_item, $item_url);

                if ($nation_item == 'www.tripadvisor.com') {
                    // Paging marks: the English site is read for its first three
                    // pages, in reverse, so the earliest reviews are handled first.
                    $page_mark = array('-or20-', '-or10-', '-');
                    // Go through the proxy; direct access from mainland China is very slow.
                    $page_url = str_replace('https://www.tripadvisor.com', 'http://47.74.2.87:5052', $page_url);
                } else {
                    $page_mark = array('-');
                }

                foreach ($page_mark as $page_num) {
                    // Build the request URL in its own variable: writing back into
                    // $page_url would consume the {PAGENUM} placeholder on the first
                    // pass and every later page would silently re-fetch the same URL.
                    $request_url = str_replace('{PAGENUM}', $page_num, $page_url);

                    $content = GET_HTTP($request_url);
                    if (empty($content)) {
                        continue;
                    }

                    // str_get_html() yields a falsy value when it cannot parse the input.
                    $html_object = str_get_html($content);
                    if (!$html_object) {
                        continue;
                    }

                    foreach ($html_object->find('.reviewSelector') as $review) {
                        // The element id carries the review ID.
                        if (empty($review->id)) {
                            continue;
                        }

                        $tr_review_id = str_replace('review_', '', $review->id);

                        $title_node = $review->find('div.quote a', 0);
                        $tr_review_title = empty($title_node) ? '' : $title_node->plaintext;

                        // Only reviews that are not stored yet are inserted.
                        $stored_review = $this->Tripadvisor_Review_model->detail($tr_review_id);
                        if (!empty($stored_review)) {
                            continue;
                        }

                        $Tripadvisor_Review_Data = new StdClass;
                        $Tripadvisor_Review_Data->tr_city = $key_city;
                        $Tripadvisor_Review_Data->tr_review_title = $tr_review_title;
                        $Tripadvisor_Review_Data->tr_review_id = $tr_review_id;
                        $Tripadvisor_Review_Data->tr_datetime = date('Y-m-d H:i:s', time());

                        $tr_id = $this->Tripadvisor_Review_model->add('Tripadvisor_Review', $Tripadvisor_Review_Data);

                        // The separator is a bare line break, exactly as in the source.
                        echo "\n" . $tr_id . ' ' . $key_city . ' ' . $tr_review_id;
                    }
                }
            }
        }
    }

    /**
     * Fetches the expanded content of up to 10 pending reviews and stores it.
     *
     * @return bool|null true when nothing is pending, FALSE when the page could
     *                   not be fetched or parsed, null after a normal update run.
     */
    public function auto_update_content()
    {
        ini_set('max_execution_time', '100');

        $update_list = $this->Tripadvisor_Review_model->update_list(10);
        if (empty($update_list)) {
            echo 'all done';
            return true;
        }

        // Comma separated review IDs, terminated by a dummy 0.
        $tr_review_id_string = '';
        foreach ($update_list as $item) {
            $tr_review_id_string .= $item->tr_review_id . ',';
        }
        $tr_review_id_string .= '0';

        $url = "https://www.tripadvisor.com/ExpandedUserReviews-g298556-d4418151?target=480111710&context=1&reviews=480111710,$tr_review_id_string&servlet=Attraction_Review&expand=1";
        echo $url . "\n";

        $content = GET_HTTP($url);
        if (empty($content)) {
            echo 'error gethttp:' . $url;
            return FALSE;
        }

        // An unparsable response is reported the same way as a failed fetch
        // instead of crashing on a method call on a non-object.
        $html_object = str_get_html($content);
        if (!$html_object) {
            echo 'error gethttp:' . $url;
            return FALSE;
        }

        foreach ($html_object->find('div .extended') as $review) {
            if (empty($review->id)) {
                continue;
            }

            // Review ID
            $tr_review_id = str_replace('UR', '', $review->id);

            // Member ID, e.g. UID_A50920FC5494D02709AA8F0E12294AAB-SRC_494596572
            $member_node = $review->find('div.member_info div.memberOverlayLink', 0);
            if (isset($member_node) && isset($member_node->id)) {
                $tr_member_id = substr($member_node->id, 4, 32);
            } else {
                $tr_member_id = 0;
            }

            // Member name; a fixed placeholder is used when the name is not visible.
            $name_node = $review->find('div.username span.expand_inline', 0);
            $tr_member_name = empty($name_node) ? 'A TripAdvisor Member' : $name_node->plaintext;

            // Review body
            $content_node = $review->find('div.entry', 0);
            $tr_content = empty($content_node) ? '' : $content_node->innertext;

            // Visit date
            $visited_node = $review->find('span.recommend-titleInline', 0);
            $tr_visited_date = empty($visited_node)
                ? ''
                : str_replace('Visited ', '', $visited_node->plaintext);

            // Review date: prefer the title attribute, fall back to the visible text.
            // A missing node yields an empty date instead of a null dereference.
            $date_node = $review->find('span.ratingDate', 0);
            if (empty($date_node)) {
                $tr_review_date = '';
            } elseif (empty($date_node->title)) {
                $tr_review_date = str_replace('Reviewed ', '', $date_node->innertext);
            } else {
                $tr_review_date = $date_node->title;
            }

            $Tripadvisor_Review_Data = new StdClass;
            $Tripadvisor_Review_Data->tr_member_id = $tr_member_id;
            $Tripadvisor_Review_Data->tr_member_name = $tr_member_name;
            $Tripadvisor_Review_Data->tr_content = $tr_content;
            $Tripadvisor_Review_Data->tr_visited_date = $tr_visited_date;
            $Tripadvisor_Review_Data->tr_review_date = $tr_review_date;

            $where = array('tr_review_id' => $tr_review_id);
            $this->Tripadvisor_Review_model->update('Tripadvisor_Review', $Tripadvisor_Review_Data, $where);

            echo $tr_review_id . ' ' . $tr_member_id . ' ' . $tr_member_name . ' ' . $tr_content . ' '
                . $tr_visited_date . ' ' . $tr_review_date . "\n";
        }
    }

    /**
     * Analyses one review and looks for the tour group / guide it may refer to.
     *
     * Groups are searched for the review's city in the 31 days following the
     * visit date; a group matches when its guide name occurs in the review text
     * (case-insensitive). The result is echoed as JSON.
     *
     * @param mixed $tr_id Identifier passed to the model's detail_tr_id().
     */
    public function analysis_ta_review($tr_id)
    {
        $data = array();
        $data['ta_review'] = $this->Tripadvisor_Review_model->detail_tr_id($tr_id);
        if (empty($data['ta_review'])) {
            echo '找不到评论内容';
            return;
        }

        $start_date = date('Y-m-d', strtotime($data['ta_review']->tr_visited_date));
        $end_date = date("Y-m-d", strtotime("$start_date +31 day"));

        $group_list = $this->Tripadvisor_Review_model->find_group($data['ta_review']->tr_city, $start_date, $end_date);
        if (empty($group_list)) {
            echo json_encode(array('group_result' => '没有找到匹配团信息', 'tr_content' => $data['ta_review']->tr_content));
            return;
        }

        $data['match_group_list'] = array();
        foreach ($group_list as $item) {
            $guide_found = (trim($item->GuideName) <> '')
                && (stripos($data['ta_review']->tr_content, $item->GuideName) !== false);
            if (!$guide_found) {
                continue;
            }

            // Look up the customers travelling in this group.
            $item->customer_list = $this->Tripadvisor_Review_model->get_customer_info($item->coli_sn);
            // Record the matched guide/group.
            $data['match_group_list'][] = $item;

            // Highlight the guide name: first undo an earlier highlight so it is
            // never applied twice, then apply it.
            // NOTE(review): the wrapper strings around the name are empty in this
            // copy of the file; presumably highlight markup belongs there - confirm.
            $data['ta_review']->tr_content = str_ireplace('' . $item->GuideName . '', $item->GuideName, $data['ta_review']->tr_content);
            $data['ta_review']->tr_content = str_ireplace($item->GuideName, '' . $item->GuideName . '', $data['ta_review']->tr_content);
        }

        echo json_encode(array(
            'group_result' => $this->load->view('find_group_result', $data, true),
            'tr_content' => $data['ta_review']->tr_content,
        ));
    }

    /**
     * Renders the third-party data import page.
     */
    public function third_party_input()
    {
        $this->load->view('bootstrap3/header');
        $this->load->view('third_party_input');
        $this->load->view('bootstrap3/footer');
    }

    /**
     * Makes sure $dir exists and is writable, creating it when necessary.
     *
     * NOTE(review): this is a public method on what looks like a controller, so
     * it is presumably reachable by URL; the name is kept for existing callers.
     *
     * @param string $dir Directory path.
     * @throws FileSystemException when an existing directory cannot be made writable.
     */
    function ensure_writable_dir($dir)
    {
        if (!file_exists($dir)) {
            mkdir($dir, 0766, true);
            // mkdir() is subject to the umask, so the mode is set explicitly.
            chmod($dir, 0777);
        } else if (!is_writable($dir)) {
            chmod($dir, 0777);
            if (!is_writable($dir)) {
                throw new FileSystemException("目录 $dir 不可写");
            }
        }
    }

    /**
     * Emits the generic "upload failed" response used by analysis_excel().
     */
    private function _upload_failed()
    {
        header("HTTP/1.1 404 Not Found");
        echo '{"status":404,"message":"文件上传失败!","picname":""}';
    }

    /**
     * Receives an uploaded spreadsheet (field `fileArray`) and echoes its
     * content as JSON: one entry per sheet with the sheet name (`destination`),
     * the first row (`list_name`) and the remaining rows (`list_data`).
     *
     * Rows whose column A is empty are skipped. `list_data` is keyed by the
     * zero-based row position, so keys start at 1 and may have gaps.
     */
    public function analysis_excel()
    {
        // A request without the upload field is reported as a failed upload.
        if (empty($_FILES['fileArray']) || !isset($_FILES['fileArray']['error'])) {
            $this->_upload_failed();
            return;
        }

        $error = $_FILES['fileArray']['error'];
        if ($error > 0) {
            header("HTTP/1.1 404 Not Found");
            echo '{"status":404,"message":' . $_FILES["fileArray"]["error"] . '}';
            return;
        }

        // Use the real (last) extension and accept spreadsheet formats only:
        // the file is stored below upload/ under a client-chosen extension,
        // which must never be something the web server would execute.
        $extension = strtolower(pathinfo($_FILES['fileArray']['name'], PATHINFO_EXTENSION));
        if (!in_array($extension, array('xls', 'xlsx', 'ods', 'csv'), true)) {
            $this->_upload_failed();
            return;
        }

        // 24-hour clock so morning and afternoon uploads cannot share a name.
        $filename = date('YmdHis') . '.' . $extension;
        $tmp = $_FILES['fileArray']['tmp_name'];

        $path = 'upload/' . date('Y') . '/' . date('m') . '/';
        $this->ensure_writable_dir($path);

        if (!move_uploaded_file($tmp, $path . $filename)) {
            $this->_upload_failed();
            return;
        }

        require_once "PHPExcel/IOFactory.php";
        $phpExcel = PHPExcel_IOFactory::load($path . $filename);

        // Result array, one entry per sheet.
        $data = [];
        foreach ($phpExcel->getSheetNames() as $key => $destination) {
            $sheet = $phpExcel->getSheet($key);

            $data[$key] = new stdClass();
            $data[$key]->destination = $destination;
            $data[$key]->list_name = array();
            $data[$key]->list_data = array();

            $row = $sheet->getHighestRow();

            // Column letters cannot be compared with <= ('B' <= 'AA' is false and
            // 'AA' <= 'Z' is true), so iterate until the letter one past the last
            // column is reached.
            $column_end = $sheet->getHighestColumn();
            $column_end++;

            $j = 0;
            for ($i = 1; $i <= $row; $i++) {
                // Rows with an empty first column are skipped entirely.
                if ($sheet->getCell('A' . $i)->getValue() != '') {
                    for ($c = 'A'; $c !== $column_end; $c++) {
                        $value = $sheet->getCell($c . $i)->getValue();
                        if ($i == 1) {
                            // First row holds the column titles.
                            array_push($data[$key]->list_name, $value);
                        } else {
                            $data[$key]->list_data[$j][] = $value;
                        }
                    }
                }
                $j++;
            }
        }

        // Return the processed data as JSON.
        print_r(json_encode($data));
    }

    /**
     * Loads the first review listing page of a destination and walks over the
     * review links on it.
     *
     * NOTE(review): the detail URL built for each link is not used or returned
     * anywhere in the visible code - this looks unfinished.
     *
     * @param string|null $destination Key into the `tripadvisor_website` config.
     */
    public function get_destination_reviews($destination = null)
    {
        $ta_website = $this->config->item('tripadvisor_website');

        // Resolve the listing URL for the requested destination code.
        if (!isset($ta_website[$destination])) {
            return;
        }
        $url = $ta_website[$destination];

        $content = GET_HTTP($url);
        if (empty($content)) {
            return;
        }

        $html_object = str_get_html($content);
        if (!$html_object) {
            return;
        }

        // Review links found on the first listing page.
        foreach ($html_object->find('.reviewSelector .quote a') as $a_info) {
            $url = 'https://www.tripadvisor.com' . $a_info->href;
        }
    }

    /**
     * Fetches one review detail page and echoes the extracted fields as JSON.
     *
     * Request parameters (GET or POST): `url` (page to fetch), `html_num`
     * (echoed back as `html_id`) and `destination`.
     *
     * NOTE(review): the URL is caller-supplied and fetched server-side without
     * any host restriction - confirm that this endpoint is not publicly reachable.
     */
    public function get_reviews_detail()
    {
        set_time_limit(0);

        $url = $this->input->get_post('url');
        $destination = $this->input->get_post('destination');
        $html_num = $this->input->get_post('html_num');
        // NOTE(review): the submitted destination is overridden by this
        // hard-coded value; it looks like a debugging leftover - confirm.
        $destination = 'tp_Beijing';

        if ($url == '') {
            return;
        }

        $page = GET_HTTP($url);
        if (empty($page)) {
            return;
        }
        $html_object = str_get_html($page);
        if (!$html_object) {
            return;
        }

        // Container for the extracted values.
        $detail_data = new stdClass();
        $detail_data->destination = $destination;

        // Only search inside the .meta_inner fragments, not the whole page.
        foreach ($html_object->find('.meta_inner') as $detail_info) {
            // Caller-supplied id of this record.
            $detail_data->html_id = $html_num;

            // Reviewer account name
            foreach ($detail_info->find('.info_text') as $review_name) {
                $name_node = $review_name->first_child();
                if (!empty($name_node)) {
                    $detail_data->review_name = $name_node->innertext;
                }
            }

            // Review ID
            foreach ($detail_info->find('.reviewSelector') as $review_id) {
                $detail_data->review_id = str_replace('review_', '', $review_id->id);
            }

            // Title
            foreach ($detail_info->find('#HEADING') as $title) {
                $detail_data->title = $title->innertext;
            }

            // Star rating, derived from the bubble_NN class (bubble_10 = one star).
            foreach ($detail_info->find('.ui_bubble_rating') as $star_nums) {
                $detail_data->star_nums = str_replace('ui_bubble_rating ', '', $star_nums->getAttribute('class'));
                $detail_data->star_nums = str_replace(
                    array('bubble_50', 'bubble_40', 'bubble_30', 'bubble_20', 'bubble_10'),
                    array(5, 4, 3, 2, 1),
                    $detail_data->star_nums
                );
            }

            // Review text
            foreach ($detail_info->find('.partial_entry .fullText') as $entry_node) {
                $detail_data->content = $entry_node->innertext;
            }

            // Date of experience
            foreach ($detail_info->find('.prw_reviews_stay_date_hsx') as $review_date) {
                $detail_data->review_date = str_replace('Date of experience: ', '', $review_date->innertext);
            }
        }

        // Emit the collected data.
        print_r(json_encode($detail_data));
    }
}