|
|
<?php
|
|
|
|
|
|
// Abort with a message when BASEPATH is not defined, i.e. when this file is
// requested directly instead of being loaded by the framework front controller.
defined('BASEPATH') OR exit('No direct script access allowed');
|
|
|
|
|
|
class Index extends CI_Controller
|
|
|
{
|
|
|
|
|
|
/**
 * Initialises the parent controller and loads the HTML DOM library.
 *
 * The 'simple_html_dom_lib' library is loaded for every request handled by
 * this controller (presumably it provides str_get_html(), which optimize()
 * and create_amp() call - confirm against the library file).
 */
public function __construct()
{
    parent::__construct();

    $library_name = 'simple_html_dom_lib';
    $this->load->library($library_name);
}
|
|
|
|
|
|
/**
 * Default action: renders the 'welcome' view.
 */
public function index()
{
    $view_name = 'welcome';
    $this->load->view($view_name);
}
|
|
|
|
|
|
/**
 * Optimizes a posted HTML page and echoes the result as JSON.
 *
 * POST fields read: htmlsource (page HTML), websitehost (site origin used to
 * resolve relative asset URLs), template_name (AMP template, forwarded to
 * create_amp()), create_amp (when non-empty only the AMP markup is returned)
 * and debug (forwarded to create_amp()).
 *
 * Steps:
 *  1. Remove every <link rel="stylesheet"> and download the linked files;
 *     append the contents of <style type="text/css"> blocks.
 *  2. Remove external scripts that have neither defer nor async and download
 *     them; remove inline scripts (inline scripts that reference the GTM
 *     loader are left in place).
 *  3. Send the original HTML and the collected CSS to the purifycss server
 *     and inline the reduced CSS right before </head>.
 *  4. Collapse whitespace, then append before </body>: a deferred loader for
 *     the original stylesheets, a script tag with the ##DEFERSCRIPTDEFER##
 *     placeholder and the inline scripts that do not contain '$'.
 *
 * Output on success: {"result":"ok","data":{"html_object":...,"js_content":...}}
 * or {"result":"ok","data":{"amp":...}} when create_amp is set.
 * On failure a status header (500 or 404) is set, a plain-text message is
 * echoed and false is returned.
 *
 * @return bool|void false on failure, nothing on success
 */
public function optimize()
{
    $htmlsource = $this->input->post('htmlsource');
    $websitehost = $this->input->post('websitehost');
    $template_name = $this->input->post('template_name');
    $create_amp = $this->input->post('create_amp');
    $debug = $this->input->post('debug');

    if (empty($htmlsource) || empty($websitehost)) {
        $this->output->set_status_header(500);
        echo 'error:htmlsource or websitehost is empty!';
        log_message('error', "htmlsource or websitehost is empty! " . $websitehost);
        return false;
    }

    // The host must not end with a slash.
    if (substr($websitehost, -1, 1) == '/') {
        $websitehost = substr($websitehost, 0, -1);
    }

    // A parse failure used to fall through to the "ok" response with an
    // undefined $js_content (and a fatal error on the AMP path); report it
    // like every other failure in this method instead.
    $html_object = str_get_html($htmlsource);
    if (empty($html_object)) {
        $this->output->set_status_header(500);
        echo 'error:htmlsource could not be parsed!';
        log_message('error', "optimize htmlsource could not be parsed! " . $websitehost);
        return false;
    }

    // Collect all CSS: linked files first, then in-page style blocks.
    $link_css_array = array();
    $css_content = '';
    foreach ($html_object->find('link') as $link_css) {
        if ($link_css->rel == 'stylesheet' && !empty($link_css->href)) {
            $link_css_array[] = $link_css->href;
            $link_css->outertext = ''; // remove the link, it is re-added at the page bottom
        }
    }

    foreach ($link_css_array as $item) {
        // NOTE(review): the loose comparison also treats an empty response
        // body as a download failure - confirm GET_HTTP's return contract.
        $get_http_temp = GET_HTTP($this->format_url($item, $websitehost));
        if ($get_http_temp == false) {
            $this->output->set_status_header(404);
            echo 'CSS文件下载错误';
            log_message('error', "optimize CSS文件下载错误! " . $item);
            return false;
        }
        $css_content .= $get_http_temp;
    }

    foreach ($html_object->find('style') as $style_css) {
        if ($style_css->type == "text/css") {
            $css_content .= $style_css->innertext;
        }
    }

    // Collect all JS: external files and in-page scripts.
    $link_js_array = array();
    $js_inline_content = '';
    $js_jquery_content = '';
    foreach ($html_object->find('script') as $link_script) {
        if (!empty($link_script->src)) {
            // Only scripts without defer/async are moved; the others already load late.
            if (empty($link_script->defer) && empty($link_script->async)) {
                $link_js_array[] = $link_script->src;
                $link_script->outertext = ''; // removed here, delivered through js_content
            }
            continue;
        }

        // Full markup of the inline script, including its <script> tags.
        $script_html = (string) $link_script;

        // Inline scripts that pull in the GTM loader stay where they are.
        if (strpos($script_html, 'https://www.googletagmanager.com/gtm.js') !== false) {
            continue;
        }

        if (strpos($script_html, '$') !== false) {
            // A '$' is taken as a jQuery call: the code (without tags) is
            // appended to the deferred JS so it runs after the libraries.
            $js_jquery_content .= $link_script->innertext;
        } else {
            // Kept with its <script> tags and re-inserted at the page bottom.
            $js_inline_content .= $script_html;
        }
        $link_script->outertext = ''; // remove from its original position
    }

    $js_scr_content = '';
    foreach ($link_js_array as $item) {
        $get_http_temp = GET_HTTP($this->format_url($item, $websitehost));
        if ($get_http_temp == false) {
            $this->output->set_status_header(404);
            echo 'JS文件下载错误';
            log_message('error', "optimize js文件下载错误! " . $item);
            return false;
        }
        $js_scr_content .= $get_http_temp;
    }

    // Submit page HTML and CSS to the purifycss service.
    $purifycss_server = 'http://184.172.113.216:33033/';
    if (strpos($websitehost, 'chinarundreisen.com') !== false) {
        // The German site uses its own CSS processing server.
        $purifycss_server = 'http://158.177.67.52:33033/';
    }

    $optimize_css = GET_HTTP(
        $purifycss_server,
        'html_source=' . urlencode($htmlsource) . '&html_css=' . urlencode($css_content),
        'POST'
    );
    if (empty($optimize_css)) {
        $this->output->set_status_header(500);
        echo 'css精简错误';
        log_message('error', "optimize purifycss returned an empty result! " . $websitehost);
        return false;
    }

    // Inline the reduced CSS right before </head>. From here on the page is
    // handled as a plain string (the DOM object is serialised by the cast).
    $html = str_replace(
        '</head>',
        '<style type="text/css">' . $optimize_css . "</style></head>",
        (string) $html_object
    );

    // Collapse every run of spaces, tabs and line breaks into one space.
    // The previous fixed-count str_replace loop replaced " " with " " and
    // therefore never shortened anything.
    $collapsed = preg_replace('/[ \t\n\r]+/', ' ', $html);
    if ($collapsed !== null) {
        $html = $collapsed;
    }

    // Original stylesheets are loaded late from a <noscript> container.
    $lastload_js = '<noscript id="deferred-styles">';
    foreach ($link_css_array as $item) {
        $lastload_js .= '<link rel="stylesheet" type="text/css" href="' . $item . '"/>';
    }
    $lastload_js .= '</noscript><script>var loadDeferredStyles=function(){var addStylesNode=document.getElementById("deferred-styles");var replacement=document.createElement("div");replacement.innerHTML=addStylesNode.textContent;document.body.appendChild(replacement);addStylesNode.parentElement.removeChild(addStylesNode)};var raf=requestAnimationFrame||mozRequestAnimationFrame||webkitRequestAnimationFrame||msRequestAnimationFrame;if(raf){raf(function(){window.setTimeout(loadDeferredStyles,0)})}else{window.addEventListener("load",loadDeferredStyles)};</script>';

    $js_content = $js_scr_content . $js_jquery_content;

    // Deferred JS: the caller is expected to store js_content in a file and
    // replace the placeholder below with that file's URL.
    $lastload_js .= '<script async src="##DEFERSCRIPTDEFER##"></script>';
    $lastload_js .= $js_inline_content;

    $html = str_replace('</body>', $lastload_js . '</body>', $html);

    // AMP requested: return only the AMP markup.
    if (!empty($create_amp)) {
        $amp = $this->create_amp($html, $template_name, $websitehost, $debug);
        echo json_encode(array('result' => 'ok', 'data' => array('amp' => $amp)));
        return;
    }

    echo json_encode(array('result' => 'ok', 'data' => array('html_object' => $html, 'js_content' => $js_content)));
}
|
|
|
|
|
|
//格式化url,保证请求的URL有域名,//更换为对应的域名路径
|
|
|
/**
 * Turns a URL taken from the page into an absolute https URL.
 *
 * - http:// and https:// URLs (scheme matched case-insensitively) are returned
 *   with an https:// scheme. Only the leading scheme is rewritten, so URLs
 *   embedded in the query string are left alone.
 * - Protocol-relative URLs (//host/path) get https: prepended; only the
 *   leading // is touched because some URLs contain // further inside.
 * - URLs with any other scheme (data:, blob:, mailto: ...) are returned
 *   unchanged, since prefixing a host or decoding them would corrupt them.
 * - Everything else is treated as a path on $host. A slash is inserted when
 *   neither the host ends with one nor the path starts with one.
 *
 * Except for the "other scheme" case the result is passed through
 * urldecode(), as before. NOTE(review): urldecode() also turns '+' into a
 * space - confirm that this is wanted for query strings.
 *
 * NOTE(review): the method has no visibility modifier and is therefore
 * public; confirm whether it should be reachable as a controller action.
 *
 * @param string $url  URL or path as found in the document
 * @param string $host Site origin used for relative paths, e.g. https://www.example.com
 * @return string
 */
function format_url($url, $host = '')
{
    $url = (string) $url;
    $host = (string) $host;

    // Absolute http(s) URL: force https on the leading scheme only.
    if (strncasecmp($url, 'https://', 8) === 0) {
        return urldecode('https://' . substr($url, 8));
    }
    if (strncasecmp($url, 'http://', 7) === 0) {
        return urldecode('https://' . substr($url, 7));
    }

    // Protocol-relative URL.
    if (substr($url, 0, 2) == '//') {
        return urldecode('https://' . substr($url, 2));
    }

    // Any other explicit scheme is not something a host can be prepended to.
    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $url)) {
        return $url;
    }

    // Relative path: make sure host and path are separated by exactly the
    // slash that was missing.
    if ($host !== '' && $url !== '' && substr($host, -1) !== '/' && substr($url, 0, 1) !== '/') {
        $url = '/' . $url;
    }

    return urldecode($host . $url);
}
|
|
|
|
|
|
|
|
|
|
|
|
//生成AMP版本 用移动端版本生成
|
|
|
/*
|
|
|
1.加载站点AMP模板,模板只有导航、头部、底部
|
|
|
2.删除全部js引用和内链代码
|
|
|
3.删除全部css引用,内联css放入<style amp-custom>
|
|
|
4.图片全部替换成amp-img,设置宽高,设置layout="responsive"
|
|
|
5.去掉所有元素中的style
|
|
|
6.去掉样式表中的!important
|
|
|
7.替换读取源文件title description keywords canonical
|
|
|
8.todo:增加application/ld+json
|
|
|
要求: 图片文件必须带有域名,否则的获取尺寸会出错,如 https://data.asiahighlights.com/pic/abc.jpg
|
|
|
*/
|
|
|
//$htmlsource: HTML source of the mobile version (parsed with str_get_html); the generated AMP markup is returned to the caller
|
|
|
/**
 * Builds the AMP version of a page from its (mobile) HTML source.
 *
 * The site template 'amp-template/<site_code>' is loaded as a string and its
 * placeholders are filled from the parsed page:
 *   <!--@TITLE@-->, <!--@DESCRIPTION@-->, <!--@KEYWORDS@-->, <!--@CANONICAL@-->,
 *   the CSS placeholder "@CUSTOM-CSS@" (all <style> contents with '!important'
 *   and '@charset "utf-8";' removed) and <!--@CUSTOM-CONENT@--> (sic; markup
 *   of #content_main).
 *
 * Before that, '.hidden-xs' elements, scripts, noscript blocks, stylesheet
 * links and style blocks are removed from the page. Image URLs are made
 * absolute, image sizes are requested per host from
 * https://<host>/imagesize.php?photo=<path>,<path>..., and every image with
 * a known size is replaced by <amp-img layout="responsive">; for .jpg images
 * a .webp <amp-img> with the .jpg as fallback is emitted. Images whose size
 * is unknown are left untouched. Finally inline style attributes are
 * stripped from the whole template.
 *
 * NOTE(review): the method has no visibility modifier and is therefore
 * public; confirm whether it should be reachable as a controller action.
 *
 * @param string $htmlsource  HTML of the page to convert
 * @param string $site_code   Name of the template below views/amp-template/
 * @param string $websitehost Site origin used to resolve relative image URLs
 * @param mixed  $debug       When truthy, the requested paths and the received sizes are print_r()'ed
 * @return string AMP markup
 * @throws InvalidArgumentException when $site_code contains '..'
 * @throws RuntimeException when $htmlsource cannot be parsed
 */
function create_amp($htmlsource, $site_code, $websitehost, $debug = false)
{
    // optimize() passes the template name straight from POST data and it
    // ends up in an include path, so refuse directory traversal.
    if (strpos((string) $site_code, '..') !== false) {
        log_message('error', 'create_amp invalid template name! ' . $site_code);
        throw new InvalidArgumentException('Invalid AMP template name');
    }

    // Previously a failed parse ended in a method call on false.
    $html_object = str_get_html($htmlsource);
    if (empty($html_object)) {
        log_message('error', 'create_amp htmlsource could not be parsed! ' . $websitehost);
        throw new RuntimeException('create_amp: HTML source could not be parsed');
    }

    // Load the site's AMP template as a string.
    $amp_template = $this->load->view('amp-template/' . $site_code, null, true);

    // Mobile version: drop elements that are hidden on small screens.
    foreach ($html_object->find('.hidden-xs') as $hidden_item) {
        $hidden_item->outertext = '';
    }

    foreach ($html_object->find('title') as $title) {
        $amp_template = str_replace('<!--@TITLE@-->', $title->innertext, $amp_template);
    }

    foreach ($html_object->find('meta') as $meta) {
        if ($meta->name == 'description') {
            $amp_template = str_replace('<!--@DESCRIPTION@-->', $meta->content, $amp_template);
        }
        if ($meta->name == 'keywords') {
            $amp_template = str_replace('<!--@KEYWORDS@-->', $meta->content, $amp_template);
        }
    }

    foreach ($html_object->find('script') as $script) {
        $script->outertext = '';
    }

    foreach ($html_object->find('noscript') as $noscript) {
        $noscript->outertext = '';
    }

    foreach ($html_object->find('link') as $link) {
        if ($link->rel == 'stylesheet' && !empty($link->href)) {
            $link->outertext = '';
        }
        if ($link->rel == 'canonical' && !empty($link->href)) {
            $amp_template = str_replace('<!--@CANONICAL@-->', $link->href, $amp_template);
        }
    }

    // Move all in-page CSS into the template's custom CSS slot.
    $style_content = '';
    foreach ($html_object->find('style') as $style) {
        $style_content .= $style->innertext;
        $style->outertext = '';
    }
    $style_content = str_replace('!important', '', $style_content);
    $style_content = str_replace('@charset "utf-8";', '', $style_content);
    $amp_template = str_replace('/*@CUSTOM-CSS@*/', $style_content, $amp_template);

    // Make image URLs absolute. format_url() only uses the host argument for
    // URLs that carry no host of their own, so the site host is the right
    // fallback for both attributes. (The old code derived the originalsrc
    // host from src, which produced scheme-less URLs.)
    foreach ($html_object->find('img') as $image) {
        if (!empty($image->src)) {
            $image->src = $this->format_url($image->src, $websitehost);
        }
        if (!empty($image->originalsrc)) {
            $image->originalsrc = $this->format_url($image->originalsrc, $websitehost);
        }
    }

    // Group image paths by host for the size lookup, e.g.
    // https://data.asiahighlights.com/imagesize.php?photo=/pic/logo-ah.png,/pic/ah-slide-logo.png
    $request_size = array();
    foreach ($html_object->find('img') as $image) {
        $img_src = !empty($image->originalsrc) ? $image->originalsrc : $image->src;

        // Facebook / DMCA tracking pixels are removed; asking for their size fails.
        if (strpos($img_src, 'facebook.com') !== false || strpos($img_src, 'dmca.com') !== false) {
            $image->outertext = '';
            continue;
        }

        // Without host and path there is nothing that can be looked up.
        $img_src_urls = parse_url($img_src);
        if (empty($img_src_urls['host']) || empty($img_src_urls['path'])) {
            continue;
        }
        $request_size[$img_src_urls['host']][] = $img_src_urls['path'];
    }
    if ($debug) {
        print_r($request_size);
    }

    // Request the sizes; AMP requires width and height on every image.
    $image_sizes = array();
    foreach ($request_size as $host => $path) {
        $parse_url = "https://{$host}/imagesize.php?photo=" . implode(',', $path);
        $size_data = GET_HTTP($parse_url);
        if (empty($size_data)) {
            continue;
        }

        $size_data = json_decode($size_data);
        if (!is_array($size_data) && !is_object($size_data)) {
            continue; // not the expected JSON list
        }

        foreach ($size_data as $size_item) {
            if (!is_object($size_item) || !isset($size_item->photo, $size_item->width, $size_item->height)) {
                continue;
            }
            // Key the entry by absolute URL so it can be matched against src below.
            $size_item->photo = "https://{$host}" . $size_item->photo;
            $image_sizes[$size_item->photo] = $size_item;
        }
    }
    if ($debug) {
        print_r($image_sizes);
    }

    // Replace every image with a known size by <amp-img>.
    foreach ($html_object->find('img') as $image) {
        $img_src = !empty($image->originalsrc) ? $image->originalsrc : $image->src;
        if (empty($img_src) || empty($image_sizes[$img_src])) {
            continue; // size unknown: leave the element as it is
        }

        $img_size = $image_sizes[$img_src];
        $img_alt = $image->alt;
        $img_class = $image->class;
        $common = "layout=\"responsive\" class=\"{$img_class}\" alt=\"{$img_alt}\"";
        $dims = "width=\"{$img_size->width}\" height=\"{$img_size->height}\"";

        if (substr($img_src, -4) == '.jpg') {
            // WebP first, with the original .jpg nested as fallback.
            $img_src_jpg = " <amp-img fallback {$common} src=\"{$img_src}\" {$dims}></amp-img>";
            $img_src_webp = substr($img_src, 0, strlen($img_src) - 4) . '.webp';
            $image->outertext = " <amp-img {$common} src=\"{$img_src_webp}\" {$dims}>{$img_src_jpg}</amp-img>";
        } else {
            $image->outertext = " <amp-img {$common} src=\"{$img_src}\" {$dims}></amp-img>";
        }
    }

    foreach ($html_object->find('#content_main') as $content_main) {
        $amp_template = str_replace('<!--@CUSTOM-CONENT@-->', (string) $content_main, $amp_template);
    }

    // Strip inline style attributes. The value is matched up to its own
    // closing quote (the old pattern stopped at the first quote of either
    // kind) and attributes such as data-style are no longer touched.
    $stripped = preg_replace('/(?<![\w-])style\s*=\s*(?:"[^"]*"|\'[^\']*\')/i', '', $amp_template);
    if ($stripped !== null) {
        $amp_template = $stripped;
    }

    return $amp_template;
}
|
|
|
|
|
|
} |