欢迎您访问程序员文章站,本站旨在为大家提供、分享程序员计算机编程知识!
您现在的位置是: 首页  >  后端开发

将网页导出为Word文档的php代码介绍

程序员文章站 2022-05-04 13:00:13
...
  /**
   * Build a Word-openable document from HTML source.
   *
   * The result is really an MHT (MIME HTML) archive: the HTML is stored as the
   * first part under the name "tmp.html", and every image referenced by an
   * <img> tag is fetched and embedded as an additional part.
   *
   * Depends on the class MhtFileMaker, which must be loaded beforehand.
   *
   * Only <img> tags whose src value is wrapped in single or double quotes are
   * recognised; unquoted src values are ignored.
   *
   * Side effect: for every image that cannot be read, a
   * "file:<url> not exist!" line is echoed to the output.
   *
   * @param string $content      HTML source.
   * @param string $absolutePath Base URL/path prepended to relative image
   *                             paths. It is concatenated as-is, so it must
   *                             end with "/".
   * @param bool   $isEraseLink  When true, <a> tags are removed and only their
   *                             inner content is kept.
   * @return string The assembled MHT document.
   */
  function getWordDocument($content, $absolutePath = "", $isEraseLink = true)
  {
      $mht = new MhtFileMaker();

      if ($isEraseLink) {
          // Unwrap <a ...>text</a> to just "text". The "s" modifier lets the
          // link body span several lines.
          $stripped = preg_replace('/<a\b[^>]*>(.*?)<\/a>/is', '$1', $content);
          // preg_replace() yields null on a PCRE failure; keep the input then.
          if ($stripped !== null) {
              $content = $stripped;
          }
      }

      // Map of "src exactly as written in the HTML" => "location to fetch".
      // Keying by src also avoids downloading the same image more than once.
      $images = array();
      $matches = array();

      // Group 1 is the opening quote, group 2 the src value up to the same quote.
      if (preg_match_all('/<img\b[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1/is', $content, $matches)) {
          foreach ($matches[2] as $src) {
              $src = trim($src);
              if ($src === '' || isset($images[$src])) {
                  continue;
              }
              // Anything with a scheme (http:, https:, data:, ...) or a
              // protocol-relative "//host/..." prefix is already absolute.
              $isAbsolute = preg_match('#^(?:[a-z][a-z0-9+.\-]*:|//)#i', $src) === 1;
              $images[$src] = $isAbsolute ? $src : $absolutePath . $src;
          }
      }

      // The HTML itself must be the first part of the archive.
      $mht->AddContents("tmp.html", $mht->GetMimeType("tmp.html"), $content);

      foreach ($images as $src => $location) {
          // PHP turns numeric-looking array keys into integers; restore the string.
          $src = (string) $src;

          // "@" keeps PHP warnings out of the output; a failure is reported
          // through the echo below instead.
          $imgContent = @file_get_contents($location);
          if ($imgContent === false) {
              // The "<br />" is restored on the assumption that it was stripped
              // from the published listing, which shows only a line break here.
              echo "file:" . $location . " not exist!<br />\n";
              continue;
          }
          if ($imgContent !== '') {
              // Content-Location must be the src exactly as the HTML refers to
              // it, so the viewer can match the part to the <img> tag.
              $mht->AddContents($src, $mht->GetMimeType($location), $imgContent);
          }
      }

      return $mht->GetFile();
  }
复制代码

使用方法:

  // Convert the HTML held in $content; the second argument is the base URL used
  // to complete relative image paths and must end with "/".
  $fileContent = getWordDocument($content, "http://www.yoursite.com/Music/etc/");

  // Write the document in a single checked call so that a failed open is
  // reported instead of passing an invalid handle on to fwrite()/fclose().
  if (file_put_contents("test.doc", $fileContent) === false) {
      exit('Unable to write test.doc');
  }
复制代码

其中,$content 变量应该是 HTML 源代码,第二个参数应该是能填补 HTML 代码中图片相对路径的 URL 地址(需要以 / 结束)。注意:在使用这个函数之前,您需要先包含类 MhtFileMaker,这个类可以帮助我们生成 Mht 文档。

  /***********************************************************************
  Class: Mht File Maker
  Version: 1.2 beta
  link: http://bbs.it-home.org
  Author: Wudi
  Description: The class can make .mht file.
  ***********************************************************************/
  /**
   * Assembles a multipart/related MIME archive (.mht).
   *
   * Parts are collected with AddContents(), AddFile(), AddDir() or
   * AutoAddFiles(); GetFile() returns the archive as a string and MakeFile()
   * writes it to disk. The first part added is the one whose MIME type is
   * announced in the top-level Content-Type header.
   */
  class MhtFileMaker
  {
      /** @var array Not read or written by any method of this class. */
      public $config = array();

      /** @var array Raw header lines ("Name: value"), in insertion order. */
      public $headers = array();

      /** @var array Lower-cased header names already set (name => TRUE). */
      public $headers_exists = array();

      /** @var array Parts; each has the keys filepath, mimetype, filecont, encoding. */
      public $files = array();

      /** @var string|null MIME boundary; generated by CheckHeaders() when unset. */
      public $boundary;

      /** @var string|null Base directory with forward slashes, set by SetBaseDir(). */
      public $dir_base;

      /** @var string|null Path of the first page, set by SetFirstPage(). */
      public $page_first;

      /** Extension (lower case) => MIME type, used by GetMimeType(). */
      private static $mimeTypes = array(
          'htm'  => 'text/html',
          'html' => 'text/html',
          'txt'  => 'text/plain',
          'cgi'  => 'text/plain',
          'php'  => 'text/plain',
          'css'  => 'text/css',
          'jpg'  => 'image/jpeg',
          'jpeg' => 'image/jpeg',
          'jpe'  => 'image/jpeg',
          'gif'  => 'image/gif',
          'png'  => 'image/png',
      );

      /**
       * Empty method kept for backward compatibility. Its name differs from
       * the class name, so it is not a constructor and does nothing.
       *
       * @param array $config Ignored.
       */
      public function MhtFile($config = array())
      {
      }

      /**
       * Append a raw header line and record its lower-cased name.
       *
       * @param string $header Complete header line, e.g. "Subject: Hello".
       */
      public function SetHeader($header)
      {
          $this->headers[] = $header;
          $colon = strpos($header, ':');
          // A line without ":" is recorded under the empty name.
          $key = ($colon === false) ? '' : strtolower(substr($header, 0, $colon));
          $this->headers_exists[$key] = TRUE;
      }

      /**
       * Add a From header.
       *
       * @param string $from Header value.
       */
      public function SetFrom($from)
      {
          $this->SetHeader("From: $from");
      }

      /**
       * Add a Subject header.
       *
       * @param string $subject Header value.
       */
      public function SetSubject($subject)
      {
          $this->SetHeader("Subject: $subject");
      }

      /**
       * Add a Date header.
       *
       * @param mixed $date        A ready-made date string, or a Unix timestamp
       *                           when $istimestamp is true. An empty value
       *                           (loose comparison with NULL) means "now".
       * @param bool  $istimestamp Whether $date is a Unix timestamp that still
       *                           has to be formatted.
       */
      public function SetDate($date = NULL, $istimestamp = FALSE)
      {
          if ($date == NULL) {
              $date = time();
              // The fallback is always a timestamp, so it always needs
              // formatting; otherwise the header would hold a bare integer.
              $istimestamp = TRUE;
          }
          if ($istimestamp == TRUE) {
              $date = date('D, d M Y H:i:s O', $date);
          }
          $this->SetHeader("Date: $date");
      }

      /**
       * Set the MIME boundary, generating a random one when none is given.
       *
       * @param string|null $boundary Boundary to use, or NULL to generate one.
       */
      public function SetBoundary($boundary = NULL)
      {
          if ($boundary == NULL) {
              // The generated value itself starts with "--"; GetFile() adds the
              // delimiter dashes required by MIME on top of that.
              $this->boundary = '--' . strtoupper(md5(mt_rand())) . '_MULTIPART_MIXED';
          } else {
              $this->boundary = $boundary;
          }
      }

      /**
       * Set the base directory that AutoAddFiles() collects from.
       *
       * @param string $dir Directory path; resolved with realpath().
       */
      public function SetBaseDir($dir)
      {
          $this->dir_base = str_replace("\\", "/", realpath($dir));
      }

      /**
       * Set the first page, given relative to the base directory.
       *
       * @param string $filename File name below the base directory.
       */
      public function SetFirstPage($filename)
      {
          $this->page_first = str_replace("\\", "/", realpath("{$this->dir_base}/$filename"));
      }

      /**
       * Add the first page followed by every other file below the base
       * directory. Terminates the script when no first page has been set.
       */
      public function AutoAddFiles()
      {
          if (!isset($this->page_first)) {
              exit ('Not set the first page.');
          }
          $filepath = str_replace($this->dir_base, '', $this->page_first);
          $filepath = 'http://mhtfile' . $filepath;
          $this->AddFile($this->page_first, $filepath, NULL);
          $this->AddDir($this->dir_base);
      }

      /**
       * Recursively add every file below $dir except the first page.
       *
       * @param string $dir Directory to scan.
       */
      public function AddDir($dir)
      {
          $handle_dir = opendir($dir);
          if ($handle_dir === false) {
              // Unreadable directory: nothing to add.
              return;
          }
          // Compare strictly against false: an entry named "0" is falsy and
          // would otherwise end the loop early.
          while (($filename = readdir($handle_dir)) !== false) {
              if ($filename == '.' || $filename == '..' || "$dir/$filename" == $this->page_first) {
                  continue;
              }
              if (is_dir("$dir/$filename")) {
                  $this->AddDir("$dir/$filename");
              } elseif (is_file("$dir/$filename")) {
                  $filepath = str_replace($this->dir_base, '', "$dir/$filename");
                  $filepath = 'http://mhtfile' . $filepath;
                  $this->AddFile("$dir/$filename", $filepath, NULL);
              }
          }
          closedir($handle_dir);
      }

      /**
       * Read a file and add it as a part.
       *
       * @param string      $filename File to read.
       * @param string|null $filepath Content-Location to record; defaults to $filename.
       * @param string|null $encoding Passed through to AddContents().
       */
      public function AddFile($filename, $filepath = NULL, $encoding = NULL)
      {
          if ($filepath == NULL) {
              $filepath = $filename;
          }
          $mimetype = $this->GetMimeType($filename);
          $filecont = file_get_contents($filename);
          $this->AddContents($filepath, $mimetype, $filecont, $encoding);
      }

      /**
       * Add a part from in-memory data.
       *
       * @param string      $filepath Content-Location of the part.
       * @param string      $mimetype Content-Type of the part.
       * @param string      $filecont Part body.
       * @param string|null $encoding Content-Transfer-Encoding that $filecont
       *                              is already in; when NULL the body is
       *                              base64-encoded in 76-character lines.
       */
      public function AddContents($filepath, $mimetype, $filecont, $encoding = NULL)
      {
          if ($encoding == NULL) {
              $filecont = chunk_split(base64_encode($filecont), 76);
              $encoding = 'base64';
          }
          $this->files[] = array(
              'filepath' => $filepath,
              'mimetype' => $mimetype,
              'filecont' => $filecont,
              'encoding' => $encoding,
          );
      }

      /**
       * Make sure a Date header and a boundary exist, creating defaults if not.
       */
      public function CheckHeaders()
      {
          if (!array_key_exists('date', $this->headers_exists)) {
              $this->SetDate(NULL, TRUE);
          }
          if ($this->boundary == NULL) {
              $this->SetBoundary();
          }
      }

      /**
       * Tell whether at least one part has been added.
       *
       * @return bool
       */
      public function CheckFiles()
      {
          return count($this->files) > 0;
      }

      /**
       * Assemble and return the complete archive.
       *
       * Terminates the script when no part has been added.
       *
       * @return string
       */
      public function GetFile()
      {
          $this->CheckHeaders();
          if (!$this->CheckFiles()) {
              exit ('No file was added.');
          }
          $contents = implode("\r\n", $this->headers);
          $contents .= "\r\n";
          $contents .= "MIME-Version: 1.0\r\n";
          $contents .= "Content-Type: multipart/related;\r\n";
          $contents .= "\tboundary=\"{$this->boundary}\";\r\n";
          // The archive's root type is the type of the first part added.
          $contents .= "\ttype=\"" . $this->files[0]['mimetype'] . "\"\r\n";
          $contents .= "X-MimeOLE: Produced By Mht File Maker v1.0 beta\r\n";
          $contents .= "\r\n";
          $contents .= "This is a multi-part message in MIME format.\r\n";
          $contents .= "\r\n";
          foreach ($this->files as $file) {
              $contents .= "--{$this->boundary}\r\n";
              $contents .= "Content-Type: $file[mimetype]\r\n";
              $contents .= "Content-Transfer-Encoding: $file[encoding]\r\n";
              $contents .= "Content-Location: $file[filepath]\r\n";
              $contents .= "\r\n";
              $contents .= $file['filecont'];
              $contents .= "\r\n";
          }
          $contents .= "--{$this->boundary}--\r\n";
          return $contents;
      }

      /**
       * Write the archive to a file.
       *
       * @param string $filename Destination path.
       * @return bool TRUE on success, FALSE when the file cannot be opened.
       */
      public function MakeFile($filename)
      {
          $contents = $this->GetFile();
          $fp = fopen($filename, 'w');
          if ($fp === false) {
              return FALSE;
          }
          fwrite($fp, $contents);
          fclose($fp);
          return TRUE;
      }

      /**
       * Map a file name or URL to a MIME type by its extension.
       *
       * The lookup is case-insensitive; a missing or unknown extension yields
       * "application/octet-stream".
       *
       * @param string $filename File name, path or URL.
       * @return string
       */
      public function GetMimeType($filename)
      {
          // PATHINFO_EXTENSION returns "" when there is no extension, which
          // avoids reading a missing 'extension' array key.
          $extension = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
          if (isset(self::$mimeTypes[$extension])) {
              return self::$mimeTypes[$extension];
          }
          return 'application/octet-stream';
      }
  }
  160. ?>
复制代码