Forum Samples, Tips and Tricks

Generating a TOC with PHP

ict4schools
Hello, I have built some code to generate a table of contents (TOC) in PHP.

I did the following:
  • First, give every header a unique ID (needed because we are using PrinceXML, and good practice anyway).
  • Then build an OL/LI structure from the headers, although that piece of code may still contain bugs.

See the following code:
<?php
	// Pass 1: give every heading (h1-h6) in $content a unique id attribute so
	// that the table of contents can link to it.
	//
	// Input:  $content     - HTML string, must be defined before this snippet.
	// Output: $content     - same HTML with id="..." set on every non-empty heading.
	//         $had_headers - list of the ids that were handed out, in document order.
	//
	// Each heading occurrence is rewritten individually (by byte offset) instead
	// of with str_replace(): str_replace() rewrites *all* identical headings at
	// once, which gave repeated headings the same id.
	$matches = null;
	$had_headers = array();
	$heading_count = preg_match_all(
		// \1 forces the closing tag to match the opening one (<h2>...</h2>).
		'/<(h[1-6])\b([^>]*)>(.*?)<\/\1>/i',
		$content,
		$matches,
		PREG_SET_ORDER | PREG_OFFSET_CAPTURE
	);
	if ($heading_count) {
		$rebuilt = '';
		$cursor = 0; // byte offset in $content up to which text has been copied
		foreach ($matches as $match) {
			$headertag  = $match[0][0];
			$offset     = $match[0][1];
			$tagname    = strtolower($match[1][0]);
			$attributes = $match[2][0];
			$inner      = $match[3][0];

			// Copy the text between the previous heading and this one unchanged.
			$rebuilt .= substr($content, $cursor, $offset - $cursor);
			$cursor = $offset + strlen($headertag);

			// Empty headings get no id (and therefore no TOC entry).
			if ($inner === '') {
				$rebuilt .= $headertag;
				continue;
			}

			// Slug: heading text reduced to lowercase ASCII letters and digits.
			$headerid = strtolower(preg_replace('/[^a-z0-9]/i', '', strip_tags($headertag)));
			if ($headerid === '') {
				// Heading without any ASCII alphanumerics; avoid id="".
				$headerid = 'section';
			}

			// Make the id unique by appending '1' until it is unused. Strict
			// comparison, so numeric-looking slugs ('1' vs '01') do not collide.
			while (in_array($headerid, $had_headers, true)) {
				$headerid .= '1';
			}
			$had_headers[] = $headerid;

			// Drop any id the heading already had, keep all other attributes.
			$attributes = preg_replace('/\s+id\s*=\s*("[^"]*"|\'[^\']*\'|[^\s>]+)/i', '', $attributes);

			// $headerid only contains [a-z0-9], so it needs no escaping.
			$rebuilt .= '<' . $tagname . ' id="' . $headerid . '"' . $attributes . '>'
				. $inner . '</' . $tagname . '>';
		}
		// Copy whatever follows the last heading.
		$content = $rebuilt . substr($content, $cursor);
	}

	// Pass 2: build a nested <ol id="toc"> from the headings in $content.
	//
	// Input:  $content  - HTML string whose headings carry id="..." attributes
	//                     (headings without a double-quoted id are skipped).
	// Output: $toc_html - the TOC markup; always a balanced <ol id="toc">...</ol>.
	//
	// Only h1-h3 are listed. Nesting is tracked with a stack so that skipped
	// levels (h1 directly followed by h3) or a document that does not start
	// with h1 still produce well-formed list markup.
	$matches = null;
	$smatches = null;
	$toc_html = '<ol id="toc">' . "\n";
	// Heading depth (0 = h1) of every <li> that is currently open, outermost first.
	$open_depths = array();
	if (preg_match_all('/<h([1-6])\b([^>]*)>(.*?)<\/h\1>/i', $content, $matches, PREG_SET_ORDER)) {
		foreach ($matches as $match) {
			// The leading \s keeps attributes such as data-id="..." from matching.
			if (!preg_match('/\sid\s*=\s*"([^"]*)"/i', $match[2], $smatches)) {
				continue;
			}
			$headerid = trim($smatches[1]);
			if ($headerid === '') {
				continue;
			}
			$header_depth = intval($match[1]) - 1;

			// don't take deeper than h3 in TOC
			if ($header_depth > 2) {
				continue;
			}

			if (empty($open_depths)) {
				// First entry: goes straight into <ol id="toc">, whatever its level.
			} elseif ($header_depth > end($open_depths)) {
				// Deeper than the open entry: start one nested list inside it,
				// no matter how many heading levels were skipped.
				$toc_html .= '<ol>';
			} else {
				// Same level or going up: close nested lists as long as the
				// enclosing entry is not shallower than the new heading ...
				while (count($open_depths) > 1
					&& $open_depths[count($open_depths) - 2] >= $header_depth) {
					$toc_html .= '</li></ol>';
					array_pop($open_depths);
				}
				// ... then close the entry that becomes the previous sibling.
				$toc_html .= '</li>';
				array_pop($open_depths);
			}
			$open_depths[] = $header_depth;

			// double_encode = false: entities already present in the heading
			// (&amp;, &eacute;) are not encoded a second time.
			// NOTE(review): assumes $content is UTF-8 - confirm.
			$label = htmlspecialchars(trim(strip_tags($match[3])), ENT_QUOTES, 'UTF-8', false);

			// $headerid is copied verbatim from a double-quoted attribute value,
			// so it is already in attribute-safe form.
			$toc_html .= '<li><a href="#' . $headerid . '">' . $label . '</a>';
		}
	}
	// Close every entry/list that is still open; with no entries at all only
	// the outer list has to be closed.
	$toc_html .= empty($open_depths)
		? '</ol>'
		: str_repeat('</li></ol>', count($open_depths));


There is also a nice XSLT 2.0 version that generates a TOC from the heading structure (h1 to h6):
<?xml version="1.0" encoding="utf-8"?>

<!-- toc-raw.xsl generates the TOC
without links and numbers.

Builds nested <ol>/<li> lists from the XHTML headings h1 to h6 using
xsl:for-each-group with group-starting-with. Each list item contains only
the string value of its heading (no <a> element, no numbering). Result
elements are created in the XHTML namespace (default xmlns below); the
xhtml prefix is excluded from the output.

Nothing in this stylesheet invokes the named template "toc"; presumably it
is meant to be called with xsl:call-template from an including stylesheet.
TODO confirm. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0" xmlns="http://www.w3.org/1999/xhtml" xmlns:xhtml="http://www.w3.org/1999/xhtml" exclude-result-prefixes="xhtml">
    
    <!-- Entry point: collects every h1..h6 of the document in document order and
         starts a new group at each h1. Only the first item of each group is
         processed in mode "toc"; the rest of the group is reached through
         current-group() inside the matching template. -->
    <xsl:template name="toc">
        <xsl:for-each-group select="//xhtml:h1|//xhtml:h2|//xhtml:h3|//xhtml:h4|//xhtml:h5|//xhtml:h6" group-starting-with="xhtml:h1">
            <xsl:apply-templates select="." mode="toc"/>
        </xsl:for-each-group>
    </xsl:template>
    
    <!-- h1: emits no entry for the h1 itself. When the test holds it outputs one
         <ol> containing the rest of the group, regrouped at every h2; otherwise
         it outputs nothing.
         NOTE(review): "=" is an XPath general comparison on string values, not a
         node identity test, so headings with identical text can satisfy it
         unexpectedly. Confirm whether current-group()[self::xhtml:h2] was meant.
         NOTE(review): this and the templates below read current-group() inside a
         template reached through xsl:apply-templates. Verify the target
         processor keeps the current group there. -->
    <xsl:template match="xhtml:h1" mode="toc">
        <xsl:if test="following::xhtml:h2[1][preceding::xhtml:h1[1]  = current-group()]">
                <ol>
                    <xsl:for-each-group select="current-group() except ." group-starting-with="xhtml:h2">
                        <xsl:apply-templates select="." mode="toc"/>
                    </xsl:for-each-group>
                </ol>
            </xsl:if>
        </xsl:template>
        
        <!-- h2: one <li> with the heading text, followed by a nested <ol> of the
             remaining group items (regrouped at every h3) when the test holds. -->
        <xsl:template match="xhtml:h2" mode="toc">
            <li>
                <xsl:value-of select="."/>
                <xsl:if test="following::xhtml:h3[1][preceding::xhtml:h2[1]  = current-group()]">
                    <ol>
                        <xsl:for-each-group select="current-group() except ." group-starting-with="xhtml:h3">
                            <xsl:apply-templates select="." mode="toc"/>
                        </xsl:for-each-group>
                    </ol>
                </xsl:if>
            </li>
        </xsl:template>
        
        <!-- h3: same pattern as h2, one level down (nested items regrouped at h4). -->
        <xsl:template match="xhtml:h3" mode="toc">
            <li>
                <xsl:value-of select="."/>
                <xsl:if test="following::xhtml:h4[1][preceding::xhtml:h3[1] = current-group()]">
                    <ol>
                        <xsl:for-each-group select="current-group() except ." group-starting-with="xhtml:h4">
                            <xsl:apply-templates select="." mode="toc"/>
                        </xsl:for-each-group>
                    </ol>
                </xsl:if>
            </li>
        </xsl:template>
        
        <!-- h4: same pattern, nested items regrouped at h5. -->
        <xsl:template match="xhtml:h4" mode="toc">
            <li>
                <xsl:value-of select="."/>
                <xsl:if test="following::xhtml:h5[1][preceding::xhtml:h4[1] = current-group()]">
                    <ol>
                        <xsl:for-each-group select="current-group() except ." group-starting-with="xhtml:h5">
                            <xsl:apply-templates select="." mode="toc"/>
                        </xsl:for-each-group>
                    </ol>
                </xsl:if>
            </li>
        </xsl:template>
        
        <!-- h5: same pattern, nested items regrouped at h6. -->
        <xsl:template match="xhtml:h5" mode="toc">
            <li>
                <xsl:value-of select="."/>
                <xsl:if test="following::xhtml:h6[1][preceding::xhtml:h5[1] = current-group()]">
                    <ol>
                        <xsl:for-each-group select="current-group() except ." group-starting-with="xhtml:h6">
                            <xsl:apply-templates select="." mode="toc"/>
                        </xsl:for-each-group>
                    </ol>
                </xsl:if>
            </li>
        </xsl:template>
        
        <!-- h6: deepest level, a plain <li> with the heading text and no nested list. -->
        <xsl:template match="xhtml:h6" mode="toc">
            <li>
                <xsl:value-of select="."/>
            </li>
        </xsl:template>
    </xsl:stylesheet>


using the following CSS:
/* toc */

/* The table of contents ends with a forced page break. */
#toc {
	page-break-after: always;	
}

/* Section numbers in front of each entry, read from the link target.
   These rules use counters named h1, h2 and h3; they are not defined in
   this sheet and must be maintained on the headings elsewhere. */
#toc  > li > a::before {
	content: target-counter(attr(href), h1) ". ";
}

#toc  > li > ol > li > a::before {
	content: target-counter(attr(href), h1) "." target-counter(attr(href), h2) ". ";
}

#toc  > li > ol > li > ol > li > a::before {
	content: target-counter(attr(href), h1) "." target-counter(attr(href), h2) "." target-counter(attr(href), h3) ". ";
}

/* Dotted leader followed by the page number of the link target. */
#toc a::after {
	content: leader(".") target-counter(attr(href), page);
}
 	
#toc, #toc ol {
	page: table-of-contents;
	list-style-type: none;
}

/* Generated title above the list. */
#toc::before {
	content: "Table of Contents";
	/* margin, clear and page-break-* only take effect on block-level boxes */
	display: block;
	font-size:12pt;
	line-height: 1.2;
	padding-top: 1em;
	margin: 2em 0 0.5em 0;
  	page-break-after: avoid;
  	clear: both;
	font-weight:bold;
}		
#toc li::marker { width: 2.4cm }
#toc li.left::marker { text-align: left }
#toc li.center::marker { text-align: center }
#toc li.right::marker { text-align: right }

/* toc links */

#toc a {
	text-decoration: none;
	color: #0000FF;
}

/* The separate "#toc a:visited { background: transparent }" rule was dropped:
   this rule already sets the same declaration for :visited. */
#toc a:link, #toc a:active, #toc a:visited {
  	color: black; 
  	background: transparent;
  	text-decoration: none;
}
	
/* Both counters are reset in ONE declaration: two separate counter-reset
   declarations in the same rule do not add up, the later one replaces the
   earlier one (so "page" was never reset). */
@page table-of-contents {
	counter-reset: page 1 pages 1;
}

/* lists */
	
#toc { 
  margin: 0.5em 0 0 12pt;
  padding: 0;
}

#toc > li > ol {
	margin-left: 12pt;
}

#toc > li > ol > li > ol {
	margin-left: 19pt;
}

#toc li {
  margin: 0.4em 0;
  padding: 0;
}


For the people who want an example, this could be handy.
mikeday
Thanks! :D