Forum Samples, Tips and Tricks

Automatic Index Generation in Prince with Javascript

DaveInOhio
This self-contained HTML file shows how Prince can use Javascript to automatically generate an Index. It builds on my earlier simple example—Index and Cross-references with Prince - A simple example—which demonstrates the basic approach to using CSS-generated counters in manually-constructed Index entries, and includes many other useful comments.

This exercise simply adds a reasonably-well annotated Javascript that scans the document for instances of the term to be indexed, and fills the Index UL with entries that point to the associated entries.

<html>

<style>
/* Basic Settings: base typography; lists are rendered without margins, padding or bullets */
body                            {font-family: sans-serif; 
                                 line-height: 140%;
                                }
h1                              {line-height: normal;}
ul                              {margin: 0; 
                                 padding:0;
                                }
li                              {list-style-type: none;}
/* Page Setup: letter-size pages with a half-inch margin */
@page                           {size: letter;
                                 margin: .5in;
                                }
/* Setup Counter: itemNum is reset on the #entries list and incremented by every LI inside it */
#entries                        {counter-reset: itemNum;}
#entries li                     {counter-increment: itemNum;}
/* Apply Generated Content */
/* Each entry is prefixed with its item number, in the form " #N: " */
#entries li::before             {content: " #" counters(itemNum, ".") ": ";}
/* Each link in the index displays the itemNum value of the element its href points to */
#emailIndex a::after            {content: target-counter(attr(href), itemNum);}
/* Style Index: the indexed term sits in a fixed-width box whose overflow is clipped */
#emailIndex span                {display: inline-block;
                                 position: relative;
                                 width: 2.5in;
                                 overflow: hidden;
                                }
/* A long run of dots follows the term; the part that overflows the 2.5in box is hidden */
#emailIndex span::after         {position: absolute;
                                 content: "....................................................";
                                }
</style>

<body onload="indexTerms();">
<h1>Automatic Index Generation in <i>Prince</i> with <i>Javascript</i></h1>
<h2>Introduction</h2>
<p>This self-contained HTML file shows how <i>Prince</i> can use Javascript to automatically generate an Index. It builds on my earlier simple example which demonstrates the basic approach to using CSS-generated counters in manually-constructed Index entries, and includes many other useful comments.</p>
<p>This exercise simply adds a reasonably-well annotated Javascript that scans the document for instances of the term to be indexed, and fills the Index UL with entries that point to the associated entries.</p>
<h2>Entries to be indexed</h2>
<!-- One LI per entry. indexTerms() indexes the text of every span with class "email";
     the name and zip spans are not indexed. Values are comma-separated, with no trailing comma. -->
<ul id="entries">
    <li>
        <span class="name">Joe</span>,
        <span class="zip">43016</span>,
        <span class="email">Joe@gmail.com</span>,
        <span class="email">Addr2@Domain</span>,
        <span class="email">Addr2Again@Domain</span>
    </li>
    <li>
        <span class="name">Sally</span>,
        <span class="zip">43018</span>,
        <span class="email">Sally@gmail.com</span>,
        <span class="email">Addr2@Domain</span>,
        <span class="email">Addr4@Domain</span>
    </li>
    <li>
        <span class="name">Shirley</span>,
        <span class="zip">43018</span>,
        <span class="email">Shirley@gmail.com</span>,
        <span class="email">Addr6@Domain</span>
    </li>
    <li>
        <span class="name">John</span>,
        <span class="zip">43022</span>,
        <span class="email">John@gmail.com</span>,
        <span class="email">Addr6@NewDomain</span>,
        <span class="email">Addr2@Domain</span>,
        <span class="email">Addr2@Domain</span>
    </li>
    <li>
        <span class="name">George</span>,
        <span class="zip">43016</span>,
        <span class="email">George@gmail.com</span>,
        <span class="email">Addr2@Domain</span>,
        <span class="email">Addr6@NewDomain</span>
    </li>
    <li>
        <span class="name">Victoria</span>,
        <span class="zip">43018</span>,
        <span class="email">Victoria@gmail.com</span>,
        <span class="email">Addr2again@Domain</span>,
        <span class="email">Addr2@Domain</span>
    </li>
    <li>
        <span class="name">Phyllis</span>,
        <span class="zip">43018</span>,
        <span class="email">Phyllis@gmail.com</span>,
        <span class="email">Addr6@Domain</span>,
        <span class="email">Addr4@Domain</span>
    </li>
    <li>
        <span class="name">Wendy</span>,
        <span class="zip">43022</span>,
        <span class="email">Wendy@gmail.com</span>,
        <span class="email">Addr2@Domain</span>,
        <span class="email">Addr2@Domain</span>
    </li>
</ul>
<h2>Email Index</h2>
<!-- Left empty on purpose: indexTerms() fills this list when the document loads. -->
<ul id="emailIndex"></ul>
</body>

<script type="text/javascript">
function indexTerms() {
/*
Populates the Index UL with LI elements, each containing an email address
followed by the Item Number for each of the Entries that contain it.

The Item Numbers are hyperlinks to the Entries, which must therefore have
a unique ID attribute. This routine adds an ID to the Entry if necessary.

Terms are read as plain text (textContent) and written back as text, never
as markup, so tags or special characters inside a term cannot be re-parsed
as HTML in the generated Index.

Takes no arguments and returns nothing. Does nothing if either the
"entries" or the "emailIndex" element is missing.
*/
    var i;
    var j;
    var entriesUL = document.getElementById("entries");
    var indexUL = document.getElementById("emailIndex");
    if (!entriesUL || !indexUL) {
        return;
    }
    // Get all of the LI elements in UL "entries"
    var entryLIs = entriesUL.getElementsByTagName("li");
    // Loop thru each LI.
    // Record each different value of the term to be indexed,
    // and the hyperlink reference ID of each entry that contains that value.
    // termList[n] is a term; linkIDs[n] is the array of entry IDs for that term.
    var liID;
    var emailSpans;
    var termText;
    var termList = [];
    var termIdx;
    var linkIDs = [];
    for (i = 0; i < entryLIs.length; i++) {
        // if LI doesn't have a usable ID (missing or empty), give it one
        // so it can be the target of a link
        liID = entryLIs[i].getAttribute("id");
        if (!liID) {
            liID = "entries" + i;
            entryLIs[i].setAttribute("id", liID);
        }
        // Find all SPANs with CLASS "email",
        emailSpans = entryLIs[i].getElementsByClassName("email");
        // Loop thru them:
        // - add each email value to an array -- one copy only; no duplicates
        // - add the corresponding hyperlink ID to an array -- one copy only; no duplicates
        for (j = 0; j < emailSpans.length; j++) {
            termText = emailSpans[j].textContent;
            termIdx = termList.indexOf(termText);
            if (termIdx < 0) {
                termList.push(termText);
                termIdx = termList.length - 1;
                linkIDs.push([]);
            }
            if (linkIDs[termIdx].indexOf(liID) < 0) {
                linkIDs[termIdx].push(liID);
            }
        }
    }
    // Sort by email
    // First put into an array of objects along with the associated hyperlink IDs
    var terms = [];
    for (i = 0; i < termList.length; i++) {
        terms.push({"text": termList[i], "refs": linkIDs[i]});
    }
    terms.sort(function (a, b) {
        if (a.text > b.text) {return 1;}
        if (a.text < b.text) {return -1;}
        return 0;
    });
    // write an LI into the Index UL for each Term:
    // <li><span>term</span><a href="#id"></a>, <a href="#id"></a>...</li>
    // The "a" elements are intentionally empty; their visible text is
    // generated by the stylesheet.
    var li;
    var span;
    var link;
    for (i = 0; i < terms.length; i++) {
        li = document.createElement("li");
        span = document.createElement("span");
        span.textContent = terms[i].text;
        li.appendChild(span);
        for (j = 0; j < terms[i].refs.length; j++) {
            if (j > 0) {
                li.appendChild(document.createTextNode(", "));
            }
            link = document.createElement("a");
            link.setAttribute("href", "#" + terms[i].refs[j]);
            li.appendChild(link);
        }
        indexUL.appendChild(li);
    }
}
</script>

</html>
hallvord
Hi, thanks for a useful script - a nifty way to use target-counter styling :)

May I suggest a couple of changes?

1) Regarding the
termText = emailSpans[j].innerHTML;

statement, I would recommend not using innerHTML here - if the span contains embedded markup or HTML entities, they may appear as text or even mess up your markup and styling where the index is generated. Use a property like .textContent instead.

2) Rather than use two different arrays and link values by index (termList and linkIDs) you can use an object with the texts as property names and refs as values. JavaScript objects are very versatile :) Something like
// Maps each email text to the array of entry IDs (liID) that contain it.
// Declare this once, before looping over the entries.
var terms = {};
for (j=0; j < emailSpans.length; j++) {
    termText = emailSpans[j].textContent;
    if(!terms[termText]){
        // first time this email is seen: start its list of entry IDs
        terms[termText] = [liID];
    }else{
        terms[termText].push(liID);
    }
}


You can now get a sorted list of all E-mails being indexed in this way:
Object.keys(terms).sort();

And you could iterate over that list to generate the new markup.

Just suggestions, the script as-is works very well!
-Hallvord

Edit: fixed missing ! in if statement - oops..

Announcement: repos for tests/utils

Edited by hallvord

DaveInOhio
Thank you, Hallvord. It's great that you take the time to help me learn to code better.

I'm not sure I implemented all your suggestions correctly, but it's great to have made the routine more compact. Here is my revised file.

<html>

<style>
/* Basic Settings: base typography; lists are rendered without margins, padding or bullets */
body                            {font-family: sans-serif; 
                                 line-height: 140%;
                                }
h1                              {line-height: normal;}
ul                              {margin: 0; 
                                 padding:0;
                                }
li                              {list-style-type: none;}
/* Page Setup: letter-size pages with a half-inch margin */
@page                           {size: letter;
                                 margin: .5in;
                                }
/* Setup Counter: itemNum is reset on the #entries list and incremented by every LI inside it */
#entries                        {counter-reset: itemNum;}
#entries li                     {counter-increment: itemNum;}
/* Apply Generated Content */
/* Each entry is prefixed with its item number, in the form " #N: " */
#entries li::before             {content: " #" counters(itemNum, ".") ": ";}
/* Each link in the index displays the itemNum value of the element its href points to */
#emailIndex a::after            {content: target-counter(attr(href), itemNum);}
/* Style Index: the indexed term sits in a fixed-width box whose overflow is clipped */
#emailIndex span                {display: inline-block;
                                 position: relative;
                                 width: 2.5in;
                                 overflow: hidden;
                                }
/* A long run of dots follows the term; the part that overflows the 2.5in box is hidden */
#emailIndex span::after         {position: absolute;
                                 content: "....................................................";
                                }
</style>

<body onload="indexTerms();">
<h1>Automatic Index Generation in <i>Prince</i> with <i>Javascript</i></h1>
<h2>Introduction</h2>
<p>This self-contained HTML file shows how <i>Prince</i> can use Javascript to automatically generate an Index. It builds on my earlier simple example which demonstrates the basic approach to using CSS-generated counters in manually-constructed Index entries, and includes many other useful comments.</p>
<p>This exercise simply adds a reasonably-well annotated Javascript that scans the document for instances of the term to be indexed, and fills the Index UL with entries that point to the associated entries.</p>
<h2>Entries to be indexed</h2>
<!-- One LI per entry. indexTerms() indexes the text of every span with class "email";
     the name and zip spans are not indexed. Values are comma-separated, with no trailing comma. -->
<ul id="entries">
    <li>
        <span class="name">Joe</span>,
        <span class="zip">43016</span>,
        <span class="email">Joe@gmail.com</span>,
        <span class="email">Addr2@Domain</span>,
        <span class="email">Addr2Again@Domain</span>
    </li>
    <li>
        <span class="name">Sally</span>,
        <span class="zip">43018</span>,
        <span class="email">Sally@gmail.com</span>,
        <span class="email">Addr2@Domain</span>,
        <span class="email">Addr4@Domain</span>
    </li>
    <li>
        <span class="name">Shirley</span>,
        <span class="zip">43018</span>,
        <span class="email">Shirley@gmail.com</span>,
        <span class="email">Addr6@Domain</span>
    </li>
    <li>
        <span class="name">John</span>,
        <span class="zip">43022</span>,
        <span class="email">John@gmail.com</span>,
        <span class="email">Addr6@NewDomain</span>,
        <span class="email">Addr2@Domain</span>,
        <span class="email">Addr2@Domain</span>
    </li>
    <li>
        <span class="name">George</span>,
        <span class="zip">43016</span>,
        <span class="email">George@gmail.com</span>,
        <span class="email">Addr2@Domain</span>,
        <span class="email">Addr6@NewDomain</span>
    </li>
    <li>
        <span class="name">Victoria</span>,
        <span class="zip">43018</span>,
        <span class="email">Victoria@gmail.com</span>,
        <span class="email">Addr2again@Domain</span>,
        <span class="email">Addr2@Domain</span>
    </li>
    <li>
        <span class="name">Phyllis</span>,
        <span class="zip">43018</span>,
        <span class="email">Phyllis@gmail.com</span>,
        <span class="email">Addr6@Domain</span>,
        <span class="email">Addr4@Domain</span>
    </li>
    <li>
        <span class="name">Wendy</span>,
        <span class="zip">43022</span>,
        <span class="email">Wendy@gmail.com</span>,
        <span class="email">Addr2@Domain</span>,
        <span class="email">Addr2@Domain</span>
    </li>
</ul>
<h2>Email Index</h2>
<!-- Left empty on purpose: indexTerms() fills this list when the document loads. -->
<ul id="emailIndex"></ul>
</body>

<script type="text/javascript">
function indexTerms() {
/*
Populates the Index UL with LI elements, each containing an email address
followed by the Item Number for each of the Entries that contain it.

The Item Numbers are hyperlinks to the Entries, which must therefore have
a unique ID attribute. This routine adds an ID to the Entry if necessary.

Terms are read as plain text (textContent) and written back as text, never
as markup, so characters such as "<" or "&" inside a term cannot be
re-parsed as HTML in the generated Index.

Takes no arguments and returns nothing. Does nothing if either the
"entries" or the "emailIndex" element is missing.
*/
    var i;
    var j;
    var entriesUL = document.getElementById("entries");
    var indexUL = document.getElementById("emailIndex");
    if (!entriesUL || !indexUL) {
        return;
    }
    // Get all of the LI elements in UL "entries"
    var entryLIs = entriesUL.getElementsByTagName("li");
    // Loop thru each LI.
    // Record each different value of the term to be indexed,
    // and the hyperlink reference ID of each entry that contains that value.
    var liID;
    var emailSpans;
    var termText;
    // Prototype-less object: a term such as "constructor" or "toString"
    // must not be mistaken for an inherited property by the "in" test below.
    var values = Object.create(null);
    for (i = 0; i < entryLIs.length; i++) {
        // if LI doesn't have a usable ID (missing or empty), give it one
        // so it can be the target of a link
        liID = entryLIs[i].getAttribute("id");
        if (!liID) {
            liID = "entries" + i;
            entryLIs[i].setAttribute("id", liID);
        }
        // Find all SPANs with CLASS "email",
        emailSpans = entryLIs[i].getElementsByClassName("email");
        // Loop thru them:
        // - add each email value as a property of an object -- one copy only; no duplicates
        // - add the corresponding hyperlink ID to an array as the property value -- one copy only; no duplicates
        for (j = 0; j < emailSpans.length; j++) {
            termText = emailSpans[j].textContent;
            if (termText in values) {
                if (values[termText].indexOf(liID) < 0) {
                    values[termText].push(liID);
                }
            } else {
                values[termText] = [liID];
            }
        }
    }
    // Sort by email
    var sortedVals = Object.keys(values).sort();
    // write an LI into the Index UL for each Term:
    // <li><span>term</span><a href="#id"></a>, <a href="#id"></a>...</li>
    // The "a" elements are intentionally empty; their visible text is
    // generated by the stylesheet.
    var refs;
    var li;
    var span;
    var link;
    for (i = 0; i < sortedVals.length; i++) {
        refs = values[sortedVals[i]];
        li = document.createElement("li");
        span = document.createElement("span");
        span.textContent = sortedVals[i];
        li.appendChild(span);
        for (j = 0; j < refs.length; j++) {
            if (j > 0) {
                li.appendChild(document.createTextNode(", "));
            }
            link = document.createElement("a");
            link.setAttribute("href", "#" + refs[j]);
            li.appendChild(link);
        }
        indexUL.appendChild(li);
    }
}
</script>

</html>
hallvord
Looks good - and you caught the case I hadn't thought of where a term might occur several times inside a LI, yet only one link/number should appear :)

I'll keep this in mind and hopefully I'm going to need it some day because it would be fun. I wasn't really familiar with target-counter but it opens up some interesting possibilities!

Announcement: repos for tests/utils

DaveInOhio
Thanks again, Hallvord. Prince, Javascript, HTML, and CSS is an ongoing learning experience for me that I'm enjoying immensely. Thanks again for your assistance!
csant
Coming a bit late - but you might simplify the rule for #emailIndex span::after into:
/* Suggested simplification: generate the dots with leader(".") instead of a hard-coded string of dots */
#emailIndex span::after {
  content: leader(".");
}
DaveInOhio
Thank you @csant. That's nifty indeed! I haven't tried it out yet, but love the idea of such a simple solution.