I am am trying to create a script for adobe acrobat that allows me to search documents for specified naming structures. An example would be if I am searching for clauses 123.123.1234 and 234.234.2345 I want it to search for everything in with the naming structure xxx.xxx.xxxx among a few other variations. I then want it to highlight all found results so that I can modify the search results and ultimately export to an excel document. I have the below script but am running into a syntax error and am very new to javascript so your insight would be greatly appreciated, thank you!
// Define the regular expressions to search for
var regex1 = /\d{3}\.\d{3}-\d{4}/g; // xxx.xxx-xxxx
var regex2 = /\d{2}\.\d{3}-\d{2}/g; // xx.xxx-xx
var regex3 = /\d{2}\.\d{3}-\d{1}/g; // xx.xxx-x
// Get the current PDF document
var doc = app.activeDocs[0];
// Loop through each page in the document
for (var i = 0; i < doc.numPages; i++) {
// Get the page object
var page = doc.getPageNth(i);
// Get the page content stream
var content = page.contents;
// Search for matches using the regular expressions
var matches1 = content.match(regex1);
var matches2 = content.match(regex2);
var matches3 = content.match(regex3);
// Highlight the matches
if (matches1) highlightMatches(matches1, page);
if (matches2) highlightMatches(matches2, page);
if (matches3) highlightMatches(matches3, page);
}
// Function to highlight the matches
function highlightMatches(matches, page) {
// Loop through each match
for (var i = 0; i < matches.length; i++) {
// Get the match position and length
var pos = content.indexOf(matches[i]);
var len = matches[i].length;
// Create a highlight annotation around the match
var annot = page.addAnnot({
type: "Highlight",
page: i,
rect: [pos, 0, pos+len, 0]
});
}
}
I have an update, it’s no longer a syntax error, it seems I am now having an issue defining the document to run the script on? below is the current script and I receive the error ReferenceError: File is not defined
7:Console:Exec
undefined
// Define the regular expressions to search for
var regex1 = /\d{3}\.\d{3}-\d{4}/g; // xxx.xxx-xxxx
var regex2 = /\d{2}\.\d{3}-\d{2}/g; // xx.xxx-xx
var regex3 = /\d{2}\.\d{3}-\d{1}/g; // xx.xxx-x
// Prompt the user to select a PDF file
var filePath = File.openDialog("Select a PDF file to search");
// If a file was selected, open it and search for matches
if (filePath != null) {
// Open the PDF document
var doc = app.openDoc(filePath);
// Loop through each page in the document
for (var i = 0; i < doc.numPages; i++) {
// Get the page object
var page = doc.getPageNth(i);
// Get the page content stream
var content = page.contents;
// Search for matches using the regular expressions
var matches1 = content.match(regex1);
var matches2 = content.match(regex2);
var matches3 = content.match(regex3);
// Highlight the matches
if (matches1) highlightMatches(matches1, page, content);
if (matches2) highlightMatches(matches2, page, content);
if (matches3) highlightMatches(matches3, page, content);
}
// Close the PDF document
doc.closeDoc();
}
// Function to highlight the matches
function highlightMatches(matches, page, content) {
// Loop through each match
for (var i = 0; i < matches.length; i++) {
// Get the match position and length
var pos = content.indexOf(matches[i]);
var len = matches[i].length;
// Create a highlight annotation around the match
var annot = page.addAnnot({
type: "Highlight",
page: i,
rect: [pos, 0, pos+len, 0]
});
}
}
When developing apps that require an external file I first put the file path in a variable and get the file processing part finished and then work on importing the file contents.
I also prefer to drag the file into the app rather than using a file selection dialog.
The environment you’re in does not contain a globally loaded reference File. Either you’re in the wrong environment, or your API reference is out of date.
Im currently trying to execute this in the debugger of adobe acrobat, I assumed it would apply to whichever file was selected/open but I realize that is not the case.