How do I find and select a next bold word - google-apps-script

From the https://gist.github.com/oshliaer/d468759b3587cfb424348fa722765187 , It is possible to select a particular word from the findText, I want to implement the same for bold words only
I have a function to find bold. How do I modify the above gist?
var startFlag = x;
var flag = false;
for (var i = x; i < y; i++) {
if (text.isBold(i) && !flag) {
startFlag = i;
flag = true;
} else if (!text.isBold(i) && flag) {
flag = false;
rangeBuilder.addElement(text, startFlag, i - 1);
doc.setSelection(rangeBuilder.build());
return;
}
}
if (flag) {
rangeBuilder.addElement(text, startFlag, i - 1);
doc.setSelection(rangeBuilder.build());
return;
}

Let's assume another algorithm
/*
* #param {(DocumentApp.ElementType.LIST_ITEM | DocumentApp.ElementType.PARAGRAPH)} element
*/
function hasBold(element, start) {
var text = element.editAsText();
var length = element.asText().getText().length;
var first = -1;
var end = -1;
while (start < length) {
if (first < 0 && text.isBold(start)) {
first = start;
}
if (first > -1 && !text.isBold(start)) {
end = start - 1;
return {
s: first,
e: end
}
}
start++;
}
if (first > -1) {
return {
s: first,
e: length - 1
}
}
return false;
}
It's not clean but I've tested it and it works fine.
hasBold lets us finding bolds in the current element.
Finally, we have to loop this feature within document.getBody().
You could to get the full code here find next bold text in google document.
Also you could try it on a copy
A new idea
The Direct searcing
The best way is to use a callback while it is checked
var assay = function (re) {
var text = re.getElement()
.asText();
for (var offset = re.getStartOffset(); offset <= re.getEndOffsetInclusive(); offset++) {
if (!text.isBold(offset)) return false;
}
return true;
}
function findNextBold() {
var sp = 'E.';
Docer.setDocument(DocumentApp.getActiveDocument());
var rangeElement = Docer.findText(sp, Docer.getActiveRangeElement(), assay);
rangeElement ? Docer.selectRangeElement(rangeElement) : Docer.setCursorBegin();
}
The Approx searching
var assay = function(re) {
var text = re.getElement().asText();
var startOffset = re.getStartOffset();
var endOffset = re.getEndOffsetInclusive() + 1;
for (var offset = startOffset; offset < endOffset; offset++) {
if (!text.isBold(offset)) return false;
}
return this.test(text.getText().slice(startOffset, endOffset));
}
function findNextBold() {
var searchPattern = '[^ ]+#[^ ]+';
var testPattern = new RegExp('^[A-Z0-9._%+-]+#[A-Z0-9.-]+\.[A-Z]{2,4}$');
Docer.setDocument(DocumentApp.getActiveDocument());
var rangeElement = Docer.findText(searchPattern, Docer.getActiveRangeElement(), assay.bind(testPattern));
rangeElement ? Docer.selectRangeElement(rangeElement) : Docer.setCursorBegin();
}
Docer

Yes. it is possible to find bold text. You need to use findText(searchPattern) to search the contents of the element for the specific text pattern using regular expressions. The provided regular expression pattern is independently matched against each text block contained in the current element. Then, use isBold() to retrieve the bold setting. It is a Boolean which returns whether the text is bold or null.

Related

Google Apps Script: insert and update texts by ID/name in Google Docs

I want to be able to insert, in Google Docs using Google Apps Script, custom texts with a given ID, so that afterwards I'd be able to update them (any number of times). The insertion should work with cursor placement as well as with replacing any selected elements.
I have a code that works pretty well for this (based partly on this answer), see below. I use "named ranges" for IDing the inserted/updated texts. The only problem is, when I have several such inserted texts immediately next to each other, and I update both repeatedly, suddenly the preceding one "absorbs" the following one (i.e., deletes it). So clearly it is a problem of the named ranges somehow expanding into each other, but I cannot figure out why.
// function for inserting text
insertAny = (textToInsert, textName = null, range = null) => {
var doc = DocumentApp.getActiveDocument();
var cursor = doc.getCursor();
var rangeBuilder = null;
if (cursor && (range === null)) {
// Attempt to insert text at the cursor position. If the insertion returns null, the cursor's
// containing element doesn't allow insertions, so show the user an error message.
var cElement = cursor.insertText(textToInsert);
if (!cElement) {
textName = null
DocumentApp.getUi().alert('Cannot insert text here.');
} else {
rangeBuilder = doc.newRange();
rangeBuilder.addElement(cElement);
}
} else {
var selection;
if (range === null) {
selection = DocumentApp.getActiveDocument().getSelection();
} else {
selection = range;
}
if (!selection) {
textName = null
DocumentApp.getUi().alert('Insertion omitted: A cursor placed in the text or a selected text is needed to indicate the position of the insertion.');
} else {
var elements = selection.getRangeElements();
var replace = true;
for (var i = 0; i < elements.length; i++) {
if (elements[i].isPartial()) {
var tElement = elements[i].getElement().asText();
var startIndex = elements[i].getStartOffset();
var endIndex = elements[i].getEndOffsetInclusive();
var text = tElement.getText().substring(startIndex, endIndex + 1);
tElement.deleteText(startIndex, endIndex);
if (replace) {
tElement.insertText(startIndex, textToInsert);
if (rangeBuilder === null) {
rangeBuilder = doc.newRange();
rangeBuilder.addElement(tElement, startIndex, startIndex + textToInsert.length - 1);
}
replace = false;
}
} else {
var eElement = elements[i].getElement();
// if not specified as "any", throws type errors for some reason
if (replace && eElement.editAsText) {
eElement.clear().asText().setText(textToInsert);
replace = false;
if (rangeBuilder === null) {
rangeBuilder = doc.newRange();
rangeBuilder.addElement(eElement);
}
} else {
if (replace && i === elements.length - 1) {
var parent = eElement.getParent();
parent[parent.insertText ? 'insertText' : 'insertParagraph'](parent.getChildIndex(eElement), textToInsert);
if (rangeBuilder === null) {
rangeBuilder = doc.newRange();
rangeBuilder.addElement(eElement);
}
replace = false; //not really necessary since it's the last one
}
eElement.removeFromParent();
}
}
}
}
}
if (textName !== null && rangeBuilder !== null) {
doc.addNamedRange(textName, rangeBuilder.build());
}
}
// function for updating text
const updateNamedRange = (textName, newText) => {
var doc = DocumentApp.getActiveDocument();
var myNamedRanges = doc.getNamedRanges(textName);
for (var i = 0; i < myNamedRanges.length; i++) {
var range = myNamedRanges[i].getRange();
insertAny(newText, textName, range);
}
}
Any ideas (or better solutions)?
Okay, so it seems that the reason is that if I insert text immediately next to a named range, it will automatically belong to that range. (Hence subsequent updates affected these unrelated parts too.)
My really hacky solution is to temporarily insert a placeholder character to separate the new text from any potential named ranges... It makes me laugh, but nothing else I tried works as well. This seems to be robust to all the tricky scenarios I can think of. My final code is below.
const insertAny = (textToInsert, textName = null, range = null) => {
var doc = DocumentApp.getActiveDocument();
var cursor = doc.getCursor();
var rangeBuilder = null;
if (cursor && (range === null)) {
// Attempt to insert text at the cursor position. If the insertion returns null, the cursor's
// containing element doesn't allow insertions, so show the user an error message.
var cElement = cursor.insertText(textToInsert);
if (!cElement) {
textName = null
DocumentApp.getUi().alert('Cannot insert text here.');
} else {
rangeBuilder = doc.newRange();
rangeBuilder.addElement(cElement);
}
} else {
var selection;
if (range === null) {
selection = DocumentApp.getActiveDocument().getSelection();
} else {
selection = range;
}
if (!selection) {
textName = null
DocumentApp.getUi().alert('Insertion omitted: A cursor placed in the text or a selected text is needed to indicate the position of the insertion.');
} else {
var elements = selection.getRangeElements();
if (range !== null) {
elements.length = 1;
}
var replace = true;
for (var i = 0; i < elements.length; i++) {
if (elements[i].isPartial()) {
var tElement = elements[i].getElement().asText();
var startIndex = elements[i].getStartOffset();
var endIndex = elements[i].getEndOffsetInclusive();
var text = tElement.getText().substring(startIndex, endIndex + 1);
if (replace) {
tElement.insertText(endIndex + 1, 'x');
tElement.deleteText(startIndex, endIndex);
tElement.insertText(startIndex + 1, textToInsert);
if (rangeBuilder === null) {
rangeBuilder = doc.newRange();
rangeBuilder.addElement(tElement, startIndex + 1, startIndex + 1 + textToInsert.length - 1);
}
replace = false;
tElement.deleteText(startIndex, startIndex);
} else {
tElement.deleteText(startIndex, endIndex);
}
} else {
var eElement = elements[i].getElement();
// if not specified as "any", throws type errors for some reason
if (replace && eElement.editAsText) {
eElement.clear().asText().setText(textToInsert);
replace = false;
if (rangeBuilder === null) {
rangeBuilder = doc.newRange();
rangeBuilder.addElement(eElement);
}
} else {
if (replace && i === elements.length - 1) {
var parent = eElement.getParent();
parent[parent.insertText ? 'insertText' : 'insertParagraph'](parent.getChildIndex(eElement), textToInsert);
if (rangeBuilder === null) {
rangeBuilder = doc.newRange();
rangeBuilder.addElement(eElement);
}
replace = false; //not really necessary since it's the last one
}
eElement.removeFromParent();
}
}
}
}
}
if (textName !== null && rangeBuilder !== null) {
doc.addNamedRange(textName, rangeBuilder.build());
}
}
const updateNamedRange = (textName, newText) => {
var doc = DocumentApp.getActiveDocument();
var myNamedRanges = doc.getNamedRanges(textName);
for (var i = 0; i < myNamedRanges.length; i++) {
var range = myNamedRanges[i].getRange();
myNamedRanges[i].remove();
insertAny(newText, textName, range);
}
}

Get the first hyperlink and its text value

I hope everyone is in good health health and condition.
Recently, I have been working on Google Docs hyperlinks using app scripts and learning along the way. I was trying to get all hyperlink and edit them and for that I found an amazing code from this post. I have read the code multiple times and now I have a good understanding of how it works.
My confusion
My confusion is the recursive process happening in this code, although I am familiar with the concept of Recursive functions but when I try to modify to code to get only the first hyperlink from the document, I could not understand it how could I achieve that without breaking the recursive function.
Here is the code that I am trying ;
/**
* Get an array of all LinkUrls in the document. The function is
* recursive, and if no element is provided, it will default to
* the active document's Body element.
*
* #param {Element} element The document element to operate on.
* .
* #returns {Array} Array of objects, vis
* {element,
* startOffset,
* endOffsetInclusive,
* url}
*/
function getAllLinks(element) {
var links = [];
element = element || DocumentApp.getActiveDocument().getBody();
if (element.getType() === DocumentApp.ElementType.TEXT) {
var textObj = element.editAsText();
var text = element.getText();
var inUrl = false;
for (var ch=0; ch < text.length; ch++) {
var url = textObj.getLinkUrl(ch);
if (url != null) {
if (!inUrl) {
// We are now!
inUrl = true;
var curUrl = {};
curUrl.element = element;
curUrl.url = String( url ); // grab a copy
curUrl.startOffset = ch;
}
else {
curUrl.endOffsetInclusive = ch;
}
}
else {
if (inUrl) {
// Not any more, we're not.
inUrl = false;
links.push(curUrl); // add to links
curUrl = {};
}
}
}
if (inUrl) {
// in case the link ends on the same char that the element does
links.push(curUrl);
}
}
else {
var numChildren = element.getNumChildren();
for (var i=0; i<numChildren; i++) {
links = links.concat(getAllLinks(element.getChild(i)));
}
}
return links;
}
I tried adding
if (links.length > 0){
return links;
}
but it does not stop the function as it is recursive and it return back to its previous calls and continue running.
Here is the test document along with its script that I am working on.
https://docs.google.com/document/d/1eRvnR2NCdsO94C5nqly4nRXCttNziGhwgR99jElcJ_I/edit?usp=sharing
I hope you will understand what I am trying to convey, Thanks for giving a look at my post. Stay happy :D
I believe your goal as follows.
You want to retrieve the 1st link and the text of link from the shared Document using Google Apps Script.
You want to stop the recursive loop when the 1st element is retrieved.
Modification points:
I tried adding
if (links.length > 0){
return links;
}
but it does not stop the function as it is recursive and it return back to its previous calls and continue running.
About this, unfortunately, I couldn't understand where you put the script in your script. In this case, I think that it is required to stop the loop when links has the value. And also, it is required to also retrieve the text. So, how about modifying as follows? I modified 3 parts in your script.
Modified script:
function getAllLinks(element) {
var links = [];
element = element || DocumentApp.getActiveDocument().getBody();
if (element.getType() === DocumentApp.ElementType.TEXT) {
var textObj = element.editAsText();
var text = element.getText();
var inUrl = false;
for (var ch=0; ch < text.length; ch++) {
if (links.length > 0) break; // <--- Added
var url = textObj.getLinkUrl(ch);
if (url != null) {
if (!inUrl) {
// We are now!
inUrl = true;
var curUrl = {};
curUrl.element = element;
curUrl.url = String( url ); // grab a copy
curUrl.startOffset = ch;
}
else {
curUrl.endOffsetInclusive = ch;
}
}
else {
if (inUrl) {
// Not any more, we're not.
inUrl = false;
curUrl.text = text.slice(curUrl.startOffset, curUrl.endOffsetInclusive + 1); // <--- Added
links.push(curUrl); // add to links
curUrl = {};
}
}
}
if (inUrl) {
// in case the link ends on the same char that the element does
links.push(curUrl);
}
}
else {
var numChildren = element.getNumChildren();
for (var i=0; i<numChildren; i++) {
if (links.length > 0) { // <--- Added or if (links.length > 0) break;
return links;
}
links = links.concat(getAllLinks(element.getChild(i)));
}
}
return links;
}
In this case, I think that if (links.length > 0) {return links;} can be modified to if (links.length > 0) break;.
Note:
By the way, when Google Docs API is used, both the links and the text can be also retrieved by a simple script as follows. When you use this, please enable Google Docs API at Advanced Google services.
function myFunction() {
const doc = DocumentApp.getActiveDocument();
const res = Docs.Documents.get(doc.getId()).body.content.reduce((ar, {paragraph}) => {
if (paragraph && paragraph.elements) {
paragraph.elements.forEach(({textRun}) => {
if (textRun && textRun.textStyle && textRun.textStyle.link) {
ar.push({text: textRun.content, url: textRun.textStyle.link.url});
}
});
}
return ar;
}, []);
console.log(res) // You can retrieve 1st link and test by console.log(res[0]).
}

Google Apps Script; Docs; convert selected element to HTML

I am just starting with Google Apps Script and following the Add-on quickstart
https://developers.google.com/apps-script/quickstart/docs
In the quickstart you can create a simple add-on to get a selection from a document and translate it with the LanguageApp service. The example gets the underlying text using this:
function getSelectedText() {
var selection = DocumentApp.getActiveDocument().getSelection();
if (selection) {
var text = [];
var elements = selection.getSelectedElements();
for (var i = 0; i < elements.length; i++) {
if (elements[i].isPartial()) {
var element = elements[i].getElement().asText();
var startIndex = elements[i].getStartOffset();
var endIndex = elements[i].getEndOffsetInclusive();
text.push(element.getText().substring(startIndex, endIndex + 1));
} else {
var element = elements[i].getElement();
// Only translate elements that can be edited as text; skip images and
// other non-text elements.
if (element.editAsText) {
var elementText = element.asText().getText();
// This check is necessary to exclude images, which return a blank
// text element.
if (elementText != '') {
text.push(elementText);
}
}
}
}
if (text.length == 0) {
throw 'Please select some text.';
}
return text;
} else {
throw 'Please select some text.';
}
}
It gets the text only: element.getText(), without any formatting.
I know the underlying object is not html, but is there a way to get the selection converted into a HTML string? For example, if the selection has a mix of formatting, like bold:
this is a sample with bold text
Then is there any method, extension, library, etc, -- like element.getHTML() -- that could return this?
this is a sample with <b>bold</b> text
instead of this?
this is a sample with bold text
There is a script GoogleDoc2HTML by Omar AL Zabir. Its purpose is to convert the entire document into HTML. Since you only want to convert rich text within the selected element, the function relevant to your task is processText from the script, shown below.
The method getTextAttributeIndices gives the starting offsets for each change of text attribute, like from normal to bold or back. If there is only one change, that's the attribute for the entire element (typically paragraph), and this is dealt with in the first part of if-statement.
The second part deals with the general case, looping over the indices and inserting HTML markup corresponding to the attributes.
The script isn't maintained, so consider it as a starting point for your own code, rather than a ready-to-use library. There are some unmerged PRs that improve the conversion process, in particular for inline links.
function processText(item, output) {
var text = item.getText();
var indices = item.getTextAttributeIndices();
if (indices.length <= 1) {
// Assuming that a whole para fully italic is a quote
if(item.isBold()) {
output.push('<b>' + text + '</b>');
}
else if(item.isItalic()) {
output.push('<blockquote>' + text + '</blockquote>');
}
else if (text.trim().indexOf('http://') == 0) {
output.push('' + text + '');
}
else {
output.push(text);
}
}
else {
for (var i=0; i < indices.length; i ++) {
var partAtts = item.getAttributes(indices[i]);
var startPos = indices[i];
var endPos = i+1 < indices.length ? indices[i+1]: text.length;
var partText = text.substring(startPos, endPos);
Logger.log(partText);
if (partAtts.ITALIC) {
output.push('<i>');
}
if (partAtts.BOLD) {
output.push('<b>');
}
if (partAtts.UNDERLINE) {
output.push('<u>');
}
// If someone has written [xxx] and made this whole text some special font, like superscript
// then treat it as a reference and make it superscript.
// Unfortunately in Google Docs, there's no way to detect superscript
if (partText.indexOf('[')==0 && partText[partText.length-1] == ']') {
output.push('<sup>' + partText + '</sup>');
}
else if (partText.trim().indexOf('http://') == 0) {
output.push('' + partText + '');
}
else {
output.push(partText);
}
if (partAtts.ITALIC) {
output.push('</i>');
}
if (partAtts.BOLD) {
output.push('</b>');
}
if (partAtts.UNDERLINE) {
output.push('</u>');
}
}
}
}
Ended up making a script to support my use-case of bold+links+italics:
function getHtmlOfElement(element) {
var text = element.editAsText();
var string = text.getText();
var indices = text.getTextAttributeIndices();
var output = [];
for (var i = 0; i < indices.length; i++) {
var offset = indices[i];
var startPos = offset;
var endPos = i+1 < indices.length ? indices[i+1]: string.length;
var partText = string.substring(startPos, endPos);
var isBold = text.isBold(offset);
var isItalic = text.isItalic(offset);
var linkUrl = text.getLinkUrl(offset);
if (isBold) {
output.push('<b>');
}
if (isItalic) {
output.push('<i>');
}
if (linkUrl) {
output.push('<a href="' + linkUrl + '">');
}
output.push(partText);
if (isBold) {
output.push('</b>');
}
if (isItalic) {
output.push('</i>');
}
if (linkUrl) {
output.push('</a>');
}
}
return output.join("");
}
You can simply call it using something like:
getHtmlOfElement(myTableCell); // returns something like "<b>Bold</b> test."
This is obviously a workaround, but you can copy/paste a Google Doc into a draft in Gmail and then that draft can be turned into HTML using
GmailApp.getDraft(draftId).getMessage().getBody().toString();
I found this thread trying to skip that step by going straight from a Doc to HTML, but I thought I'd share.

GAS is it possible to replace getActiveDocument().getSelection() at once?

My User has the following selection in his Gdoc.
Now from the sidebar he wants to to replace the selection he made on the document. The GAS question is if it is possible to do that at once, something like:
var selection = DocumentApp.getActiveDocument().getSelection()
selection.replace("newtext")
Or do I have to loop through selection.getRangeElements() in order to delete them (or replace them) and than in someway place the new text in that position?
Not, that's not possible (well, if it is, it's not documented).
You have to loop through the selected elements, mainly because the selection may take part of paragraphs, forcing you to manage that. i.e. deleting just the selected part. And for completed selected elements, you can just remove them entirely (like images).
Here's an implementation on how to do this (part of the Kaylan's Translate script modified by me to properly replace images and partially selected paragraphs.
function replaceSelection(newText) {
var selection = DocumentApp.getActiveDocument().getSelection();
if (selection) {
var elements = selection.getRangeElements();
var replace = true;
for (var i = 0; i < elements.length; i++) {
if (elements[i].isPartial()) {
var element = elements[i].getElement().asText();
var startIndex = elements[i].getStartOffset();
var endIndex = elements[i].getEndOffsetInclusive();
var text = element.getText().substring(startIndex, endIndex + 1);
element.deleteText(startIndex, endIndex);
if( replace ) {
element.insertText(startIndex, newText);
replace = false;
}
} else {
var element = elements[i].getElement();
if( replace && element.editAsText ) {
element.clear().asText().setText(newText);
replace = false;
} else {
if( replace && i === elements.length -1 ) {
var parent = element.getParent();
parent[parent.insertText ? 'insertText' : 'insertParagraph'](parent.getChildIndex(element), newText);
replace = false; //not really necessary since it's the last one
}
element.removeFromParent();
}
}
}
} else
throw "Hey, select something so I can replace!";
}

A* algorithm works OK, but not perfectly. What's wrong?

This is my grid of nodes:
I'm moving an object around on it using the A* pathfinding algorithm. It generally works OK, but it sometimes acts wrongly:
When moving from 3 to 1, it correctly goes via 2. When going from 1 to 3 however, it goes via 4.
When moving between 3 and 5, it goes via 4 in either direction instead of the shorter way via 6
What can be wrong? Here's my code (AS3):
public static function getPath(from:Point, to:Point, grid:NodeGrid):PointLine {
// get target node
var target:NodeGridNode = grid.getClosestNodeObj(to.x, to.y);
var backtrace:Map = new Map();
var openList:LinkedSet = new LinkedSet();
var closedList:LinkedSet = new LinkedSet();
// begin with first node
openList.add(grid.getClosestNodeObj(from.x, from.y));
// start A*
var curNode:NodeGridNode;
while (openList.size != 0) {
// pick a new current node
if (openList.size == 1) {
curNode = NodeGridNode(openList.first);
}
else {
// find cheapest node in open list
var minScore:Number = Number.MAX_VALUE;
var minNext:NodeGridNode;
openList.iterate(function(next:NodeGridNode, i:int):int {
var score:Number = curNode.distanceTo(next) + next.distanceTo(target);
if (score < minScore) {
minScore = score;
minNext = next;
return LinkedSet.BREAK;
}
return 0;
});
curNode = minNext;
}
// have not reached
if (curNode == target) break;
else {
// move to closed
openList.remove(curNode);
closedList.add(curNode);
// put connected nodes on open list
for each (var adjNode:NodeGridNode in curNode.connects) {
if (!openList.contains(adjNode) && !closedList.contains(adjNode)) {
openList.add(adjNode);
backtrace.put(adjNode, curNode);
}
}
}
}
// make path
var pathPoints:Vector.<Point> = new Vector.<Point>();
pathPoints.push(to);
while(curNode != null) {
pathPoints.unshift(curNode.location);
curNode = backtrace.read(curNode);
}
pathPoints.unshift(from);
return new PointLine(pathPoints);
}
NodeGridNode::distanceTo()
public function distanceTo(o:NodeGridNode):Number {
var dx:Number = location.x - o.location.x;
var dy:Number = location.y - o.location.y;
return Math.sqrt(dx*dx + dy*dy);
}
The problem I see here is the line
if (!openList.contains(adjNode) && !closedList.contains(adjNode))
It may be the case that an adjNode may be easier(shorter) to reach through the current node although it was reached from another node previously which means it is in the openList.
Found the bug:
openList.iterate(function(next:NodeGridNode, i:int):int {
var score:Number = curNode.distanceTo(next) + next.distanceTo(target);
if (score < minScore) {
minScore = score;
minNext = next;
return LinkedSet.BREAK;
}
return 0;
});
The return LinkedSet.BREAK (which acts like a break statement in a regular loop) should not be there. It causes the first node in the open list to be selected always, instead of the cheapest one.