Google Apps Script. Get all links from document - google-apps-script

Hi all) I need to get all links from google document. I found that general approach:
function getAllLinks(element) {
var links = [];
element = element || DocumentApp.getActiveDocument().getBody();
if (element.getType() === DocumentApp.ElementType.TEXT) {
var textObj = element.editAsText();
var text = element.getText();
Logger.log("text " + text);
var inUrl = false;
for (var ch=0; ch < text.length; ch++) {
var url = textObj.getLinkUrl(ch);
if (url != null) {
if (!inUrl) {
// We are now!
inUrl = true;
var curUrl = {};
curUrl.element = element;
curUrl.url = String( url ); // grab a copy
curUrl.startOffset = ch;
}
else {
curUrl.endOffsetInclusive = ch;
}
}
else {
if (inUrl) {
// Not any more, we're not.
inUrl = false;
links.push(curUrl); // add to links
curUrl = {};
}
}
}
}
else {
var numChildren = element.getNumChildren();
for (var i=0; i<numChildren; i++) {
links = links.concat(getAllLinks(element.getChild(i)));
}
}
Logger.log(links);
}
It works perfectly fine if i, for example, type url in text, but if add link via menu ("Insert" -> "Link") it doesn't work, function getLinkUrl() returns null. Documentation contains info about Link class, i thought all links represented by it, but don't understand why i can't get link inserted via menu.
I thought maybe i can use some regular expression on text of document element, but if i add link via menu item i can specify custom label for link, which may not contain url in it.
Have anyone faced this scenario? What i missed?

Related

Bug in google doc list item management?

I have developped a script to transform paragraphs into list item, using a specific list item model (StyleT1 in the doc).
I have a very strange behavior:
1/ if you select the first paragraph of the doc and launch the script, it works well and the paragraph become a list item copying the format of StyleT1
2/ if you select the second paragraph and apply the script, then all the list item paragraph become with a number instead of a bullet
I think it is clearly a bug: what do you think?
https://docs.google.com/document/d/16i4nzxj0ptIgjaD6pjVedJqi3wwlw-PtN-W-AGOVuoU/edit?usp=sharing
function manageStyles() {
try {
var doc = DocumentApp.getActiveDocument();
var body = doc.getBody();
var elementTable = [];
var paragraphe, ParagrapheIndex, Text;
var selectionRange = doc.getSelection() ? doc.getSelection().getRangeElements() : null;
if (!selectionRange) {
console.log("No selection!");
return;
}
var ListT1 = body.findText("StyleT1");
ListT1 = ListT1.getElement().getParent();
for (var i=0; i < selectionRange.length; i++){
elementTable[i] = selectionRange[i].getElement();
}
for (var i=0; i < elementTable.length; i++){
if (elementTable[i].getType() == DocumentApp.ElementType.TEXT) {
paragraphe = elementTable[i].getParent();
} else if (elementTable[i].getType() == DocumentApp.ElementType.PARAGRAPH || elementTable[i].getType() == DocumentApp.ElementType.LIST_ITEM) {
paragraphe = elementTable[i];
} else {
console.log("Not applicable the selection!");
continue;
}
Text = paragraphe.getText();
ParagrapheIndex = body.getChildIndex(paragraphe);
body.removeChild(paragraphe);
paragraphe = body.insertListItem(ParagrapheIndex,Text);
paragraphe.setListId(ListT1);
/*paragraphe.setGlyphType(DocumentApp.GlyphType.BULLET);
paragraphe.setNestingLevel(0); */
} // fin du for
} catch (e) {
console.log("Error in manageStyles " + e.toString());
}
}
Thx Rman

Get the first hyperlink and its text value

I hope everyone is in good health health and condition.
Recently, I have been working on Google Docs hyperlinks using app scripts and learning along the way. I was trying to get all hyperlink and edit them and for that I found an amazing code from this post. I have read the code multiple times and now I have a good understanding of how it works.
My confusion
My confusion is the recursive process happening in this code, although I am familiar with the concept of Recursive functions but when I try to modify to code to get only the first hyperlink from the document, I could not understand it how could I achieve that without breaking the recursive function.
Here is the code that I am trying ;
/**
* Get an array of all LinkUrls in the document. The function is
* recursive, and if no element is provided, it will default to
* the active document's Body element.
*
* #param {Element} element The document element to operate on.
* .
* #returns {Array} Array of objects, vis
* {element,
* startOffset,
* endOffsetInclusive,
* url}
*/
function getAllLinks(element) {
var links = [];
element = element || DocumentApp.getActiveDocument().getBody();
if (element.getType() === DocumentApp.ElementType.TEXT) {
var textObj = element.editAsText();
var text = element.getText();
var inUrl = false;
for (var ch=0; ch < text.length; ch++) {
var url = textObj.getLinkUrl(ch);
if (url != null) {
if (!inUrl) {
// We are now!
inUrl = true;
var curUrl = {};
curUrl.element = element;
curUrl.url = String( url ); // grab a copy
curUrl.startOffset = ch;
}
else {
curUrl.endOffsetInclusive = ch;
}
}
else {
if (inUrl) {
// Not any more, we're not.
inUrl = false;
links.push(curUrl); // add to links
curUrl = {};
}
}
}
if (inUrl) {
// in case the link ends on the same char that the element does
links.push(curUrl);
}
}
else {
var numChildren = element.getNumChildren();
for (var i=0; i<numChildren; i++) {
links = links.concat(getAllLinks(element.getChild(i)));
}
}
return links;
}
I tried adding
if (links.length > 0){
return links;
}
but it does not stop the function as it is recursive and it return back to its previous calls and continue running.
Here is the test document along with its script that I am working on.
https://docs.google.com/document/d/1eRvnR2NCdsO94C5nqly4nRXCttNziGhwgR99jElcJ_I/edit?usp=sharing
I hope you will understand what I am trying to convey, Thanks for giving a look at my post. Stay happy :D
I believe your goal as follows.
You want to retrieve the 1st link and the text of link from the shared Document using Google Apps Script.
You want to stop the recursive loop when the 1st element is retrieved.
Modification points:
I tried adding
if (links.length > 0){
return links;
}
but it does not stop the function as it is recursive and it return back to its previous calls and continue running.
About this, unfortunately, I couldn't understand where you put the script in your script. In this case, I think that it is required to stop the loop when links has the value. And also, it is required to also retrieve the text. So, how about modifying as follows? I modified 3 parts in your script.
Modified script:
function getAllLinks(element) {
var links = [];
element = element || DocumentApp.getActiveDocument().getBody();
if (element.getType() === DocumentApp.ElementType.TEXT) {
var textObj = element.editAsText();
var text = element.getText();
var inUrl = false;
for (var ch=0; ch < text.length; ch++) {
if (links.length > 0) break; // <--- Added
var url = textObj.getLinkUrl(ch);
if (url != null) {
if (!inUrl) {
// We are now!
inUrl = true;
var curUrl = {};
curUrl.element = element;
curUrl.url = String( url ); // grab a copy
curUrl.startOffset = ch;
}
else {
curUrl.endOffsetInclusive = ch;
}
}
else {
if (inUrl) {
// Not any more, we're not.
inUrl = false;
curUrl.text = text.slice(curUrl.startOffset, curUrl.endOffsetInclusive + 1); // <--- Added
links.push(curUrl); // add to links
curUrl = {};
}
}
}
if (inUrl) {
// in case the link ends on the same char that the element does
links.push(curUrl);
}
}
else {
var numChildren = element.getNumChildren();
for (var i=0; i<numChildren; i++) {
if (links.length > 0) { // <--- Added or if (links.length > 0) break;
return links;
}
links = links.concat(getAllLinks(element.getChild(i)));
}
}
return links;
}
In this case, I think that if (links.length > 0) {return links;} can be modified to if (links.length > 0) break;.
Note:
By the way, when Google Docs API is used, both the links and the text can be also retrieved by a simple script as follows. When you use this, please enable Google Docs API at Advanced Google services.
function myFunction() {
const doc = DocumentApp.getActiveDocument();
const res = Docs.Documents.get(doc.getId()).body.content.reduce((ar, {paragraph}) => {
if (paragraph && paragraph.elements) {
paragraph.elements.forEach(({textRun}) => {
if (textRun && textRun.textStyle && textRun.textStyle.link) {
ar.push({text: textRun.content, url: textRun.textStyle.link.url});
}
});
}
return ar;
}, []);
console.log(res) // You can retrieve 1st link and test by console.log(res[0]).
}

Google Apps Script; Docs; convert selected element to HTML

I am just starting with Google Apps Script and following the Add-on quickstart
https://developers.google.com/apps-script/quickstart/docs
In the quickstart you can create a simple add-on to get a selection from a document and translate it with the LanguageApp service. The example gets the underlying text using this:
function getSelectedText() {
var selection = DocumentApp.getActiveDocument().getSelection();
if (selection) {
var text = [];
var elements = selection.getSelectedElements();
for (var i = 0; i < elements.length; i++) {
if (elements[i].isPartial()) {
var element = elements[i].getElement().asText();
var startIndex = elements[i].getStartOffset();
var endIndex = elements[i].getEndOffsetInclusive();
text.push(element.getText().substring(startIndex, endIndex + 1));
} else {
var element = elements[i].getElement();
// Only translate elements that can be edited as text; skip images and
// other non-text elements.
if (element.editAsText) {
var elementText = element.asText().getText();
// This check is necessary to exclude images, which return a blank
// text element.
if (elementText != '') {
text.push(elementText);
}
}
}
}
if (text.length == 0) {
throw 'Please select some text.';
}
return text;
} else {
throw 'Please select some text.';
}
}
It gets the text only: element.getText(), without any formatting.
I know the underlying object is not html, but is there a way to get the selection converted into a HTML string? For example, if the selection has a mix of formatting, like bold:
this is a sample with bold text
Then is there any method, extension, library, etc, -- like element.getHTML() -- that could return this?
this is a sample with <b>bold</b> text
instead of this?
this is a sample with bold text
There is a script GoogleDoc2HTML by Omar AL Zabir. Its purpose is to convert the entire document into HTML. Since you only want to convert rich text within the selected element, the function relevant to your task is processText from the script, shown below.
The method getTextAttributeIndices gives the starting offsets for each change of text attribute, like from normal to bold or back. If there is only one change, that's the attribute for the entire element (typically paragraph), and this is dealt with in the first part of if-statement.
The second part deals with the general case, looping over the indices and inserting HTML markup corresponding to the attributes.
The script isn't maintained, so consider it as a starting point for your own code, rather than a ready-to-use library. There are some unmerged PRs that improve the conversion process, in particular for inline links.
function processText(item, output) {
var text = item.getText();
var indices = item.getTextAttributeIndices();
if (indices.length <= 1) {
// Assuming that a whole para fully italic is a quote
if(item.isBold()) {
output.push('<b>' + text + '</b>');
}
else if(item.isItalic()) {
output.push('<blockquote>' + text + '</blockquote>');
}
else if (text.trim().indexOf('http://') == 0) {
output.push('' + text + '');
}
else {
output.push(text);
}
}
else {
for (var i=0; i < indices.length; i ++) {
var partAtts = item.getAttributes(indices[i]);
var startPos = indices[i];
var endPos = i+1 < indices.length ? indices[i+1]: text.length;
var partText = text.substring(startPos, endPos);
Logger.log(partText);
if (partAtts.ITALIC) {
output.push('<i>');
}
if (partAtts.BOLD) {
output.push('<b>');
}
if (partAtts.UNDERLINE) {
output.push('<u>');
}
// If someone has written [xxx] and made this whole text some special font, like superscript
// then treat it as a reference and make it superscript.
// Unfortunately in Google Docs, there's no way to detect superscript
if (partText.indexOf('[')==0 && partText[partText.length-1] == ']') {
output.push('<sup>' + partText + '</sup>');
}
else if (partText.trim().indexOf('http://') == 0) {
output.push('' + partText + '');
}
else {
output.push(partText);
}
if (partAtts.ITALIC) {
output.push('</i>');
}
if (partAtts.BOLD) {
output.push('</b>');
}
if (partAtts.UNDERLINE) {
output.push('</u>');
}
}
}
}
Ended up making a script to support my use-case of bold+links+italics:
function getHtmlOfElement(element) {
var text = element.editAsText();
var string = text.getText();
var indices = text.getTextAttributeIndices();
var output = [];
for (var i = 0; i < indices.length; i++) {
var offset = indices[i];
var startPos = offset;
var endPos = i+1 < indices.length ? indices[i+1]: string.length;
var partText = string.substring(startPos, endPos);
var isBold = text.isBold(offset);
var isItalic = text.isItalic(offset);
var linkUrl = text.getLinkUrl(offset);
if (isBold) {
output.push('<b>');
}
if (isItalic) {
output.push('<i>');
}
if (linkUrl) {
output.push('<a href="' + linkUrl + '">');
}
output.push(partText);
if (isBold) {
output.push('</b>');
}
if (isItalic) {
output.push('</i>');
}
if (linkUrl) {
output.push('</a>');
}
}
return output.join("");
}
You can simply call it using something like:
getHtmlOfElement(myTableCell); // returns something like "<b>Bold</b> test."
This is obviously a workaround, but you can copy/paste a Google Doc into a draft in Gmail and then that draft can be turned into HTML using
GmailApp.getDraft(draftId).getMessage().getBody().toString();
I found this thread trying to skip that step by going straight from a Doc to HTML, but I thought I'd share.

GAS is it possible to replace getActiveDocument().getSelection() at once?

My User has the following selection in his Gdoc.
Now from the sidebar he wants to to replace the selection he made on the document. The GAS question is if it is possible to do that at once, something like:
var selection = DocumentApp.getActiveDocument().getSelection()
selection.replace("newtext")
Or do I have to loop through selection.getRangeElements() in order to delete them (or replace them) and than in someway place the new text in that position?
Not, that's not possible (well, if it is, it's not documented).
You have to loop through the selected elements, mainly because the selection may take part of paragraphs, forcing you to manage that. i.e. deleting just the selected part. And for completed selected elements, you can just remove them entirely (like images).
Here's an implementation on how to do this (part of the Kaylan's Translate script modified by me to properly replace images and partially selected paragraphs.
function replaceSelection(newText) {
var selection = DocumentApp.getActiveDocument().getSelection();
if (selection) {
var elements = selection.getRangeElements();
var replace = true;
for (var i = 0; i < elements.length; i++) {
if (elements[i].isPartial()) {
var element = elements[i].getElement().asText();
var startIndex = elements[i].getStartOffset();
var endIndex = elements[i].getEndOffsetInclusive();
var text = element.getText().substring(startIndex, endIndex + 1);
element.deleteText(startIndex, endIndex);
if( replace ) {
element.insertText(startIndex, newText);
replace = false;
}
} else {
var element = elements[i].getElement();
if( replace && element.editAsText ) {
element.clear().asText().setText(newText);
replace = false;
} else {
if( replace && i === elements.length -1 ) {
var parent = element.getParent();
parent[parent.insertText ? 'insertText' : 'insertParagraph'](parent.getChildIndex(element), newText);
replace = false; //not really necessary since it's the last one
}
element.removeFromParent();
}
}
}
} else
throw "Hey, select something so I can replace!";
}

How to insert a paragraph object in listItem preserving the formating of each word of the paragraph?

Following the documentation sample, I'm trying to create a function that search for a numerated list in a google document and, if it finds it, adds a new item to the list. My code works well (thanks to #Serge insas for previous help) with strings, but not with paragraphs objects. I know I could get the paragraph text and add it to listItem, but then I lose the formating. Is there a way to insert a paragraph preserving all it's formating? (I know I could use var newElement = child.getParent().insertListItem(childIndex, elementContent.getText()) do insert text without words formating)
Here the code:
function test() {
var targetDocId = "1A02VhxOWLUIdl8LTV1tt2S1yASDbOq77VbsUpxPa6vk";
var targetDoc = DocumentApp.openById(targetDocId);
var body = targetDoc.getBody();
var elementContent = targetDoc.getChild(2); // a paragraph with its formating
var childIndex = 0;
for (var p= 0; p< targetDoc.getNumChildren(); p++) {
var child = targetDoc.getChild(p);
if (child.getType() == DocumentApp.ElementType.LIST_ITEM){
while(child.getType() == DocumentApp.ElementType.LIST_ITEM){
child = targetDoc.getChild(p)
Logger.log("child = " + child.getText())
childIndex = body.getChildIndex(child);
Logger.log(childIndex)
p++
}
child = targetDoc.getChild(p-2);
var listId = child.getListId();
if (child.getText() == '') {
childIndex = childIndex -1;
}
Logger.log(childIndex)
var newElement = child.getParent().insertListItem(childIndex, elementContent);
newElement.setListId(child);
var lastEmptyItem = targetDoc.getChild(childIndex +1).removeFromParent();
break;
}
Here a screen shot of my targetDoc (note the second item, Paragraph):
I know this question is old, but I've come up with a solution and will leave here for anyone that may need it. It is not complete, as I have yet to find a way to copy any Inline Drawing and Equation to a new element...
Anyways, here is my code, it will work well if the paragraph you want to convert to a list item only has text and Inline Images.
function parToList() {
var doc = DocumentApp.getActiveDocument();
var body = doc.getBody();
//gets the paragraph at index 1 on body -> can be changed to what you want
var par = body.getChild(1);
var childs = [];
for (var i = 0; i<par.getNumChildren(); i++) {
var child = par.getChild(0);
childs.push(child);
child.removeFromParent();
};
par.removeFromParent();
//puts the list item on index 1 of body -> can be changed to the wanted position
var li = body.insertListItem(1, "");
childs.reverse();
for (var j in childs) {
var liChild = childs[j];
var childType = liChild.getType();
if (childType == DocumentApp.ElementType.EQUATION) {
//still need to find a way to append an equation
} else if (childType == DocumentApp.ElementType.INLINE_DRAWING) {
//still need to find a way to append an inlineDrawing
} else if (childType == DocumentApp.ElementType.INLINE_IMAGE) {
li.appendInlineImage(liChild);
} else if (childType == DocumentApp.ElementType.TEXT) {
li.appendText(liChild);
};
};
};
Cheers