While converting doc to HTML, graphics or shapes are not converted into HTML format - html

We want to display a .doc file in a dialog box in the browser, which is why I convert it into an HTML file. The .doc file is converted into HTML successfully, but if it contains graphics or shapes, they are not converted into any HTML tags (such as img) and are not shown in the file displayed in the UI.
So how can we convert a .doc file that contains graphics or shapes into HTML?
// Question snippet: converts a .doc file to an HTML file next to it.
// baseDir, fileName, wordToHtmlConverter and picmang are declared outside this snippet.
InputStream input = new FileInputStream (baseDir + fileName);
HWPFDocument wordDocument = new HWPFDocument (input);
// The document is processed here, before the pictures manager below is installed.
wordToHtmlConverter.processDocument (wordDocument);
wordToHtmlConverter.setPicturesManager (picmang=new PicturesManager() {
// Returns the suggested file name only; the picture bytes in 'content' are not written anywhere.
public String savePicture (byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
return suggestedName;
}
});
// Serialize the generated DOM into an in-memory buffer as UTF-8 encoded, indented HTML.
org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource (htmlDocument);
StreamResult streamResult = new StreamResult (outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty (OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty (OutputKeys.INDENT, "yes");
serializer.setOutputProperty (OutputKeys.METHOD, "html");
serializer.transform (domSource, streamResult);
outStream.close();
// NOTE(review): the bytes were produced as UTF-8 but are decoded here without naming a charset.
String content = new String (outStream.toByteArray() );
FileOutputStream fos = null;
// Target file: same base name as the input, with the .docx/.doc suffix replaced by .html.
String destinationHTMLFile = baseDir + fileName.replace(".docx", "").replace(".doc", "")+".html";
BufferedWriter bw = null;
File file = new File(destinationHTMLFile);
fos = new FileOutputStream(file);
bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"));
// NOTE(review): no flush()/close() of bw is visible in this snippet - confirm it happens elsewhere.
bw.write(content);
So please help me out to display doc file in browser.

The AbstractWordConverter.setPicturesManager must be done before AbstractWordConverter.processDocument. And of course the method PicturesManager.savePicture in Interface PicturesManager needs to be filled with functionality for saving the pictures in the class which implements this interface.
The following example takes a WordDocument.doc from my home directory, transforms it to HTML including pictures, and puts the resulting files (the HTML file and the image files) in a newly created directory named html. Note that the pictures included in WordDocument.doc must be either *.gif, *.png or *.jpg, since the approach used here for writing/saving an image only supports those types.
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.util.XMLHelper;
import org.w3c.dom.Document;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.StringWriter;
import java.io.FileInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.awt.image.BufferedImage;
import javax.imageio.ImageIO;
public class TestWordToHtmlConverter {
private static void convertDocToHTML(String docFilePathAndName, String htmlPath, String htmlFileName) throws Exception {
new File(htmlPath).mkdir();
HWPFDocument hwpfDocument = new HWPFDocument(new FileInputStream(docFilePathAndName));
Document newDocument = XMLHelper.getDocumentBuilderFactory().newDocumentBuilder().newDocument();
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(newDocument);
wordToHtmlConverter.setPicturesManager(
new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
/*
System.out.println(content);
System.out.println(pictureType);
System.out.println(suggestedName);
System.out.println(widthInches);
System.out.println(heightInches);
*/
try {
BufferedImage image = ImageIO.read(new ByteArrayInputStream(content));
ImageIO.write(image, pictureType.getExtension(), new File(htmlPath, suggestedName));
} catch (Exception e) {
e.printStackTrace();
}
return suggestedName;
}
}
);
wordToHtmlConverter.processDocument(hwpfDocument);
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
transformer.setOutputProperty(OutputKeys.METHOD, "html");
transformer.transform(new DOMSource(wordToHtmlConverter.getDocument()),
new StreamResult(new File(htmlPath, htmlFileName)));
}
public static void main(String[] args) throws Exception {
convertDocToHTML("/home/axel/Dokumente/WordDocument.doc", "/home/axel/Dokumente/html", "WordDocument.html");
}
}

Related

iText7: Creating PDF from TIFF multipage image using iText

I am trying to use iText 7.1.1 to convert a multi-page TIFF image to a PDF file with multiple pages. Thanks to those who got me started with the article "Create PDF from TIFF image using iText". However, that article targets iText 5.5.x, and I am having trouble reproducing it in iText 7.
I did find TiffImageData.getNumberOfPages(raf) to replace int pages = TiffImage.getNumberOfPages(rafa).
However, I am not able to find a replacement for TiffImage.getTiffImage(rafa, i) in iText 7. Do I need to use new Image(ImageDataFactory.createTiff(...))? I would appreciate any suggestions.
iText 5.5.x code
import java.io.FileOutputStream;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import com.itextpdf.text.Document;
import com.itextpdf.text.Image;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.io.FileChannelRandomAccessSource;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.RandomAccessFileOrArray;
import com.itextpdf.text.pdf.codec.TiffImage;
public class Test1 {
public static void main(String[] args) throws Exception {
RandomAccessFile aFile = new RandomAccessFile("/myfolder/origin.tif", "r");
FileChannel inChannel = aFile.getChannel();
FileChannelRandomAccessSource fcra = new FileChannelRandomAccessSource(inChannel);
Document document = new Document();
PdfWriter.getInstance(document, new FileOutputStream("/myfolder/destination.pdf"));
document.open();
RandomAccessFileOrArray rafa = new RandomAccessFileOrArray(fcra);
int pages = TiffImage.getNumberOfPages(rafa);
Image image;
for (int i = 1; i <= pages; i++) {
image = TiffImage.getTiffImage(rafa, i);
Rectangle pageSize = new Rectangle(image.getWidth(), image.getHeight());
document.setPageSize(pageSize);
document.newPage();
document.add(image);
}
document.close();
aFile.close();
}
Do I need to use new Image( ImageDataFactory.createTiff(...))
Yes.
You want this: ImageDataFactory.createTiff(bytes, recoverFromImageError, page, direct)
Then you would open a new PDF, loop through the TIFF pages and:
Get the TIFF image size
Create a new page in the PDF matching the TIFF page size
Add the TIFF image to the new PDF page
Here is a note from Bruno Lowagie on using TIFF with iText 7: How to avoid an exception when importing a TIFF file?
I see you probably want fully working code. Here you go:
import com.itextpdf.io.image.ImageData;
import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.io.image.TiffImageData;
import com.itextpdf.io.source.RandomAccessFileOrArray;
import com.itextpdf.io.source.RandomAccessSourceFactory;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Converts a multi-page TIFF into a PDF with one page per TIFF page (iText 7).
 */
public class TiffToPdf {
    /**
     * Converts /myfolder/origin.tiff into /myfolder/destination.pdf.
     *
     * @throws IOException if the TIFF cannot be read or the PDF cannot be written
     */
    public static void main(String[] args) throws IOException {
        Path tiffFile = Paths.get("/myfolder/origin.tiff");

        // Count the pages, releasing the random-access handle even if counting fails.
        int tiffPages;
        RandomAccessFileOrArray raf = new RandomAccessFileOrArray(new RandomAccessSourceFactory().createBestSource(tiffFile.toString()));
        try {
            tiffPages = TiffImageData.getNumberOfPages(raf);
        } finally {
            raf.close();
        }

        // Load the file once and reuse the bytes for every page instead of
        // fetching the same TIFF again for each page.
        byte[] tiffBytes = Files.readAllBytes(tiffFile);

        try (PdfDocument output = new PdfDocument(new PdfWriter("/myfolder/destination.pdf"))) {
            // TIFF page numbers are passed 1-based.
            for (int page = 1; page <= tiffPages; page++) {
                ImageData tiffImage = ImageDataFactory.createTiff(tiffBytes, true, page, true);
                // Each PDF page gets exactly the dimensions of the TIFF page it holds.
                Rectangle tiffPageSize = new Rectangle(tiffImage.getWidth(), tiffImage.getHeight());
                PdfPage newPage = output.addNewPage(new PageSize(tiffPageSize));
                PdfCanvas canvas = new PdfCanvas(newPage);
                canvas.addImage(tiffImage, tiffPageSize, false);
            }
        }
    }
}
Some might suggest you use the high level API to achieve this a little more cleanly but this should be sufficient for your question.
This is the same as above, but in VB.NET.
It converts a multi-page TIFF to a PDF.
Imports System.IO
Imports iTextSharp.text
Imports iTextSharp.text.pdf
''' <summary>
''' Converts a multi-page TIFF into a PDF with one page per TIFF page.
''' </summary>
''' <param name="inFile">Path of the TIFF file to read (absolute or relative).</param>
''' <param name="outFile">Path of the PDF file to create.</param>
Sub ConvertTIFF2PDF(ByVal inFile As String, ByVal outFile As String)
    ' NOTE(review): PdfDocument, PdfCanvas, ImageDataFactory and TiffImageData are used the way
    ' the iText 7 answer above uses them - confirm the Imports of this file match.

    ' Count the pages first and release the input handle right away, even on failure.
    Dim tiffPages As Integer
    Dim aFile = New RandomAccessFileOrArray(New RandomAccessSourceFactory().CreateBestSource(inFile.ToString))
    Try
        tiffPages = TiffImageData.GetNumberOfPages(aFile)
    Finally
        aFile.Close()
    End Try

    ' New Uri(String) only accepts absolute URIs, so resolve relative paths first.
    Dim uri As System.Uri = New Uri(System.IO.Path.GetFullPath(inFile))

    Dim pdfDoc As PdfDocument = New PdfDocument(New PdfWriter(outFile))
    Try
        ' TIFF page numbers are passed 1-based.
        For i As Integer = 1 To tiffPages
            Console.WriteLine("tiffPages: " & (i) & " of " & tiffPages.ToString)
            Dim tiffImage = ImageDataFactory.CreateTiff(uri, False, i, False)
            ' Each PDF page gets exactly the dimensions of the TIFF page it holds.
            Dim tiffPageSize = New Geom.Rectangle(tiffImage.GetWidth(), tiffImage.GetHeight())
            Dim newPage = pdfDoc.AddNewPage(New PageSize(tiffPageSize))
            Dim canvas As PdfCanvas = New PdfCanvas(newPage)
            canvas.AddImage(tiffImage, tiffPageSize, False)
        Next
    Finally
        ' Closing the PdfDocument writes the file; no separate layout Document is needed.
        pdfDoc.Close()
    End Try
End Sub
Here is a C# version (note that it uses the iTextSharp 5 API rather than iText 7):
/// <summary>
/// Converts a multi-page TIFF into a PDF (iTextSharp 5), placing each TIFF frame on its own page.
/// </summary>
/// <param name="inFile">Path of the TIFF file to read.</param>
/// <param name="outFile">Path of the PDF file to create; an existing file is overwritten.</param>
public void ConvertTIFF2PDF(string inFile, string outFile)
{
    iTextSharp.text.Document document = new iTextSharp.text.Document(iTextSharp.text.PageSize.A4, 0, 0, 0, 0);

    // FileMode.Create instead of FileMode.Open: Open throws when the output file does not
    // exist yet and leaves stale trailing bytes when it exists and is longer than the new PDF.
    // The using blocks release the file handle and the GDI+ bitmap even when conversion fails.
    using (FileStream output = new FileStream(outFile, FileMode.Create))
    using (Bitmap bm = new Bitmap(inFile))
    {
        iTextSharp.text.pdf.PdfWriter writer = iTextSharp.text.pdf.PdfWriter.GetInstance(document, output);
        int total = bm.GetFrameCount(FrameDimension.Page);
        document.Open();
        iTextSharp.text.pdf.PdfContentByte cb = writer.DirectContent;
        for (int k = 0; k < total; ++k)
        {
            // Frames are selected 0-based.
            bm.SelectActiveFrame(FrameDimension.Page, k);
            iTextSharp.text.Image img = iTextSharp.text.Image.GetInstance(bm, ImageFormat.Bmp);
            // scale the image to fit in the page (pixels at the image DPI -> 72 dpi PDF points)
            img.ScalePercent(72f / img.DpiX * 100);
            img.SetAbsolutePosition(0, 0);
            cb.AddImage(img);
            document.NewPage();
        }
        document.Close();
    }
}

java.lang.NoSuchMethodError: org.codehaus.jackson.JsonFactory.enable(Lorg/codehaus/jackson/JsonParser$Feature;

I am getting below error while executing java code in Eclipse (I am not using Maven)
Exception in thread "main" java.lang.NoSuchMethodError: org.codehaus.jackson.JsonFactory.enable(Lorg/codehaus/jackson/JsonParser$Feature;)Lorg/codehaus/jackson/JsonFactory;
at org.apache.avro.Schema.<clinit>(Schema.java:88)
at org.apache.avro.Schema$Parser.parse(Schema.java:997)
at com.rishav.avro.AvroExampleWithoutCodeGeneration.serialize(AvroExampleWithoutCodeGeneration.java:36)
at com.rishav.avro.AvroExampleWithoutCodeGeneration.main(AvroExampleWithoutCodeGeneration.java:94)
I am using jars:
avro-1.8.2.jar
java-jason.jar
jason-simple-1.1.1.jar
org.apache.sling.commons.json-2.0.6-sources.jar
org.apache.sling.launchpad-9
jackson-core-asl-1.1.0.jar
jackson-mapper-asl-1.1.0.jar
Line 36 --> Schema schema = new Schema.Parser().parse(new File("StudentActivity.avsc"));
package com.rishav.avro;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedHashMap;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
//import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonProcessingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.json.simple.JSONObject;
public class AvroExampleWithoutCodeGeneration {
public void serialize() throws JsonParseException, JsonProcessingException, IOException {
InputStream in = new FileInputStream("StudentActivity.json");
// create a schema
Schema schema = new Schema.Parser().parse(new File("StudentActivity.avsc"));**// THIS IS LINE 36**
// create a record to hold json
GenericRecord AvroRec = new GenericData.Record(schema);
// create a record to hold course_details
GenericRecord CourseRec = new GenericData.Record(schema.getField("course_details").schema());
// this file will have AVro output data
File AvroFile = new File("resources/StudentActivity.avro");
// Create a writer to serialize the record
DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
dataFileWriter.create(schema, AvroFile);
// iterate over JSONs present in input file and write to Avro output file
ObjectMapper mapper = new ObjectMapper();
Iterator it= (Iterator) mapper.readValue(new JsonFactory().createJsonParser(in), JSONObject.class);
while (it.hasNext())
{
//for (Iterator it = mapper.readValues(new JsonFactory().createJsonParser(in), JSONObject.class); it.hasNext();) {
JSONObject JsonRec = (JSONObject) it.next();
AvroRec.put("id", JsonRec.get("id"));
AvroRec.put("student_id", JsonRec.get("student_id"));
AvroRec.put("university_id", JsonRec.get("university_id"));
LinkedHashMap CourseDetails = (LinkedHashMap) JsonRec.get("course_details");
CourseRec.put("course_id", CourseDetails.get("course_id"));
CourseRec.put("enroll_date", CourseDetails.get("enroll_date"));
CourseRec.put("verb", CourseDetails.get("verb"));
CourseRec.put("result_score", CourseDetails.get("result_score"));
AvroRec.put("course_details", CourseRec);
dataFileWriter.append(AvroRec);
} // end of for loop
in.close();
dataFileWriter.close();
} // end of serialize method
public void deserialize () throws IOException {
// create a schema
Schema schema = new Schema.Parser().parse(new File("resources/StudentActivity.avsc"));
// create a record using schema
GenericRecord AvroRec = new GenericData.Record(schema);
File AvroFile = new File("resources/StudentActivity.avro");
DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(AvroFile, datumReader);
System.out.println("Deserialized data is :");
while (dataFileReader.hasNext()) {
AvroRec = dataFileReader.next(AvroRec);
System.out.println(AvroRec);
}
}
public static void main(String[] args) throws JsonParseException, JsonProcessingException, IOException {
AvroExampleWithoutCodeGeneration AvroEx = new AvroExampleWithoutCodeGeneration();
AvroEx.serialize();
AvroEx.deserialize();
}
}
You can put this instead:
// parse(...) is a method call that takes a File argument
Schema schema = new Schema.Parser().parse(new File("resources/StudentActivity.avsc"));

Creating a JSON file from a url

Hi guys, I have a problem creating a JSON file from a Google URL that I have. This is the code that I'm using.
import android.util.Log;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Downloads the body of an HTTP(S) URL as a single string.
 */
public class DownloadUrl {

    /**
     * Fetches {@code strUrl} and returns the response body with line breaks removed.
     *
     * @param strUrl the URL to request
     * @return the response body, or an empty string when the request fails
     *         (the failure is logged, not rethrown)
     * @throws IOException          if closing the response stream fails
     * @throws InterruptedException if the initial two-second delay is interrupted
     */
    public String readUrl(String strUrl) throws IOException, InterruptedException {
        Log.d("URLS = ",strUrl);
        Thread.sleep(2000);
        String data = "";
        InputStream iStream = null;
        HttpURLConnection urlConnection = null;
        try {
            URL url = new URL(strUrl);
            // Creating an http connection to communicate with url
            urlConnection = (HttpURLConnection) url.openConnection();
            // Connecting to url
            urlConnection.connect();
            // Reading data from url; the charset is named explicitly instead of using the platform default
            iStream = urlConnection.getInputStream();
            BufferedReader br = new BufferedReader(new InputStreamReader(iStream, "UTF-8"));
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
            }
            data = sb.toString();
            Log.d("downloadUrl", data);
        } catch (Exception e) {
            // Best effort: report the real cause (with stack trace) and fall through to return "".
            Log.e("Exception", e.toString(), e);
        } finally {
            // Either reference is still null when the failure happened before it was assigned;
            // closing unconditionally raised a NullPointerException that hid the real error.
            try {
                if (iStream != null) {
                    iStream.close();
                }
            } finally {
                if (urlConnection != null) {
                    urlConnection.disconnect();
                }
            }
        }
        return data;
    }
}
It works fine when I pass it a URL that looks like this:
https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=40.7207523,-73.383851&radius=4828&type=bar&key=MYKEY
But when I pass it a URL that looks like this:
https://maps.googleapis.com/maps/api/place/details/json?placeid=ChIJe3AmoGsr6IkRuWcK1LAh-DE&key=MYKEY
I get an error: D/GooglePlacesReadTask: java.lang.NullPointerException: Attempt to invoke virtual method 'void java.io.InputStream.close()' on a null object reference
I don't know how to fix this. Any help?
Ah, you did not mention that this is on Android.
I presume so because you mentioned
android.os.NetworkOnMainThreadException
in your comment.
Android does not allow time-consuming tasks on the main thread;
use AsyncTask to call your function, or use a plain old Java thread.
A NetworkOnMainThreadException is thrown when you run a networking operation on the main thread.
Generally AsyncTask is used for this kind of work, but if you want to keep the code you have written, just add:
// Builds a thread policy with permitAll() and installs it via StrictMode.setThreadPolicy.
// NOTE(review): this only relaxes the policy check; the network call presumably still runs on
// (and blocks) the calling thread - confirm, and prefer moving the work to a background thread.
StrictMode.ThreadPolicy policy = new StrictMode.ThreadPolicy.Builder().permitAll().build();
StrictMode.setThreadPolicy(policy);

How do I parse a string to HTML DOM in java

My java program is storing the content of web page in the string sb and I want to parse the string to HTML DOM. How do I do that?
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Downloads a web page into a string and tries to parse that string into a W3C DOM document.
 */
public class Scraper {

    /**
     * Fetches the page, prints its content type and markup, and parses the markup with the
     * JAXP XML parser.
     *
     * @throws IOException  if the page cannot be downloaded
     * @throws SAXException if the markup is not well-formed XML (an XML parser rejects
     *                      any HTML that is not also well-formed XML)
     */
    public static void main(String[] args) throws IOException, SAXException {
        try {
            URL u = new URL("https://twitter.com/ssjsatish");
            URLConnection cn = u.openConnection();
            System.out.println("content type: "+cn.getContentType());
            long l = cn.getContentLengthLong();
            StringBuilder sb = new StringBuilder();
            // try-with-resources closes the stream on every path, including l == 0 and exceptions.
            try (InputStream is = cn.getInputStream()) {
                if (l != 0) {
                    // Decode bytes through a Reader: casting each byte to char corrupts every
                    // non-ASCII character. NOTE(review): UTF-8 is assumed here - confirm
                    // against the charset announced in the content type.
                    java.io.Reader reader = new java.io.InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8);
                    char[] buffer = new char[8192];
                    int read;
                    while ((read = reader.read(buffer)) != -1) {
                        sb.append(buffer, 0, read);
                    }
                }
            }
            if (l != 0) {
                System.out.println(sb);
                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
                // The markup comes from the network: do not let it pull in external entities.
                factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
                factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
                factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
                DocumentBuilder db = factory.newDocumentBuilder();
                InputSource i = new InputSource();
                i.setCharacterStream(new StringReader(sb.toString()));
                Document doc = db.parse(i);
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        }
    }
}
You don't want to use an XML parser to parse HTML, because not all valid HTML is valid XML. I would recommend using a library specifically designed to parse "real-world" HTML, for example I have had good results with jsoup, but there are others. Another advantage of using this sort of library is that their APIs are designed with Web Scraping in mind, and provide much simpler ways of accessing data in the HTML document.

Unknown Reason for Runtime Error

I was starting out with the Google Maps APIs. Following some examples on the internet, I came up with this code. I was able to get the JSON text quite easily by creating an HttpURLConnection, but somehow I am getting a runtime error: I am unable to create a JSONObject. I searched quite a bit, but didn't find any relation between JSONObject and a runtime error.
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import android.os.Bundle;
/**
 * Requests driving directions from the Google Directions API and prints the duration
 * text of the first leg of the first route.
 */
public class Parser {

    /**
     * Downloads the directions JSON for two hard-coded coordinates and extracts
     * routes[0].legs[0].duration.text from it.
     */
    public static void main(String args[])
    {
        String response="";
        // Coordinates are stored in micro-degrees and divided by 1E6 when the URL is built.
        int lat1=20270000,lon1=85520000,lat2=20264500,lon2=85835500;
        String urlString="https://maps.googleapis.com/maps/api/directions/json?origin="+Double.toString((double)lat1/1E6)+","+Double.toString((double) lon1/1E6)+"&destination="+Double.toString((double)lat2/1E6)+","+Double.toString((double) lon2/1E6)+"&sensor=true";
        HttpURLConnection urlConnection= null;
        try{
            URL url = new URL(urlString);
            urlConnection=(HttpURLConnection)url.openConnection();
            urlConnection.setRequestMethod("GET");
            // No setDoOutput(true): this is a plain GET without a request body.
            urlConnection.setDoInput(true);
            urlConnection.connect();
            StringBuilder body = new StringBuilder();
            // Closing the reader also closes the underlying connection stream.
            try (BufferedReader bReader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream(), "UTF-8"))) {
                String temp;
                while((temp = bReader.readLine()) != null){
                    body.append(temp);
                }
            }
            response = body.toString();
            System.out.println(response);
        }
        catch(Exception e){
            // Do not swallow download failures: without a response there is nothing to parse.
            e.printStackTrace();
            return;
        }
        finally {
            if (urlConnection != null) {
                urlConnection.disconnect();
            }
        }
        try {
            System.out.println(response);
            // NOTE(review): org.json is imported next to android.os.Bundle in this file; presumably the
            // classes come from the Android SDK, which may not be usable from a desktop main() - confirm
            // the classpath if this line fails at runtime.
            JSONObject jsonObject = new JSONObject(response);
            System.out.println(jsonObject);
            JSONArray routesArray= jsonObject.getJSONArray("routes");
            JSONObject route = routesArray.getJSONObject(0);
            JSONArray legs = route.getJSONArray("legs");
            JSONObject leg = legs.getJSONObject(0);
            JSONObject durationObject = leg.getJSONObject("duration");
            String duration = durationObject.getString("text");
            System.out.println("egferg"+duration);
        }
        catch(Exception e1){e1.printStackTrace();}
    }
}