How to differentiate pdf document types with PDFSharp? - ocr

There are basically 4 types of pdf files.
• PDF Image Only
• PDF Searchable Image Exact
• PDF Searchable Image Compact
• PDF Formatted Text and Graphics
I need to know how to differentiate a given pdf document in-order to perform OCR.
I've tried something like this, but the given pdf which identified as Image only (mentioned as Scanned in my code) failed to be converted as an image using Image.fromStream method.
public static PdfType GetPdfType(Stream sourcePdf)
{
PdfDocument pdf = PdfReader.Open(sourcePdf);
PdfDictionary pg = pdf.Pages[0];
PdfDictionary res = pg.Elements.GetDictionary("/Resources");
PdfDictionary xobj = res.Elements.GetDictionary("/XObject");
PdfDictionary font = res.Elements.GetDictionary("/Font");
if (xobj == null)
{
return PdfType.Text;
}
else if (xobj != null && font != null)
{
return PdfType.ScannedText;
}
else
return PdfType.Scanned;
}
Edit: Here is the code snippet where I try to add images which are extracted from pdf file
public static Collection<Image> GetExtractedImagesFromPDF(Stream sourcePdf, string outputPath)
{
Collection<Image> extractedImages = new Collection<Image>();
PdfDocument pdf = PdfReader.Open(sourcePdf);
for (int pageNumber = 0; pageNumber < pdf.Pages.Count; pageNumber++)
{
PdfDictionary pg = pdf.Pages[pageNumber];
// recursively search pages, forms and groups for images.
PdfDictionary obj = FindImageInPDFDictionary(pg);
if (obj != null)
{
byte[] bytes = obj.Stream.Value;
ImageConverter ic = new ImageConverter();
//Image img = (Image)ic.ConvertFrom(bytes); // throws parameter not valid error
MemoryStream ms = new MemoryStream(bytes);
Image img = Image.FromStream(ms, false, true); // throws parameter not valid error
extractedImages.Add(img);
}
}
return extractedImages;
}
And this is the FindImageInPdfDictionary method
static PdfDictionary FindImageInPDFDictionary(PdfDictionary pg)
{
PdfDictionary res = pg.Elements.GetDictionary("/Resources");
if (res != null)
{
PdfDictionary xobj = res.Elements.GetDictionary("/XObject");
if (xobj != null)
{
ICollection<PdfItem> items = xobj.Elements.Values;
foreach (PdfItem item in items)
{
PdfReference reference = item as PdfReference;
if (reference != null)
{
PdfDictionary xObject = reference.Value as PdfDictionary;
if (xObject != null && xObject.Elements.GetString("/Subtype") == "/Image")
{
return xObject;
}
}
}
}
}
return null;
}

Related

How to send JSON objects to another computer?

Using Websockets, I am able to use 1 computer and 1 kinect v2.0 to generate JSON objects of the joints x,y, and z coordinates in real-time. The next steps is to have this data transferred to another computer, using possibly TCP/IP network. My question is to anyone who knows how this is done. Having this data transferred to another computer.
Output that needs to be transferred to another computer in real-time
namespace WebSockets.Server
{
class Program
{
// Store the subscribed clients.
static List<IWebSocketConnection> clients = new List<IWebSocketConnection>();
// Initialize the WebSocket server connection.
static Body[] bodies = new Body[6];
//static KinectSensor kinectSensor = null;
static CoordinateMapper _coordinateMapper;
static Mode _mode = Mode.Color;
static void Main(string[] args)
{
//const string SkeletonStreamName = "skeleton";
//SkeletonStreamMessage skeletonStreamMessage;// = new SkeletonStreamMessage { stream = SkeletonStreamName };
KinectSensor kinectSensor = KinectSensor.GetDefault();
BodyFrameReader bodyFrameReader = null;
bodyFrameReader = kinectSensor.BodyFrameSource.OpenReader();
ColorFrameReader colorFrameReader = null;
colorFrameReader = kinectSensor.ColorFrameSource.OpenReader();
_coordinateMapper = kinectSensor.CoordinateMapper;
kinectSensor.Open();
WebSocketServer server = new WebSocketServer("ws://localhost:8001");
server.Start(socket =>
{
socket.OnOpen = () =>
{
// Add the incoming connection to our list.
clients.Add(socket);
};
socket.OnClose = () =>
{
// Remove the disconnected client from the list.
clients.Remove(socket);
};
socket.OnMessage = message =>
{
if (message == "get-video")
{
int NUMBER_OF_FRAMES = new DirectoryInfo("Video").GetFiles().Length;
// Send the video as a list of consecutive images.
for (int index = 0; index < NUMBER_OF_FRAMES; index++)
{
foreach (var client in clients)
{
string path = "Video/" + index + ".jpg";
byte[] image = ImageUtil.ToByteArray(path);
client.Send(image);
}
// We send 30 frames per second, so sleep for 34 milliseconds.
System.Threading.Thread.Sleep(270);
}
}
else if (message == "get-bodies")
{
if (kinectSensor.IsOpen)
{
if (bodyFrameReader != null)
{
bodyFrameReader.FrameArrived += bodyFrameReader_FrameArrived;
}
}
}
else if (message == "get-color")
{
if (kinectSensor.IsOpen)
{
if (colorFrameReader != null)
{
colorFrameReader.FrameArrived += colorFrameReader_FrameArrived;
}
}
}
};
});
// Wait for a key press to close...
Console.ReadLine();
kinectSensor.Close();
}
private static void colorFrameReader_FrameArrived(object sender, ColorFrameArrivedEventArgs e)
{
//throw new NotImplementedException();
using (ColorFrame colorFrame = e.FrameReference.AcquireFrame()) {
if (colorFrame != null) {
var blob = colorFrame.Serialize();
foreach (var client in clients)
{
if (blob != null)
{
client.Send(blob);
Console.WriteLine("After color Blob sent");
}
}
}
}
}
private static void bodyFrameReader_FrameArrived(object sender, BodyFrameArrivedEventArgs e)
{
//throw new NotImplementedException();
bool dataReceived = false;
using (BodyFrame bodyFrame = e.FrameReference.AcquireFrame())
{
if (bodyFrame != null)
{
if (bodies == null)
{
bodies = new Body[bodyFrame.BodyCount];
}
// The first time GetAndRefreshBodyData is called, Kinect will allocate each Body in the array.
// As long as those body objects are not disposed and not set to null in the array,
// those body objects will be re-used.
bodyFrame.GetAndRefreshBodyData(bodies);
dataReceived = true;
}
}
if (dataReceived)
{
foreach (var client in clients)
{
var users = bodies.Where(s => s.IsTracked.Equals(true)).ToList();
if (users.Count>0){
string json = users.Serialize(_coordinateMapper, _mode);
Console.WriteLine("jsonstring: " + json);
Console.WriteLine("After body serialization and to send");
}
}
}
}
}
}
Try changing it from
WebSocketServer server = new WebSocketServer("ws://localhost:8001");
to
WebSocketServer server = new WebSocketServer(" ws://192.168.X.X:8001");
on the client end. Enter the IP address of the client computer and make sure both server and client are on the same network.

How to open csv file which contains special characters in one of the fields in csv?

hi I am working on a xamarin.forms app, While trying to open one of the csv file the following exception is displayed "input string is not in a correct format " the csv file contains a field called item name which consists the following names ET Door,E459-2,H 91 Ft and Key,Door so these both items contain comma so I am not able to open the csv file which consists of these two elements as they contain special characters like comma and underscore .Here is my code to read and open csv file ,please check the code and let me know what changes do i need to make so the file with items consisting of special characters also open ?
public async void OnProcess(object o, EventArgs args)
{
if (!string.IsNullOrWhiteSpace(csv_file.Text))
{
// _database.AddFiles();
if (App.Current.MainPage is NavigationPage)
{
try
{
List<ItemsCSV> items = new List<ItemsCSV>();
string[] lines = File.ReadAllLines(string.Format(#"{0}", this.file.FilePath));
if (lines != null)
{
for (int x = 1; x < lines.Length; x++)
{
string data = lines[x];
string[] item = data.Split(',');
// ItemsCSV itemsCSV = new ItemsCSV();
_itemsCSV = new ItemsCSV();
{
_itemsCSV.Cycle_Count = string.IsNullOrEmpty(item.ElementAtOrDefault(0)) ? 0 : Convert.ToInt32(item[0]);
_itemsCSV.Line_Number = string.IsNullOrEmpty(item.ElementAtOrDefault(1)) ? 0 : Convert.ToInt32(item[1]);
_itemsCSV.Item_Number = item.ElementAtOrDefault(2);
_itemsCSV.Name = item.ElementAtOrDefault(3);
_itemsCSV.Warehouse = item.ElementAtOrDefault(4);
_itemsCSV.Aisle = item.ElementAtOrDefault(5);
_itemsCSV.Bin = item.ElementAtOrDefault(6);
_itemsCSV.Level = item.ElementAtOrDefault(7);
_itemsCSV.Order_Qty = string.IsNullOrEmpty(item.ElementAtOrDefault(8)) ? 0 : Convert.ToInt32(item[8]);
_itemsCSV.Order_UOM = item.ElementAtOrDefault(9);
_itemsCSV.Consumption_Qty = string.IsNullOrEmpty(item.ElementAtOrDefault(10)) ? 0 : Convert.ToInt32(item[10]);
_itemsCSV.Consumption_UOM = item.ElementAtOrDefault(11);
_itemsCSV.Status = "";
};
items.Add(_itemsCSV);
_database.AddItems(_itemsCSV);
}
var result = await DisplayAlert("", "CSV has been processed, please do cycle count", "OK", "Cancel");
if(result == true)
{
var cyclecountPage = new CycleCountPage(items, 0, "MainPage",this.file.FilePath);
await (App.Current.MainPage as NavigationPage).PushAsync(cyclecountPage);
}
else
{
}
}
else
{
await DisplayAlert("Alert", "File is empty", "OK");
}
}
catch (Exception e)
{
await DisplayAlert("Exception", e.Message, "OK");
}
}
}
else
{
await DisplayAlert("Alert", "File name is mandatory", "OK");
}
}

Can we search or filter " data-tag='to-do' " in onenote API ? If yes then how we can do this?

How can we use OneNote tags (like data-tag='to-do') with search or filter in OneNote API. I tried using provide operators but found no success.
I tried in this way --
$url = "https://www.onenote.com/api/v1.0/me/notes";
//$url .= "/pages?search=hello";
$url .= "/pages?filter=data-tag eq 'to-do'";
I want to search data-tag and then extract the data from OneNote pages which contains the data-tag='to-do'.
Any help is appreciated and thanks in advance.
You'll have to run through all your pages.
For each pages, you can retrieve its content with a GET call to https://www.onenote.com/api/v1.0/me/notes/pages/%s/content?includeIds=true
From there you get a string that you can parse.
I'll advise you to use jsoup.
With jsoup you can then write (assuming contentcontains your page's content):
Document doc = Jsoup.parse(content);
Elements todos=doc.select("[data-tag^=\"to-do\"]");
for(Element todo:todos) {
System.out.println(todo.ownText());
}
Sadly OneNote API doesn't support it yet, so I've written my custom parser which extracts notes with data-tags from page content. Here it is:
public class OneNoteParser
{
static public List<Note> ExtractTaggedNotes(string pageContent, string tag = "*")
{
List<Note> allNotes = new List<Note>();
string[] dataTagString = { "data-tag=\""};
string[] dirtyNotes = pageContent.Split(dataTagString, StringSplitOptions.RemoveEmptyEntries);
//First one in this array can be dropped as it doesn't contain todo
for (int i = 1; i < dirtyNotes.Length; i )
{
string curStr = dirtyNotes[i];
Note curNote = new Note();
// Firstly we need to extract all the tags from it (sample html: data-tag="to-do:completed,important" ....)
string allTags = curStr.Substring(0,curStr.IndexOf("\""));
curNote.Tags = new List<string>(allTags.Split(','));
// Now we have to jump to the next ">" symbol and start finding the text after it
curStr = curStr.Substring(curStr.IndexOf(">"));
int depth = 1;
bool addAllowed = false;
for (int j = 0; j < curStr.Length - 1; j )
{
// Finding next tag opener "<" symbol
if (curStr[j] == '<')
{
addAllowed = false;
// Checking if it is not "</" closer
if (curStr[j 1] == '/')
{
// Means this is a tag closer. Decreasing depth
depth--;
}
else
{
// Means this is an tag opener. Increasing depth
depth ;
}
}
else if (curStr[j] == '>')
{
addAllowed = true;
if (j > 0 && curStr[j - 1] == '/')
{
// Means this is a tag closer. Decreasing depth
depth--;
}
}
else
{
if (depth < 1)
{
// Found end of the tag. Saving index and exiting for loop
break;
}
if (addAllowed)
curNote.Text = curStr[j]; // Appending letter to string
}
}
// Filtering by tag and adding to final list
if (tag == "*" || curNote.Tags.Any(str => str.Contains(tag)))//curNote.Tags.Contains(tag, StringComparer.CurrentCultureIgnoreCase))
allNotes.Add(curNote);
}
return allNotes;
}
}
And here is the class Note
public class Note
{
public string Text;
public List<string> Tags;
public Note()
{
Tags = new List<string>();
}
}
To extract todo-s simply call this function:
OneNoteParser.ExtractTaggedNotes(pageContent, "to-do");
Also you can extract other tags like this:
OneNoteParser.ExtractTaggedNotes(pageContent, "important");
OneNoteParser.ExtractTaggedNotes(pageContent, "highlight");
//...

How to insert a Path in between an existing Path

I am working on an implementation to generate alternate Paths using via node method.
While checking for local optimality I do the following
forwardEdge = bestWeightMapFrom.get(viaNode);
reverseEdge = bestWeightMapTo.get(viaNode);
double unpackedUntilDistance = 0;
while(forwardEdge.edge != -1) {
double parentDist = forwardEdge.parent != null ? forwardEdge.parent.distance : 0;
double dist = forwardEdge.distance - parentDist;
if(unpackedUntilDistance + dist >= T_THRESHOLD) {
EdgeSkipIterState edgeState = (EdgeSkipIterState) graph.getEdgeProps(forwardEdge.edge, forwardEdge.adjNode);
unpackStack.add(new EdgePair(edgeState, false));
sV = forwardEdge.adjNode;
forwardEdge = forwardEdge.parent;
break;
}
else {
unpackedUntilDistance += dist;
forwardEdge = forwardEdge.parent;
sV = forwardEdge.adjNode;
}
}
int oldSV = forwardEdge.adjNode;
EdgeEntry oldForwardEdge = forwardEdge;
I unpack the edge in the stack to further narrow down sV.
I get vT and oldVt in a similar fashion by traversing reverseEdge.
if I determine that the path from sV and vT is <= length of unpacked edges I accept this via node and construct the alternatePath as follows.
PathBidirRef p = (PathBidirRef) algo.calcPath(oldSV, oldVT);
Path4CHAlt p1 = new Path4CHAlt(graph, flagEncoder);
p1.setSwitchToFrom(false);
p1.setEdgeEntry(oldForwardEdge);
p1.segmentEdgeEntry = p.edgeEntry;
double weight = oldForwardEdge.weight + oldReverseEdge.weight + p.edgeEntry.weight + p.edgeTo.weight;
p1.setWeight(weight);
p1.edgeTo = oldReverseEdge;
p1.segmentEdgeTo = p.edgeTo;
Path p2 = p1.extract();
Path4CHAlt is
public class Path4CHAlt extends Path4CH {
private boolean switchWrapper = false;
public EdgeEntry segmentEdgeTo;
public EdgeEntry segmentEdgeEntry;
public Path4CHAlt( Graph g, FlagEncoder encoder )
{
super(g, encoder);
}
public Path4CHAlt setSwitchToFrom( boolean b )
{
switchWrapper = b;
return this;
}
#Override
public Path extract()
{
System.out.println("Path4CHAlt extract");
if (edgeEntry == null || edgeTo == null || segmentEdgeEntry == null || segmentEdgeTo == null)
return this;
if (switchWrapper)
{
EdgeEntry ee = edgeEntry;
edgeEntry = edgeTo;
edgeTo = ee;
ee = segmentEdgeEntry;
segmentEdgeEntry = segmentEdgeTo;
segmentEdgeTo = ee;
}
EdgeEntry currEdge = segmentEdgeEntry;
while (EdgeIterator.Edge.isValid(currEdge.edge))
{
processEdge(currEdge.edge, currEdge.adjNode);
currEdge = currEdge.parent;
}
currEdge.parent = edgeEntry;
currEdge = edgeEntry;
while (EdgeIterator.Edge.isValid(currEdge.edge))
{
processEdge(currEdge.edge, currEdge.adjNode);
currEdge = currEdge.parent;
}
setFromNode(currEdge.adjNode);
reverseOrder();
currEdge = segmentEdgeTo;
int tmpEdge = currEdge.edge;
while (EdgeIterator.Edge.isValid(tmpEdge))
{
currEdge = currEdge.parent;
processEdge(tmpEdge, currEdge.adjNode);
tmpEdge = currEdge.edge;
}
currEdge.parent = edgeTo;
currEdge = edgeTo;
tmpEdge = currEdge.edge;
while (EdgeIterator.Edge.isValid(tmpEdge))
{
currEdge = currEdge.parent;
processEdge(tmpEdge, currEdge.adjNode);
tmpEdge = currEdge.edge;
}
setEndNode(currEdge.adjNode);
return setFound(true);
}
}
This is not working all the time. I get exceptions in Path4CH
java.lang.NullPointerException
at com.graphhopper.routing.ch.Path4CH.expandEdge(Path4CH.java:62)
at com.graphhopper.routing.ch.Path4CH.processEdge(Path4CH.java:56)
at com.graphhopper.routing.PathBidirRef.extract(PathBidirRef.java:95)
at com.graphhopper.routing.DijkstraBidirectionRef.extractPath(DijkstraBidirectionRef.java:99)
at com.graphhopper.routing.AbstractBidirAlgo.runAlgo(AbstractBidirAlgo.java:74)
at com.graphhopper.routing.AbstractBidirAlgo.calcPath(AbstractBidirAlgo.java:60)
In Path
java.lang.IllegalStateException: Edge 1506012 was empty when requested with node 1289685, array index:0, edges:318
at com.graphhopper.routing.Path.forEveryEdge(Path.java:253)
at com.graphhopper.routing.Path.calcInstructions(Path.java:349)
I dont know what I am doing wrong. I could really use some help with this.
Thanks.
I solved this issue.
Inside DijkstraBidirectionRef.calcPath I was trying to calculate shortest path from an arbitrary node to source node and vertex node.
The error used to occur because the original call to calcPath was operating on QueryGraph and inside I was creating a new Object of DijkstraBidirectionRef using LevelGraphStorage.
This was a problem because QueryGraph may create virtual nodes and edges for source and target nodes. Call to calcPath(node, virtualNode) operating on LevelGraphStorage would throw an exception.
The fix was to call algo.setGraph(queryGraph) after creating DijkstraBidirectionRef.

APEX, Unit Test, Callout No Response with Static Resource

Bit stuck on another one i'm afraid, i am trying to write a unit test for a bulk APEX class.
The class has a calllout to the google api, so i have created a static resource which i am feeding in via a mock, so i can complete testing of processing the JSON that is returned. However for some reason the response is always empty.
Now the very odd thing is that if i use exactly the same callout/JSON code, and the same mock code on a previous #future call, then it does return fine.
Here is the class:
global class mileage_bulk implements Database.Batchable<sObject>,
Database.AllowsCallouts
{
global Database.QueryLocator start(Database.BatchableContext BC)
{
String query = 'SELECT Id,Name,Amount,R2_Job_Ref__c,R2_Shipping_Post_Code__c,Shipping_Postcode_2__c FROM Opportunity WHERE R2_Shipping_Post_Code__c != null';
return Database.getQueryLocator(query);
//system.debug('Executing'+query);
}
global void execute(Database.BatchableContext BC, List<Opportunity> scope)
{
system.debug(scope);
for(Opportunity a : scope)
{
String startPostcode = null;
startPostcode = EncodingUtil.urlEncode('HP27DU', 'UTF-8');
String endPostcode = null;
String endPostcodeEncoded = null;
if (a.R2_Shipping_Post_Code__c != null){
endPostcode = a.R2_Shipping_Post_Code__c;
Pattern nonWordChar = Pattern.compile('[^\\w]');
endPostcode = nonWordChar.matcher(endPostcode).replaceAll('');
endPostcodeEncoded = EncodingUtil.urlEncode(endPostcode, 'UTF-8');
}
Double totalDistanceMeter = null;
Integer totalDistanceMile = null;
String responseBody = null;
Boolean firstRecord = false;
String ukPrefix = 'UKH';
if (a.R2_Job_Ref__c != null){
if ((a.R2_Job_Ref__c).toLowerCase().contains(ukPrefix.toLowerCase())){
system.debug('Is Hemel Job');
startPostcode = EncodingUtil.urlEncode('HP27DU', 'UTF-8');
} else {
system.debug('Is Bromsgrove Job');
startPostcode = EncodingUtil.urlEncode('B604AD', 'UTF-8');
}
}
// build callout
Http h = new Http();
HttpRequest req = new HttpRequest();
req.setEndpoint('http://maps.googleapis.com/maps/api/directions/json?origin='+startPostcode+'&destination='+endPostcodeEncoded+'&units=imperial&sensor=false');
req.setMethod('GET');
req.setTimeout(60000);
system.debug('request follows');
system.debug(req);
try{
// callout
HttpResponse res = h.send(req);
// parse coordinates from response
JSONParser parser = JSON.createParser(res.getBody());
responseBody = res.getBody();
system.debug(responseBody);
while (parser.nextToken() != null) {
if ((parser.getCurrentToken() == JSONToken.FIELD_NAME) &&
(parser.getText() == 'distance')){
parser.nextToken(); // object start
while (parser.nextToken() != JSONToken.END_OBJECT){
String txt = parser.getText();
parser.nextToken();
//system.debug(parser.nextToken());
//system.debug(txt);
if (firstRecord == false){
//if (txt == 'text'){
//totalDistanceMile = parser.getText();
system.debug(parser.getText());
//}
if (txt == 'value'){
totalDistanceMeter = parser.getDoubleValue();
double inches = totalDistanceMeter*39.3701;
totalDistanceMile = (integer)inches/63360;
system.debug(parser.getText());
firstRecord = true;
}
}
}
}
}
} catch (Exception e) {
}
//system.debug(accountId);
system.debug(a);
system.debug(endPostcodeEncoded);
system.debug(totalDistanceMeter);
system.debug(totalDistanceMile);
// update coordinates if we get back
if (totalDistanceMile != null){
system.debug('Entering Function to Update Object');
a.DistanceM__c = totalDistanceMile;
a.Shipping_Postcode_2__c = a.R2_Shipping_Post_Code__c;
//update a;
}
}
update scope;
}
global void finish(Database.BatchableContext BC)
{
}
}
and here is the test class;
#isTest
private class mileage_bulk_tests{
static testMethod void myUnitTest() {
Opportunity opp1 = new Opportunity(name = 'Google Test Opportunity',R2_Job_Ref__c = 'UKH12345',R2_Shipping_Post_Code__c = 'AL35QW',StageName = 'qualified',CloseDate = Date.today());
insert opp1;
Opportunity opp2 = new Opportunity(name = 'Google Test Opportunity 2',StageName = 'qualified',CloseDate = Date.today());
insert opp2;
Opportunity opp3 = new Opportunity(name = 'Google Test Opportunity 3',R2_Job_Ref__c = 'UKB56789',R2_Shipping_Post_Code__c = 'AL35QW',StageName = 'qualified',CloseDate = Date.today());
insert opp3;
StaticResourceCalloutMock mock = new StaticResourceCalloutMock();
mock.setStaticResource('googleMapsJSON');
mock.setStatusCode(200); // Or other appropriate HTTP status code
mock.setHeader('Content-Type', 'application/json'); // Or other appropriate MIME type like application/xml
//Set the mock callout mode
Test.setMock(HttpCalloutMock.class, mock);
system.debug(opp1);
system.debug(opp1.id);
//Call the method that performs the callout
Test.startTest();
mileage_bulk b = new mileage_bulk();
database.executeBatch((b), 10);
Test.stopTest();
}
}
Help greatly appreciated!
Thanks
Gareth
Not certain what 'googleMapsJSON' looks like, perhaps you could post for us.
Assuming your mock resource is well formatted, make sure the file extension is ".json" and was saved with UTF-8 encoding.
If #2 does not work, you should try saving your resource as .txt - I've run in to this before where it needed a plain text resource but expected application/json content type
Be certain that the resource name string you are providing has the same casing as the name of the resource. It is case sensitive.
Are you developing on a namespaced package environment? Try adding the namespace to the resource name if so.
Otherwise, your code looks pretty good at first glance.