Export html to Excel format? [duplicate] - html

I want to extract some data like " email addresses " .. from table which are in PDF file and use this email addresses which I extract to send email to those people.
What I have found so far through searching the web:
I have to convert the PDF file to Excel to read the data easily and use them as I want.
I find some free dll like itextsharp or PDFsharp.
But I didn't find any snippet code help to do this in C#. is there any solution ?

You absolutely do not have to convert PDF to Excel.
First of all, please determine whether your PDF contains textual data, or it is scanned image.
If it contains textual data, then you are right about using "some free dll". I recommend iTextSharp as it is popular and easy to use.
Now the controversial part. If you don't need rock solid solution, it would be easiest to read all PDF to a string and then retrieve emails using regular expression.
Here is example (not perfect) of reading PDF with iTextSharp and extracting emails:
public string PdfToString(string fileName)
{
var sb = new StringBuilder();
var reader = new PdfReader(fileName);
for (int page = 1; page <= reader.NumberOfPages; page++)
{
var strategy = new SimpleTextExtractionStrategy();
string text = PdfTextExtractor.GetTextFromPage(reader, page, strategy);
text = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(text)));
sb.Append(text);
}
reader.Close();
return sb.ToString();
}
//adjust expression as needed
Regex emailRegex = new Regex("Email Address (?<email>.+?) Passport No");
public IEnumerable<string> ExtractEmails(string content)
{
var matches = emailRegex.Matches(content);
foreach (Match m in matches)
{
yield return m.Groups["email"].Value;
}
}

Using bytescout PDF Extractor SDK we can be able to extract the whole page to csv as below.
CSVExtractor extractor = new CSVExtractor();
extractor.RegistrationName = "demo";
extractor.RegistrationKey = "demo";
TableDetector tdetector = new TableDetector();
tdetector.RegistrationKey = "demo";
tdetector.RegistrationName = "demo";
// Load the document
extractor.LoadDocumentFromFile("C:\\sample.pdf");
tdetector.LoadDocumentFromFile("C:\\sample.pdf");
int pageCount = tdetector.GetPageCount();
for (int i = 1; i <= pageCount; i++)
{
int j = 1;
do
{
extractor.SetExtractionArea(tdetector.GetPageRect_Left(i),
tdetector.GetPageRect_Top(i),
tdetector.GetPageRect_Width(i),
tdetector.GetPageRect_Height(i)
);
// and finally save the table into CSV file
extractor.SavePageCSVToFile(i, "C:\\page-" + i + "-table-" + j + ".csv");
j++;
} while (tdetector.FindNextTable()); // search next table
}

public void Convert(string fileNames) {
int pageCount = 0;
iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader(fileNames);
pageCount = reader.NumberOfPages;
string ext = System.IO.Path.GetExtension(fileNames);
//string[] outfiles = new string[pageCount];
//Excel.Application app = new Excel.Application();
//app.Workbooks.Add("");
CSVExtractor extractor = new CSVExtractor();
//string outfilePDF1 = fileNames.Replace((System.IO.Path.GetFileName(fileNames)), (System.IO.Path.GetFileName(fileNames).Replace(".pdf", "") + "_rez" + ".csv"));
string outfilePDFExcel1 = fileNames.Replace((System.IO.Path.GetFileName(fileNames)),
(System.IO.Path.GetFileName(fileNames).Replace(".pdf", "") + "_rez" + ".xls"));
extractor.RegistrationName = "demo";
extractor.RegistrationKey = "demo";
string folderName = #"C:\Users\Dafina\Desktop\PDF_EditProject\PDF_EditProject\PDFs";
string pathString = System.IO.Path.Combine(folderName, System.IO.Path.GetFileName(fileNames).Replace(".pdf", "")) + "-CSVs";
System.IO.Directory.CreateDirectory(pathString);
for (int i = 0; i < pageCount; i++)
{
string outfilePDF = fileNames.Replace((System.IO.Path.GetFileName(fileNames)),
(System.IO.Path.GetFileName(fileNames).Replace(".pdf", "") + "_" + (i + 1).ToString()) + ext);
extractor.LoadDocumentFromFile(outfilePDF);
//string outfile = fileNames.Replace((System.IO.Path.GetFileName(fileNames)),
// (System.IO.Path.GetFileName(fileNames).Replace(".pdf", "") + "_" + (i + 1).ToString()) + ".csv");
string outfile = fileNames.Replace((System.IO.Path.GetFileName(fileNames)),
(System.IO.Path.GetFileName(fileNames).Replace(".pdf", "") + "-CSVs\\" + "Sheet_" + (i + 1).ToString()) + ".csv");
extractor.SaveCSVToFile(outfile);
}
Excel.Application xlApp = new Microsoft.Office.Interop.Excel.Application();
if (xlApp == null)
{
Console.WriteLine("Excel is not properly installed!!");
return;
}
Excel.Workbook xlWorkBook;
object misValue = System.Reflection.Missing.Value;
xlWorkBook = xlApp.Workbooks.Add(misValue);
string[] cvsFiles = Directory.GetFiles(pathString);
Array.Sort(cvsFiles, new AlphanumComparatorFast());
//string[] lista = new string[pageCount];
//for (int t = 0; t < pageCount; t++)
//{
// lista[t] = cvsFiles[t];
//}
//Array.Sort(lista, new AlphanumComparatorFast());
Microsoft.Office.Interop.Excel.Worksheet xlWorkSheet;
for (int i = 0; i < cvsFiles.Length; i++)
{
int sheet = i + 1;
xlWorkSheet = xlWorkBook.Sheets[sheet];
if (i < cvsFiles.Length - 1)
{
xlWorkBook.Worksheets.Add(Type.Missing, xlWorkSheet, Type.Missing, Type.Missing);
}
int sheetRow = 1;
Encoding objEncoding = Encoding.Default;
StreamReader readerd = new StreamReader(File.OpenRead(cvsFiles[i]));
int ColumLength = 0;
while (!readerd.EndOfStream)
{
string line = readerd.ReadLine();
Console.WriteLine(line);
try
{
string[] columns = line.Split((new char[] { '\"' }));
for (int col = 0; col < columns.Length; col++)
{
if (ColumLength < columns.Length)
{
ColumLength = columns.Length;
}
if (col % 2 == 0)
{
}
else if (columns[col] == "")
{
}
else
{
xlWorkSheet.Cells[sheetRow, col + 1] = columns[col].Replace("\"", "");
}
}
sheetRow++;
}
catch (Exception e)
{
string msg = e.Message;
}
}
int k = 1;
for (int s = 1; s <= ColumLength; s++)
{
xlWorkSheet.Columns[k].Delete();
k++;
}
releaseObject(xlWorkSheet);
readerd.Close();
}
xlWorkBook.SaveAs(outfilePDFExcel1, Microsoft.Office.Interop.Excel.XlFileFormat.xlWorkbookNormal,
misValue, misValue, misValue, misValue, Microsoft.Office.Interop.Excel.XlSaveAsAccessMode.xlExclusive,
misValue, misValue, misValue, misValue, misValue);
xlWorkBook.Close(true, misValue, misValue);
xlApp.Quit();
releaseObject(xlWorkBook);
releaseObject(xlApp);
var dir = new DirectoryInfo(pathString);
dir.Attributes = dir.Attributes & ~FileAttributes.ReadOnly;
dir.Delete(true);
}

Probably the Best code would be to use Third party dll
namespace ConsoleApp2
{
internal class Program
{
static void Main(string[] args)
{
string pathToPdf = #"D:\abc\abc.pdf";
string pathToExcel = Path.ChangeExtension(pathToPdf, ".xls");
SautinSoft.PdfFocus f = new SautinSoft.PdfFocus();
f.ExcelOptions.ConvertNonTabularDataToSpreadsheet = false;
// 'true' = Preserve original page layout.
// 'false' = Place tables before text.
f.ExcelOptions.PreservePageLayout = true;
// The information includes the names for the culture, the writing system,
// the calendar used, the sort order of strings, and formatting for dates and numbers.
System.Globalization.CultureInfo ci = new System.Globalization.CultureInfo("en-US");
ci.NumberFormat.NumberDecimalSeparator = ",";
ci.NumberFormat.NumberGroupSeparator = ".";
f.ExcelOptions.CultureInfo = ci;
f.OpenPdf(pathToPdf);
if (f.PageCount > 0)
{
int result = f.ToExcel(pathToExcel);
// Open the resulted Excel workbook.
if (result == 0)
{
System.Diagnostics.Process.Start(pathToExcel);
}
}
}
}
}

Related

Refer to json individual objects in cpp crow

i am using a cpp crow library and i am having difficulty in accessing individual objects i am attaching my code here.
CROW_ROUTE(app,"/hello/<string>")
([](string name){
crow::json::wvalue x;
x["name"] = "llllds";
x["Town"] = "Texas";
x["nickname"] = "drax";
x["father"] = "Nethan";
if (name == "Nothing")
cout << "Bad Responce";
std::ostringstream os;
cout << name << " is the required query";
val = x[name];
return val;
And i want to return my name can anyone help me with this. Thanks in advance
I used JSON for Modern C++ within crow (https://github.com/nlohmann/json)
Here is an example CROW_ROUTE I wrote
CROW_ROUTE(app, "/palindromes/<string>/<string>")([](const request &req, response &res, string ID, string words){
palindromeHash.insert({ID, words}); //ignore this
nlohmann::json x;
x = getPalindromes(palindromeHash.at(ID));
palindromeHash.erase(ID); //ignore this
res.sendJSON(x);
});
//getPalindromesFunction()
nlohmann::json getPalindromes(string data){
nlohmann::json x;
unordered_map<string, bool> hashmap;
string word = "";
std::string::size_type i = data.find("%20");
while (i != std::string::npos){
data.erase(i, 3);
data.insert(i, " ");
i = data.find("%20", i);
}
for(int i = 0; i < data.size(); i++){
if(data[i] == ' '){
hashmap.insert({word, true});
word = "";
}
else{
word += data[i];
}
}
string temp;
vector<string> words;
int numValid = 0;
for(auto& i: hashmap){
temp = i.first;
reverse(temp.begin(), temp.end());
auto got = hashmap.find(temp);
if(got != hashmap.end()){
words.push_back(i.first);
numValid++;
}
}
x["words"] = words;
x["numValid"] = numValid;
return x;
}
As you can see it returns a JSON object x that holds palindromes. The sendJSON() function is something I added to crow_all.h. Add it under the struct response section on line 7215
void sendJSON(const nlohmann::json& data){
std::string response = data.dump();
add_header("Access-Control-Allow-Origin", "*");
add_header("Content-Type", "text/html");
write(response);
end();
}
Remember to include "json.h" in both main.cpp and crow_all.h. The res.sendJSON will send it to my JS file which can loop through the JSON with ease.
$.get("/palindromes/" + ID + "/" + curr_line, {}, function(response){
let x = JSON.parse(response); //This will allow JS to read the C++ JSON
for(let i = 0; i < x.words.length; i++){
term.write(x.words[i] + "\r\n");
}
}

Export data from the mssql database to a csv file

good work
v.b.net I will start a new project on my project and my purpose in this project from the database to extract data from certain tables and I want to save as csv
"##FILE VERSION##","251" "##TABLEDEF START##"
"MESAJ=String,50,""MESAJ"","""",50,Data,"""""
"ID=Integer,0,""ID"","""",10,Data,"""""
"SUBEIND=Integer,0,""SUBEIND"","""",10,Data,"""""
"KASAIND=Integer,0,""KASAIND"","""",10,Data,""""" "##INDEXDEF START##"
"##INDEXDEF END##" "##TABLEDEF END##"
"MESAJ","ID","SUBEIND","KASAIND", "YeniFirma","112","100","101",
"YeniCari","100","100","101", "YeniStok","101","100","101", –
Send your sql dataset result as a parameter to this function. It create csv format for you.
public string ConvertToCSV(DataSet objDataSet)
{
StringBuilder content = new StringBuilder();
if (objDataSet.Tables.Count >= 1)
{
System.Data.DataTable table = objDataSet.Tables[0];
if (table.Rows.Count > 0)
{
DataRow dr1 = (DataRow)table.Rows[0];
int intColumnCount = dr1.Table.Columns.Count;
int index = 1;
foreach (DataColumn item in dr1.Table.Columns)
{
content.Append(String.Format("\"{0}\"", item.ColumnName));
if (index < intColumnCount)
content.Append(",");
else
content.Append("\r\n");
index++;
}
foreach (DataRow currentRow in table.Rows)
{
string strRow = string.Empty;
for (int y = 0; y <= intColumnCount - 1; y++)
{
strRow += "\"" + currentRow[y].ToString() + "\"";
if (y < intColumnCount - 1 && y >= 0)
strRow += ",";
}
content.Append(strRow + "\r\n");
}
}
}
This function send a mail:
public void sendMail(string csv)
{
var sendMailThread = new Thread(() =>
{
MemoryStream stream = new MemoryStream(Encoding.ASCII.GetBytes(csv));
Attachment attachment = new Attachment(stream, new ContentType("text/csv"));
attachment.Name = DateTime.Now.ToShortDateString() + "Report.csv";
MailMessage ePosta = new MailMessage();
ePosta.From = new MailAddress("xx");
ePosta.To.Add("xxx");
ePosta.CC.Add("xxx");
ePosta.CC.Add("xxx");
ePosta.Attachments.Add(attachment);
ePosta.Subject = DateTime.Now + " Subject";
ePosta.Body = DateTime.Now + " body message.";
SmtpClient smtp = new SmtpClient();
smtp.Credentials = new System.Net.NetworkCredential("xxx", "xxx");
smtp.Port = 587;
smtp.Host = "smtp.gmail.com";
smtp.EnableSsl = true;
object userState = ePosta;
smtp.SendAsync(ePosta, (object)ePosta);
});
sendMailThread.Start();
}

Downloading Attachment from Exchange Server using SSIS package deployed on another Server

I have built an SSIS package which reads CSV from certain folder. But now I need to download same csv from exchange server.Also Outlook is not installed on my machine. Will I be able to download CSV from exchange server and how ? Thanks!
I have used some of the code from the link http://sqlandbilearning.blogspot.com.au/2014/07/download-email-attachment-using-ssis.html but i have added some new code for removing TCP binding error using ServicePointManager as well as added search filter for retrieving specific emails and this code also takes care of multiple attachment from different emails to be saved on file system.
public void Main()
{
string filePath = "";
string fileName = "";
List<SearchFilter> searchFilterCollection = new List<SearchFilter>();
DateTime now = DateTime.Now;
DateTime beginRecievedTime = new DateTime(now.Year, now.Month, now.Day, 7, 55, 0);
DateTime finishRecievedTime = new DateTime(now.Year, now.Month, now.Day, 8, 15, 0);
EmailMessage latestEmail = null;
try
{
ServicePointManager.ServerCertificateValidationCallback = (sender, certificate, chain, sslPolicyErrors) => true;
ExchangeService service = new ExchangeService(ExchangeVersion.Exchange2010);
service.UseDefaultCredentials = true;
//service.Credentials = new WebCredentials("username", "password");
service.Url = new Uri("");
// 10 mails per page in DESC order
ItemView view = new ItemView(10);
view.OrderBy.Add(ItemSchema.DateTimeReceived, SortDirection.Descending);
searchFilterCollection.Add(new SearchFilter.ContainsSubstring(ItemSchema.Subject, "Scheduled search"));
SearchFilter greaterthanfilter = new SearchFilter.IsGreaterThanOrEqualTo(ItemSchema.DateTimeReceived, beginRecievedTime);
searchFilterCollection.Add(greaterthanfilter);
SearchFilter lessthanfilter = new SearchFilter.IsLessThan(ItemSchema.DateTimeReceived, finishRecievedTime);
searchFilterCollection.Add(lessthanfilter);
SearchFilter filter = new SearchFilter.SearchFilterCollection(LogicalOperator.And, searchFilterCollection);
//Find mails
FindItemsResults<Item> fir = service.FindItems(WellKnownFolderName.Inbox, filter, view);
Dictionary<EmailMessage, string> emailsMap = new Dictionary<EmailMessage, string>();
foreach (Item item in fir.Items)
{
item.Load(); //Load the entire message with attachment
EmailMessage email = item as EmailMessage;
if (email != null)
{
if (email.HasAttachments == true && email.Attachments.Count == 1)
{
if (email.Subject.StartsWith("Scheduled search") == true)
{
filePath = Path.Combine(Dts.Variables["User::SourceFolderPath"].Value.ToString()
, email.DateTimeReceived.Date.ToString("MM.dd.yyyy") + "_" +
email.Attachments[0].Name);
// fileName = email.DateTimeReceived.Date.ToString("MM.dd.yyyy") + "_" +
// email.Attachments[0].Name.ToString();
emailsMap.Add(email, filePath);
}
}
}
}
if (emailsMap.Count > 0) {
foreach (var item in emailsMap) {
//Save attachment
EmailMessage email = item.Key;
filePath = item.Value;
FileAttachment fileAttachment = email.Attachments[0] as FileAttachment;
fileAttachment.Load(filePath);
string extractPath = Dts.Variables["User::SourceFolderPath"].Value.ToString() + "\\" + email.Attachments[0].Name;
System.IO.Compression.ZipFile.ExtractToDirectory(filePath, extractPath);
fileName = Dts.Variables["User::SourceFolderPath"].Value.ToString() + "\\" + email.DateTimeReceived.Date.ToString("MM.dd.yyyy") + "_" +
email.Attachments[0].Name.ToString();
if (File.Exists(fileName))
{
File.Delete(fileName);
}
}
}
// Dts.Variables["User::SourceFileName"].Value = fileName;
Dts.TaskResult = (int)ScriptResults.Success;
}
catch(System.Runtime.InteropServices.COMException ex)
{
if (Dts.Variables.Locked == true)
{
Dts.Variables.Unlock();
}
//An error occurred.
Dts.Events.FireError(0, "Error occured", ex.Message, String.Empty, 0);
Dts.TaskResult = (int)ScriptResults.Failure;
}
}

In SSIS,How to read the data from Object and mail in the form of Table

My main objective is:
To read the object which I get through Execute Sql task
Then Using Script task I want to mail it ,but I need to format the values in object in form of table as the body of mail.
And IF Time TimeDifference Column had value =<100 Then Row should be Green Else Red.
So I have an Object called "ResultSet",I pass to Script Task, I convert it into c# table structure and place it in a variable called "ApplicationTotal".
Below is the code for that which works fine.
public void Main()
{
DataTable dtTotal = new DataTable();
OleDbDataAdapter adapter = new OleDbDataAdapter();
DataTable dt = new DataTable();
adapter.Fill(dt, Dts.Variables["InactiveSet"].Value);
// In the first Run dtTotal is created
if (Convert.ToInt32(Dts.Variables["InsertedRowCountTotal"].Value) == 0)
{
foreach (DataColumn dc in dt.Columns)
{
dtTotal.Columns.Add(dc.ColumnName,dc.DataType);
}
}
else // In the next runs dtTotal is retrieved from variable
{
dtTotal = (DataTable)Dts.Variables["InactiveSetTotal"].Value;
}
foreach (DataRow dr in dt.Rows)
{
DataRow newDR = dtTotal.NewRow();
foreach( DataColumn dc in dt.Columns)
{
newDR[dc.ColumnName] = dr[dc.ColumnName];
}
dtTotal.Rows.Add(newDR);
}
Dts.Variables["InactiveSetTotal"].Value = dtTotal;
Dts.Variables["InsertedRowCountTotal"].Value = Convert.ToInt32(Dts.Variables["InsertedRowCountTotal"].Value) + Convert.ToInt32(Dts.Variables["InsertedRowCount"].Value);
Dts.TaskResult = (int)ScriptResults.Success;
}
Then I pass ApplicationTotal object to script task where i have to read the object and mail the details in form of table.I am successful in sending mail but I am not able to format the data in table and change the colour
I get the output in mail as
if (Convert.ToInt32(Dts.Variables["InsertedRowCount"].Value) == 0)
{
return;
}
#region BuildingEmailBody
StringBuilder sb = new StringBuilder();
sb.AppendLine(string.Format("Monitor Application Report"));
sb.AppendLine();
sb.AppendLine();
//sb.AppendLine(string.Format("Following are the Details:\n\n<TABLE><TR>\n<TH>{0}</TH><TH>{1}</TH><TH>{2}</TH> <TH>{3}</TH> <TH>{4}</TH>\n</TR>\n", "TimeDifferences(Minutes) ", "UpdateTime", "ApplicationName", "ServerName", "DatabaseName"));
OleDbDataAdapter adapter = new OleDbDataAdapter();
if (Convert.ToInt32(Dts.Variables["InsertedRowCount"].Value) > 0)
{
sb.AppendLine();
DataTable ApplicationTotal = new DataTable();
ApplicationTotal = (DataTable)Dts.Variables["ApplicationTotal"].Value;
foreach (DataRow dr in ApplicationTotal.Rows)
{
sb.AppendLine(string.Format("{0} {1} {2} {3} {4} ", dr[0], dr[1], dr[2], dr[3], dr[4]));
}
sb.AppendLine();
}
# endregion
In the above code ,
1.I have taken a variable html in which I create table and load the data in the table.
2.I add the color coding to the table's rows based on condition
3.Make IsBodyHtml =true
4.Pass html variable to smpt.Send(message)
public void Main()
{
string html = string.Empty;
if (Convert.ToInt32(Dts.Variables["InsertedRowCount"].Value) == 0)
{
return;
}
#region BuildingEmailBody
OleDbDataAdapter adapter = new OleDbDataAdapter();
if (Convert.ToInt32(Dts.Variables["InsertedRowCount"].Value) > 0)
{
DataTable ApplicationTotal = new DataTable();
ApplicationTotal = (DataTable)Dts.Variables["ApplicationTotal"].Value;
html = "Monitor Application Report <br></br> <style type='text/css'>td.datacellone { background-color: #FF0000; color: black;}td.datacelltwo { background-color: #00FF00; color: black;}</style> <table border=1>";
//add header row
html += "<tr>";
for (int i = 0; i < ApplicationTotal.Columns.Count; i++)
{
html += "<th>" + ApplicationTotal.Columns[i].ColumnName + "</th>";
}
html += "</tr>";
//add rows
for (int i = 0; i < ApplicationTotal.Rows.Count; i++)
{
html += "<tr>";
for (int j = 0; j < ApplicationTotal.Columns.Count; j++)
if (Convert.ToInt32(ApplicationTotal.Rows[i][0].ToString()) > Convert.ToInt32(Dts.Variables["TimeDifference"].Value.ToString()))
html += "<td class='datacellone'>" + ApplicationTotal.Rows[i][j].ToString() + "</td>";
else
html += "<td class='datacelltwo'>" + ApplicationTotal.Rows[i][j].ToString() + "</td>";
html += "</tr>";
}
html += "</table>";
}
//MessageBox.Show(Dts.Variables["TimeDifference"].Value.ToString());
# endregion
#region SendingEmail
string sendTo = Dts.Variables["AlarmOperator"].Value.ToString();
string from = "pemsadmin#pemsportal.com.au";
string subject = "Monitor Application Status";
string server = "192.168.240.171";
string user = "pemsadmin#pemsportal.com.au";
string password = "Sawu7619";
string domain = "pemsportal.com.au";
int port = 25;
System.Net.Mail.MailMessage message = new System.Net.Mail.MailMessage(from, sendTo, subject, html.ToString());
message.IsBodyHtml = true;
message.Body = html.ToString();
System.Net.Mail.SmtpClient smpt = new System.Net.Mail.SmtpClient(server, port);
smpt.Credentials = System.Net.CredentialCache.DefaultNetworkCredentials;
smpt.Credentials = new System.Net.NetworkCredential(user, password, domain);
smpt.Send(message);
#endregion
Dts.TaskResult = (int)ScriptResults.Success;
}

How to append results in Processing?

I have implemented the Table() function in order to save the results generated by the application. However, it seems that the Timer function in the application causes the application to write over the existing CSV file each time it runs. Rather than write over the existing CSV file, I would like to append the newest search results to the existing CSV file. Is there a way to do this? Is it easier to append the results if the results are stored in a different format such as JSON?
Timer timer;
import java.util.List;
Table table;
long lastID = Long.MAX_VALUE;
void setup() {
timer = new Timer(30000);
timer.start();
goTwitter();
table = new Table();
table.addColumn("id");
table.addColumn("latitude");
table.addColumn("longitude");
}
void draw(){
if (timer.isFinished()){
goTwitter();
timer.start();
}
}
void goTwitter(){
ConfigurationBuilder cb = new ConfigurationBuilder();
cb.setOAuthConsumerKey("");
cb.setOAuthConsumerSecret("");
cb.setOAuthAccessToken("");
cb.setOAuthAccessTokenSecret("");
Twitter twitter = new TwitterFactory(cb.build()).getInstance();
Query query = new Query("#love");
int numberOfTweets = 300;
ArrayList<Status> tweets = new ArrayList<Status>();
while (tweets.size () < numberOfTweets) {
if (numberOfTweets - tweets.size() > 100)
query.setCount(100);
else
query.setCount(numberOfTweets - tweets.size());
//long lastID = Long.MAX_VALUE;
try {
QueryResult result = twitter.search(query);
tweets.addAll(result.getTweets());
println("Gathered " + tweets.size() + " tweets");
for (Status t: tweets)
if(t.getId() < lastID) lastID = t.getId();
}
catch (TwitterException te) {
println("Couldn't connect: " + te);
};
query.setSinceId(lastID);
}
for (int i = 0; i < tweets.size(); i++) {
Status t = (Status) tweets.get(i);
GeoLocation loc = t.getGeoLocation();
String user = t.getUser().getScreenName();
String msg = t.getText();
String time = "";
if (loc!=null) {
Double lat = t.getGeoLocation().getLatitude();
Double lon = t.getGeoLocation().getLongitude();
println(i + " USER: " + user + " wrote: " + msg + " located at " + lat + ", " + lon);
TableRow newRow = table.addRow();
newRow.setString("id", user);
newRow.setDouble("latitude", lat);
newRow.setDouble("longitude", lon);
saveTable(table, "data2/syria_16500_5.csv");
}
}
println("lastID= " + lastID);
}
class Timer {
int savedTime;
int totalTime;
Timer (int tempTotalTime) {
totalTime = tempTotalTime;
}
void start(){
savedTime = millis();
}
boolean isFinished() {
int passedTime = millis() - savedTime;
if (passedTime > totalTime){
return true;
} else {
return false;
}
}
}
Well, there does not seem to be a direct implementation to append to a table, so you'll have to resort to a hack: load the table in processing, write to it and resave it, sort of like this:
processing.data.Table table;
void setup() {
File f = new File(sketchPath("") + "data2/syria_16500_5.csv");
println(f.getAbsolutePath());
if (!f.exists()) {
table = new processing.data.Table();
table.addColumn("id");
table.addColumn("latitude");
table.addColumn("longitude");
}
else
table = loadTable("data2/syria_16500_5.csv", "header, csv");
TableRow newRow = table.addRow();
newRow.setString("id", "asad");
newRow.setDouble("latitude", 234);
newRow.setDouble("longitude", 2523);
saveTable(table, "data2/syria_16500_5.csv");
}
The sketch first checks if the file exists. If it does not, it creates a new table, otherwise it loads the old table in with its header.
Be warned, this is not particularly safe... If you change your columns (say, in a text editor) and try to run the sketch again you will get an exception.