Converting Excel to CSV with SSIS

I found the code below to use in my Script Task to convert my Excel files to CSV files and put them in a separate folder. My files only have one sheet (so I think there is extra code), but also when a file is converted the column header is removed. I have also set up SSIS variables for the destination, source, and file delimiter. Any suggestions?
/// <summary>
/// ScriptMain is the entry point class of the script. Do not change the name, attributes,
/// or parent of this class.
/// </summary>
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
#region Help: Using Integration Services variables and parameters in a script
/* To use a variable in this script, first ensure that the variable has been added to
* either the list contained in the ReadOnlyVariables property or the list contained in
* the ReadWriteVariables property of this script task, according to whether or not your
* code needs to write to the variable. To add the variable, save this script, close this instance of
* Visual Studio, and update the ReadOnlyVariables and
* ReadWriteVariables properties in the Script Transformation Editor window.
* To use a parameter in this script, follow the same steps. Parameters are always read-only.
*
* Example of reading from a variable:
* DateTime startTime = (DateTime) Dts.Variables["System::StartTime"].Value;
*
* Example of writing to a variable:
* Dts.Variables["User::myStringVariable"].Value = "new value";
*
* Example of reading from a package parameter:
* int batchId = (int) Dts.Variables["$Package::batchId"].Value;
*
* Example of reading from a project parameter:
* int batchId = (int) Dts.Variables["$Project::batchId"].Value;
*
* Example of reading from a sensitive project parameter:
* int batchId = (int) Dts.Variables["$Project::batchId"].GetSensitiveValue();
* */
#endregion
#region Help: Firing Integration Services events from a script
/* This script task can fire events for logging purposes.
*
* Example of firing an error event:
* Dts.Events.FireError(18, "Process Values", "Bad value", "", 0);
*
* Example of firing an information event:
* Dts.Events.FireInformation(3, "Process Values", "Processing has started", "", 0, ref fireAgain)
*
* Example of firing a warning event:
* Dts.Events.FireWarning(14, "Process Values", "No values received for input", "", 0);
* */
#endregion
#region Help: Using Integration Services connection managers in a script
/* Some types of connection managers can be used in this script task. See the topic
* "Working with Connection Managers Programatically" for details.
*
* Example of using an ADO.Net connection manager:
* object rawConnection = Dts.Connections["Sales DB"].AcquireConnection(Dts.Transaction);
* SqlConnection myADONETConnection = (SqlConnection)rawConnection;
* //Use the connection in some code here, then release the connection
* Dts.Connections["Sales DB"].ReleaseConnection(rawConnection);
*
* Example of using a File connection manager
* object rawConnection = Dts.Connections["Prices.zip"].AcquireConnection(Dts.Transaction);
* string filePath = (string)rawConnection;
* //Use the connection in some code here, then release the connection
* Dts.Connections["Prices.zip"].ReleaseConnection(rawConnection);
* */
#endregion
/// <summary>
/// This method is called when this script task executes in the control flow.
/// Before returning from this method, set the value of Dts.TaskResult to indicate success or failure.
/// To open Help, press F1.
/// </summary>
public void Main()
{
// TODO: Add your code here
string SourceFolderPath = Dts.Variables["User::SourcePath"].Value.ToString();
string DestinationFolderPath = Dts.Variables["User::DestinationPath"].Value.ToString();
string FileDelimited = Dts.Variables["User::FileDelimiter"].Value.ToString();
var directory = new DirectoryInfo(SourceFolderPath);
FileInfo[] files = directory.GetFiles();
//Declare and initialize variables
string fileFullPath = "";
//Get one Book(Excel file at a time)
foreach (FileInfo file in files)
{
string filename = "";
fileFullPath = SourceFolderPath + "\\" + file.Name;
filename = file.Name.Replace(".xlsx", "");
MessageBox.Show(fileFullPath);
//Create Excel Connection
string ConStr;
string HDR;
HDR = "YES";
ConStr = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + fileFullPath + ";Extended Properties=\"Excel 12.0;HDR=" + HDR + ";IMEX=0\"";
OleDbConnection cnn = new OleDbConnection(ConStr);
//Get Sheet Name
cnn.Open();
DataTable dtSheet = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
string sheetname;
sheetname = "";
Int16 sheetcnt = 0;
foreach (DataRow drSheet in dtSheet.Rows)
{
sheetcnt += 1;
if (drSheet["TABLE_NAME"].ToString().Contains("$"))
{
sheetname = drSheet["TABLE_NAME"].ToString();
//Display Sheet Name; uncomment the next line to show it
// MessageBox.Show(sheetname);
//Load the DataTable with Sheet Data
OleDbCommand oconn = new OleDbCommand("select * from [" + sheetname + "]", cnn);
//cnn.Open();
OleDbDataAdapter adp = new OleDbDataAdapter(oconn);
DataTable dt = new DataTable();
adp.Fill(dt);
//drop $ from sheet name
sheetname = sheetname.Replace("$", "");
//Create CSV File and load data to it from Sheet
StreamWriter sw = new StreamWriter(DestinationFolderPath + "\\" + filename + ".csv", true);
int ColumnCount = dt.Columns.Count;
//we check sheetcnt == 1 so the header is written to the CSV only once
if (sheetcnt == 1)
{
// Write the Header Row to File
for (int i = 0; i < ColumnCount; i++)
{
sw.Write(dt.Columns[i]);
if (i < ColumnCount - 1)
{
sw.Write(FileDelimited);
}
}
sw.Write(sw.NewLine);
}
// Write All Rows to the File
foreach (DataRow dr in dt.Rows)
{
for (int i = 0; i < ColumnCount; i++)
{
if (!Convert.IsDBNull(dr[i]))
{
sw.Write(dr[i].ToString());
}
if (i < ColumnCount - 1)
{
sw.Write(FileDelimited);
}
}
sw.Write(sw.NewLine);
}
sw.Close();
}
}
}
Dts.TaskResult = (int)ScriptResults.Success;
}
#region ScriptResults declaration
/// <summary>
/// This enum provides a convenient shorthand within the scope of this class for setting the
/// result of the script.
///
/// This code was generated automatically.
/// </summary>
enum ScriptResults
{
Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
};
#endregion
}
}
Sorry, first time asking a question here!
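A minimal sketch of one way to address both points, assuming the same ACE OLE DB connection string as above and .NET 4 or later: write the header from dt.Columns (HDR=YES puts the first spreadsheet row there), drop the sheetcnt check because the schema table advances that counter on non-sheet rows too (so the first real sheet is not guaranteed to be row 1), overwrite rather than append the CSV, and stop after the first worksheet since the files only contain one. ConvertFirstSheetToCsv is an illustrative helper name, not part of any SSIS API:
// Sketch only: converts the first real worksheet of one workbook to a CSV.
// Assumes the ACE OLE DB provider is installed and these namespaces are imported:
// System, System.Data, System.Data.OleDb, System.IO, System.Linq.
static void ConvertFirstSheetToCsv(string excelPath, string csvPath, string delimiter)
{
    string conStr = "Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + excelPath +
                    ";Extended Properties=\"Excel 12.0;HDR=YES;IMEX=0\"";
    using (OleDbConnection cnn = new OleDbConnection(conStr))
    {
        cnn.Open();
        DataTable dtSheet = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
        foreach (DataRow drSheet in dtSheet.Rows)
        {
            string sheetname = drSheet["TABLE_NAME"].ToString();
            // real worksheets end in $ (or $' when the name is quoted); this skips
            // named ranges and hidden _FilterDatabase tables that also contain a $
            if (!sheetname.EndsWith("$") && !sheetname.EndsWith("$'")) continue;

            DataTable dt = new DataTable();
            using (OleDbDataAdapter adp = new OleDbDataAdapter("select * from [" + sheetname + "]", cnn))
            {
                adp.Fill(dt);
            }
            // false = overwrite, so re-running the package does not append duplicate rows
            using (StreamWriter sw = new StreamWriter(csvPath, false))
            {
                // header row comes from dt.Columns because HDR=YES named them
                sw.WriteLine(string.Join(delimiter,
                    dt.Columns.Cast<DataColumn>().Select(c => c.ColumnName)));
                foreach (DataRow dr in dt.Rows)
                {
                    sw.WriteLine(string.Join(delimiter,
                        dr.ItemArray.Select(v => Convert.IsDBNull(v) ? "" : v.ToString())));
                }
            }
            break; // each workbook has only one sheet
        }
    }
}
Inside the existing foreach over files this would be called as ConvertFirstSheetToCsv(fileFullPath, DestinationFolderPath + "\\" + filename + ".csv", FileDelimited).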

Related

How to Parse the Text file in SSIS

I am new to SSIS. I am facing the issue below while parsing a text file which contains the following sample data.
Below is the requirement:
-> Capture the number after IH1 (454756567) and insert it into one column as InvoiceNumber
-> Insert the data between ABCD1234 and ABCD2345 into another column as TotalRecord
Many thanks for the help.
ABCD1234
IH1 454756567 686575634
IP2 HJKY TXRT
IBG 23455GHK
ABCD2345
IH1 689343256 686575634
IP2 HJKY TXRT
IBG 23455GHK
ABCD5678
This is a script component to process the entire file. You need to create your outputs; the columns are currently being processed as strings.
This assumes your file format is consistent. If IH1 and IP2 do not have 2 columns ALL the time, I would recommend a for loop from 1 to len - 1 to process the tokens and send the records to their own outputs; a sketch of that variant follows the code below.
public string recordID = String.Empty;
public override void CreateNewOutputRows()
{
string filePath = ""; //put your filepath here
using (System.IO.StreamReader sr = new System.IO.StreamReader(filePath))
{
while (!sr.EndOfStream)
{
string line = sr.ReadLine();
if (line.StartsWith("ABCD")) //Anything that identifies the start of a new record; StartsWith also avoids an exception on lines shorter than 4 characters
// line.Split(' ').Length == 1 also meets your criteria.
{
recordID = line;
Output0Buffer.AddRow();
Output0Buffer.RecordID = line;
}
string[] cols = line.Split(' ');
switch (cols[0])
{
case "IH1":
Output0Buffer.InvoiceNumber = cols[1];
Output0Buffer.WhatEverTheSecondColumnIs = cols[2];
break;
case "IP2":
Output0Buffer.ThisRow = cols[1];
Output0Buffer.ThisRow2 = cols[2];
break;
case "IBG":
Output0Buffer.Whatever = cols[1];
break;
}
}
}
}
You'll need to do this in a script component.
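For the inconsistent-column case mentioned above, here is a minimal standalone sketch of that loop-driven parsing, assuming the ABCD*/IH1/IP2/IBG layout shown in the question. ParsedRecord and RecordParser are illustrative names, not part of the SSIS API; in a real script component you would map each parsed record to its output buffer as in the switch above:
// Sketch only: group lines into records and keep every token per line,
// however many there are, instead of indexing cols[1]/cols[2] directly.
using System;
using System.Collections.Generic;
using System.IO;

class ParsedRecord
{
    public string RecordID;
    public List<string[]> Lines = new List<string[]>();
}

static class RecordParser
{
    public static IEnumerable<ParsedRecord> Parse(string filePath)
    {
        ParsedRecord current = null;
        foreach (string line in File.ReadLines(filePath))
        {
            if (line.StartsWith("ABCD")) // start of a new record
            {
                if (current != null) yield return current;
                current = new ParsedRecord { RecordID = line };
            }
            else if (current != null)
            {
                // keep every token on the line, whether there are 1, 2, or more
                current.Lines.Add(line.Split(' '));
            }
        }
        if (current != null) yield return current; // flush the last record
    }
}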

SSIS Script Task does not work on second iteration of ForEach Loop Container

This is my first post on Stack Overflow and it surely will not be the last one. I had a very rough week trying to fix a problem with my SSIS Script Task in SSDT for VS2015.
The problem is the following:
I have a ForEach Loop Container in the Control Flow, in which I map a variable User::FileName. I use this variable to process an Excel file (replacing commas with dots) in a Script Task, and afterwards I save the processed file and convert it to a flat file in a Data Flow Task. So far so good. But on the second loop of the ForEach Loop Container the original Excel file is not processed properly: the names are set correctly, but the commas are not replaced. I don't know what else to try. Maybe someone has a suggestion?
Edit 2: This is what the Script Task code looks like:
using Excel = Microsoft.Office.Interop.Excel;
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
public void Main()
{
Excel._Application oApp = new Excel.Application();
Excel.Workbook oWorkbook = oApp.Workbooks.Open(Dts.Variables["User::SourcePath"].Value.ToString() + "\\" + Dts.Variables["User::FileName"].Value.ToString() + ".xls");
Excel.Worksheet oWorksheet = oWorkbook.Worksheets[1];
try
{
ChangeValues(oWorksheet);
oWorkbook.SaveAs("C:\\TEMP\\" + Dts.Variables["User::FileName"].Value.ToString() + ".xls");
oWorkbook.Close();
oApp.Quit();
oWorksheet = null;
oWorkbook = null;
oApp = null;
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
GC.WaitForPendingFinalizers();
Dts.TaskResult = (int)ScriptResults.Success;
}
catch (Exception e)
{
Dts.Events.FireError(0, "Script task", e.Message + "\r" + e.StackTrace, String.Empty, 0);
}
}
private static void ChangeValues(Excel.Worksheet oWorksheet)
{
Excel.Range range = oWorksheet.UsedRange;
int colNo = range.Columns.Count;
int rowNo = range.Rows.Count;
// read the values into an array.
object[,] cells = range.Value;
for (int j = 1; j <= colNo; j++)
{
for (int i = 1; i <= rowNo; i++)
{
if (j > 3 && cells[i, j] != null)
{
cells[i, j] = cells[i, j].ToString().Replace(",", ".");
}
}
}
// set the values back into the range.
range.Value = cells;
return;
}
}
Ok, I know this post is a bit old, but I found a solution for the problem a few weeks later and wanted to share it quickly with you guys. To debug the code I had to install Windows 10 and VS2017 on a VM, because the Visual Studio Tools for Applications (VSTA) debugger that runs Script Tasks in SSIS does not work in VS2015, and SSDT is not available for VS2017 on Windows 7. I was then able to debug the script on Windows 10 in VS2017 and found out that my code didn't open the correct Excel sheet. Hope it might help anyone else with problems debugging SSIS Script Tasks.
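Independent of the root cause found above, Excel interop inside a ForEach loop is fragile when COM references are only handed to the garbage collector, because a lingering Excel.exe can hold the file into the next iteration. A hedged sketch of more deterministic cleanup follows; ExcelCleanup and ProcessWorkbook are illustrative names, and processSheet stands in for the ChangeValues method above:
// Sketch only: open, process, and deterministically release one workbook per iteration.
using System;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;

static class ExcelCleanup
{
    public static void ProcessWorkbook(string path, string savePath, Action<Excel.Worksheet> processSheet)
    {
        Excel.Application app = new Excel.Application();
        Excel.Workbook book = null;
        Excel.Worksheet sheet = null;
        try
        {
            book = app.Workbooks.Open(path);
            sheet = (Excel.Worksheet)book.Worksheets[1];
            processSheet(sheet);
            book.SaveAs(savePath);
        }
        finally
        {
            // release each COM reference even if processSheet throws,
            // so no Excel process lingers into the next loop iteration
            if (book != null) book.Close();
            app.Quit();
            if (sheet != null) Marshal.ReleaseComObject(sheet);
            if (book != null) Marshal.ReleaseComObject(book);
            Marshal.ReleaseComObject(app);
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }
    }
}
In the Script Task's Main this would be called once per iteration with the paths built from User::SourcePath and User::FileName.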

Parse MS Excel files dynamically with SSIS

I have a business requirement for the ability to have Excel files placed in a network location and the data from those files uploaded to a SQL Server database. The files will contain one worksheet of data. Each file corresponds to a table found within a known database. The files can and will correspond to multiple tables, and the target table is known only upon opening the file, i.e., from the name of the worksheet. I'm currently creating a separate SSIS package for each of these files as they are uploaded to the shared drive, but sometimes I can't create the packages fast enough.
I guess my question is: is this type of dynamic parsing something that SSIS can accomplish from a Script Task within a Foreach container, or should I look into another option?
So far, I have the following...but as I've researched, I've come across posts similar to this: Extracting excel files with SSIS, and that is making me slightly concerned regarding the feasibility...
public void Main()
{
// TODO: Add your code here
Dts.TaskResult = (int)ScriptResults.Success;
string NetworkLocation;
//Create database connection
SqlConnection myADONETConnection = new SqlConnection();
myADONETConnection = (SqlConnection)(Dts.Connections["db"].AcquireConnection(Dts.Transaction) as SqlConnection);
//Obtain the location of the file(s)
NetworkLocation = (string)Dts.Variables["User::NetworkLocation"].Value;
string[] dirs = Directory.GetFiles(NetworkLocation, "*.csv");
}
So, any thoughts or ideas or what direction I should look into?
I wrote an SSIS package a few months ago that does exactly what you seek, plus a little more. In my case, several hundred Excel files containing one or more worksheets of differing names needed to be imported into a database as unique staging tables. Also, the column names and number of columns in each worksheet were unknown. Each worksheet became its own table, and the table name was a combination of the original Excel file name and the worksheet name (FileName__WorksheetName). I applied two underscores between the file name and worksheet name in case the file and worksheet names contained underscores. There are a few caveats to this process: 1) all of the Excel files must be located in the same folder; 2) the column headers in each worksheet must appear in the first row; and 3) the worksheet names must not contain any special characters (spaces are automatically replaced with an underscore).
Steps:
1) Create a For Each Loop Container. Under Collection, apply a "Foreach File Enumerator"; under Enumerator configuration, list the folder location and the Files filter. For Files you can list *.* or even *.xlsx or *.xls to filter to specific files. Apply Fully Qualified. For Variable Mappings, apply a string user variable like "ExcelFile" with an Index of 0.
2) Add a Script Task in the For Each Loop Container. You will send it the ReadOnlyVariable "ExcelFile", and it will write to two new variables under ReadWriteVariables: "TableName" (a string) and "Worksheets" (an object that will hold a string array). Apply the following C# script. Note: since the following script will update your Excel files, you should work on copies of your originals.
#region Namespaces
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Runtime;
using System.Windows.Forms;
using System.IO;
using Excel = Microsoft.Office.Interop.Excel;
using System.Text;
using System.Linq;
using System.Threading.Tasks;
using System.Data.OleDb;
using System.Xml.Serialization;
#endregion
namespace xxxxxxxxx
{
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
public void Main()
{
// Includes full path, filename and extension... C:\Documents\ThisExcel.xlsx
string xlFile = Dts.Variables["User::ExcelFile"].Value.ToString();
// Remove path changing value to "ThisExcel.xlsx"
string NoPath = Path.GetFileName(xlFile);
// Remove extension changing value to "ThisExcel".
// This is applied because filename will become part of the name for new database tables
string tableName = Path.GetFileNameWithoutExtension(NoPath);
// Replace any spaces with underscores in tableName (FileName without path and extension)
tableName = tableName.Replace(" ", "_");
Dts.Variables["User::TableName"].Value = tableName;
Excel.Application app = new Excel.Application();
Excel.Workbook excelWorkbook;
try
{
excelWorkbook = app.Workbooks.Open(xlFile);
string tempsheet = " ";
int CountWorksheets = excelWorkbook.Sheets.Count;
//Dts.Variables["User::WorksheetCount"].Value = CountWorksheets;
string[] Excelworksheets;
Excelworksheets = new string[CountWorksheets];
int x = 0;
// Rename worksheets replace empty space with an underscore needed for an SSIS import and
// to avoid empty spaces in final table names.
foreach (Excel.Worksheet sheet in excelWorkbook.Worksheets)
{
tempsheet = sheet.Name;
tempsheet = tempsheet.Replace(" ", "_");
Excelworksheets[x++] = tempsheet.ToString();
sheet.Name = tempsheet;
}
Dts.Variables["User::Worksheets"].Value = Excelworksheets;
excelWorkbook.Save();
excelWorkbook.Close();
}
catch (Exception ex)
{
MessageBox.Show("Excel sheet rename failed for file " + xlFile + " based on " + ex.Message);
}
finally
{
app.Quit();
app = null;
GC.Collect();
GC.WaitForPendingFinalizers();
}
Dts.TaskResult = (int)ScriptResults.Success;
}
#region ScriptResults declaration
enum ScriptResults
{
Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
};
#endregion
}
}
3) After saving and building the above C# script task, add another For Each Loop Container inside the earlier For Each Loop Container, below the Script Task just created. This will loop through each worksheet in each Excel file (if you only have one worksheet, that is fine). It applies an Enumerator of "Foreach From Variable Enumerator", pointed at the "Worksheets" variable populated in the aforementioned Script Task, and writes to a new user string variable called "Worksheet" with an Index of 0.
4) Within this new nested For Each Loop Container, add a Script Task that will create the database table for each worksheet. The tricky part I had to deal with here was defining the field types; these are not retained from the Excel worksheets or CSV text files. So I made them all nvarchar(255) or, if a column header was something like Remark or Description, nvarchar(max), which avoids the 4,000-character cap of a sized nvarchar. Here is the dynamic code I applied, stemming from what you began.
#region Namespaces
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Data;
using System.Data.OleDb;
using System.Xml.Serialization;
using System.IO;
using Microsoft.SqlServer.Dts.Runtime;
using System.Windows.Forms;
#endregion
namespace yyyyyyyyyy
{
[Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
public partial class ScriptMain : Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase
{
public void Main()
{
// TODO: Add your code here
string xlFile = Dts.Variables["User::ExcelFile"].Value.ToString(); //Includes full path and filename with extension
//xlFile = xlFile.Replace(@"\", @"\\");
string worksheet = Dts.Variables["User::Worksheet"].Value.ToString(); //Worksheet name from Excel file.
string Tablename = Dts.Variables["User::TableName"].Value.ToString(); //Currently file name without path and extension. Spaces replaced by underscores.
string ExcelExtension = Path.GetExtension(xlFile);
string columnName = "";
string columnType = "";
int i = 0;
string worksheet2 = worksheet + "$";
OleDbConnection xl = new OleDbConnection("Provider=Microsoft.ACE.OLEDB.12.0;Data Source=" + xlFile + ";Extended Properties=\"Excel 12.0 Xml;HDR=YES;IMEX=1\"");
xl.Open();
System.Data.DataTable dt = xl.GetOleDbSchemaTable(OleDbSchemaGuid.Columns, new object[] { null, null, worksheet2, null });
List<string> listColumn = new List<string>();
// Create the name of the table that will be created in the SQL Server database, which is
// a concatenation of the root file name and worksheet name separated by two underscores.
Tablename = Tablename + "__" + worksheet;
string CreateTable = "CREATE TABLE " + Tablename + " (";
string InsertTable = "INSERT INTO " + Tablename + " (";
string SelectColumns = "";
// Create the string that will be applied to create the table defining the field types based on the names
foreach (DataRow row in dt.Rows)
{
listColumn.Add(row["Column_name"].ToString());
columnName = listColumn[i].ToString();
if ((columnName == "Remark") || (columnName == "remark") || (columnName == "REMARK") ||
(columnName == "Remarks") || (columnName == "remarks") || (columnName == "REMARKS") ||
(columnName.Contains("Remarks")) || (columnName.Contains("remarks")) || (columnName.Contains("REMARKS")) ||
(columnName.Contains("Remark")) || (columnName.Contains("remark")) || (columnName.Contains("REMARK")) ||
(columnName == "Comment") || (columnName == "comment") || (columnName == "COMMENT") ||
(columnName == "Comments") || (columnName == "comments") || (columnName == "COMMENTS") ||
(columnName == "Description") || (columnName == "description") || (columnName == "DESCRIPTION") ||
(columnName.Contains("Description")) || (columnName.Contains("description")) || (columnName.Contains("DESCRIPTION")) ||
(columnName == "Legal") || (columnName == "legal") || (columnName == "LEGAL") ||
(columnName == "Note") || (columnName == "note") || (columnName == "NOTE") ||
(columnName.Contains("Format")) || (columnName.Contains("format")) || (columnName.Contains("FORMAT")) ||
(columnName == "Notes") || (columnName == "notes") || (columnName == "NOTES")
)
{
columnType = "nvarchar(max),";
}
else
{
columnType = "nvarchar(255),";
}
CreateTable = CreateTable + "[" + columnName + "] " + columnType;
InsertTable = InsertTable + "[" + columnName + "],";
SelectColumns = SelectColumns + "[" + columnName + "],";
//MessageBox.Show(columnName + " " + columnType);
i++;
}
// Remove last comma from CreateTable and add closing
CreateTable = CreateTable.Remove(CreateTable.Length - 1);
CreateTable = CreateTable + ")";
// Remove last comma from InsertTable and add closing
InsertTable = InsertTable.Remove(InsertTable.Length - 1);
InsertTable = InsertTable + ")";
// Remove last comma from SelectColumns
SelectColumns = SelectColumns.Remove(SelectColumns.Length - 1);
xl.Close();
string SQL = "";
// Assemble the dynamic SQL that will be applied in the SQL task next to generate and populate a new database table
if (ExcelExtension == ".xlsx")
{
SQL = "IF OBJECT_ID ('dbo." + Tablename + "') IS NOT NULL DROP TABLE dbo." + Tablename +
" " + CreateTable + " " +
InsertTable + " " + "SELECT " + SelectColumns + " FROM OPENROWSET('Microsoft.ACE.OLEDB.12.0', " +
//" INSERT INTO [dbo].[" + Tablename + "] SELECT * FROM OPENROWSET('Microsoft.ACE.OLEDB.12.0', " +
"'Excel 12.0 Xml;HDR=YES;Database=" + xlFile + "', 'SELECT * FROM [" + worksheet + "$]');";
}
else if (ExcelExtension == ".xls")
{
SQL = "IF OBJECT_ID ('dbo." + Tablename + "') IS NOT NULL DROP TABLE dbo." + Tablename +
" " + CreateTable + " " +
" INSERT INTO [dbo].[" + Tablename + "] SELET * FROM OPENROWSET('Microsoft.Jet.OLEDB.4.0', " +
"'Excel 8.0 Xml;HDR=YES;Database=" + xlFile + "', 'SELECT * FROM [" + worksheet + "$]');";
}
//MessageBox.Show(SQL);
Dts.Variables["User::CreateTableSQL"].Value = SQL;
Dts.TaskResult = (int)ScriptResults.Success;
}
#region ScriptResults declaration
enum ScriptResults
{
Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
};
#endregion
}
}
Looking at the above script you'll notice that the following ReadOnlyVariables will need to be declared: ExcelFile, SourceFolder, TableName, tempFileName, and Worksheet. Then the following ReadWriteVariables will need to be declared: ColumnCount, CreateTable, and InsertTableName.
5) Within the nested For Each Loop Container, just below the above Script Task, add an Execute SQL Task that will run the SQL contained in the CreateTableSQL variable. Be sure to set the SQLSourceType to "Variable". This will create and populate the table, and even overwrite it if it already exists.
When done, you should have a control flow matching the steps above: an outer For Each Loop Container holding the first Script Task, and a nested For Each Loop Container holding the second Script Task and the Execute SQL Task.
Hope this helps and let me know if you have any questions. I did not have time to remove the extraneous stuff, but this should put you on the right path. This loop container is for Excel files, but you can add on other loop containers with code modified to handle CSV or other file types. All of this can be enclosed in a single SSIS package.
The final SQL task just runs the following T-SQL, which looks for field names in your database containing a space between words and replaces that space with an underscore. It is not necessary, but it avoids having to wrap column names in brackets [] in later SQL.
DECLARE My_Cursor CURSOR
FOR
SELECT 'sp_rename '''+table_name+'.['+column_name+']'','''+replace(column_name,' ','_')+''',''COLUMN'''
FROM information_schema.columns
WHERE column_name like '% %'
OPEN My_Cursor
DECLARE @SQL NVARCHAR(1000)
FETCH NEXT FROM My_Cursor INTO @SQL
WHILE @@FETCH_STATUS <> -1
BEGIN
EXECUTE sp_executesql @SQL
FETCH NEXT FROM My_Cursor INTO @SQL
END
CLOSE My_Cursor
DEALLOCATE My_Cursor

How to append results in Processing?

I have implemented the Table() function in order to save the results generated by the application. However, it seems that the Timer function causes the application to write over the existing CSV file each time it runs. Rather than writing over the existing CSV file, I would like to append the newest search results to it. Is there a way to do this? Is it easier to append the results if they are stored in a different format such as JSON?
Timer timer;
import java.util.List;
Table table;
long lastID = Long.MAX_VALUE;
void setup() {
timer = new Timer(30000);
timer.start();
goTwitter();
table = new Table();
table.addColumn("id");
table.addColumn("latitude");
table.addColumn("longitude");
}
void draw(){
if (timer.isFinished()){
goTwitter();
timer.start();
}
}
void goTwitter(){
ConfigurationBuilder cb = new ConfigurationBuilder();
cb.setOAuthConsumerKey("");
cb.setOAuthConsumerSecret("");
cb.setOAuthAccessToken("");
cb.setOAuthAccessTokenSecret("");
Twitter twitter = new TwitterFactory(cb.build()).getInstance();
Query query = new Query("#love");
int numberOfTweets = 300;
ArrayList<Status> tweets = new ArrayList<Status>();
while (tweets.size () < numberOfTweets) {
if (numberOfTweets - tweets.size() > 100)
query.setCount(100);
else
query.setCount(numberOfTweets - tweets.size());
//long lastID = Long.MAX_VALUE;
try {
QueryResult result = twitter.search(query);
tweets.addAll(result.getTweets());
println("Gathered " + tweets.size() + " tweets");
for (Status t: tweets)
if(t.getId() < lastID) lastID = t.getId();
}
catch (TwitterException te) {
println("Couldn't connect: " + te);
};
query.setSinceId(lastID);
}
for (int i = 0; i < tweets.size(); i++) {
Status t = (Status) tweets.get(i);
GeoLocation loc = t.getGeoLocation();
String user = t.getUser().getScreenName();
String msg = t.getText();
String time = "";
if (loc!=null) {
Double lat = t.getGeoLocation().getLatitude();
Double lon = t.getGeoLocation().getLongitude();
println(i + " USER: " + user + " wrote: " + msg + " located at " + lat + ", " + lon);
TableRow newRow = table.addRow();
newRow.setString("id", user);
newRow.setDouble("latitude", lat);
newRow.setDouble("longitude", lon);
saveTable(table, "data2/syria_16500_5.csv");
}
}
println("lastID= " + lastID);
}
class Timer {
int savedTime;
int totalTime;
Timer (int tempTotalTime) {
totalTime = tempTotalTime;
}
void start(){
savedTime = millis();
}
boolean isFinished() {
int passedTime = millis() - savedTime;
if (passedTime > totalTime){
return true;
} else {
return false;
}
}
}
Well, there does not seem to be a direct implementation to append to a table, so you'll have to resort to a hack: load the table in processing, write to it and resave it, sort of like this:
processing.data.Table table;
void setup() {
File f = new File(sketchPath("") + "data2/syria_16500_5.csv");
println(f.getAbsolutePath());
if (!f.exists()) {
table = new processing.data.Table();
table.addColumn("id");
table.addColumn("latitude");
table.addColumn("longitude");
}
else
table = loadTable("data2/syria_16500_5.csv", "header, csv");
TableRow newRow = table.addRow();
newRow.setString("id", "asad");
newRow.setDouble("latitude", 234);
newRow.setDouble("longitude", 2523);
saveTable(table, "data2/syria_16500_5.csv");
}
The sketch first checks if the file exists. If it does not, it creates a new table, otherwise it loads the old table in with its header.
Be warned, this is not particularly safe... If you change your columns (say, in a text editor) and try to run the sketch again you will get an exception.

Bulk Insert into SQL Server 2008

for (int i = 0; i < myClass.Length; i++)
{
string upSql = "UPDATE CumulativeTable SET EngPosFT = #EngPosFT,EngFTAv=#EngFTAv WHERE RegNumber =#RegNumber AND Session=#Session AND Form=#Form AND Class=#Class";
SqlCommand cmdB = new SqlCommand(upSql, connection);
cmdB.CommandTimeout = 980000;
cmdB.Parameters.AddWithValue("#EngPosFT", Convert.ToInt32(Pos.GetValue(i)));
cmdB.Parameters.AddWithValue("#RegNumber", myClass.GetValue(i));
cmdB.Parameters.AddWithValue("#EngFTAv", Math.Round((engtot / arrayCount), 2));
cmdB.Parameters.AddWithValue("#Session", drpSess.SelectedValue);
cmdB.Parameters.AddWithValue("#Form", drpForm.SelectedValue);
cmdB.Parameters.AddWithValue("#Class", drpClass.SelectedValue);
int idd = Convert.ToInt32(cmdB.ExecuteScalar());
}
Assuming myClass.Length is 60, this does 60 update statements. How can I limit it to 1 update statement? A code example based on the above code will be appreciated. Thanks
Tried using this
StringBuilder command = new StringBuilder();
SqlCommand cmdB = null;
for (int i = 0; i < myClass.Length; i++)
{
command.Append("UPDATE CumulativeTable SET" + " EngPosFT = " + Convert.ToInt32(Pos.GetValue(i)) + "," + " EngFTAv = " + Math.Round((engtot / arrayCount), 2) +
" WHERE RegNumber = " + myClass.GetValue(i) + " AND Session= " + drpSess.SelectedValue + " AND Form= " + drpForm.SelectedValue + " AND Class= " + drpClass.SelectedValue + ";");
//or command.AppendFormat("UPDATE CumulativeTable SET EngPosFT = {0},EngFTAv={1} WHERE RegNumber ={2} AND Session={3} AND Form={4} AND Class={5};", Convert.ToInt32(Pos.GetValue(i)), Math.Round((engtot / arrayCount), 2), myClass.GetValue(i), drpSess.SelectedValue, drpForm.SelectedValue, drpClass.SelectedValue);
} // a "max length is 128" error is encountered
Look at the BULK INSERT T-SQL command. Since I don't have a lot of personal experience with that command, though, I'll point out an immediate opportunity to improve this code using the same SQL: create the command and parameters outside of the loop, and make only the necessary changes inside the loop:
string upSql = "UPDATE CumulativeTable SET EngPosFT = #EngPosFT,EngFTAv=#EngFTAv WHERE RegNumber =#RegNumber AND Session=#Session AND Form=#Form AND Class=#Class";
SqlCommand cmdB = new SqlCommand(upSql, connection);
cmdB.CommandTimeout = 980000;
//I had to guess at the sql types you used here.
//Adjust this to match your actual column data types
cmdB.Parameters.Add("#EngPosFT", SqlDbType.Int);
cmdB.Parameters.Add("#RegNumber", SqlDbType.Int);
//It's really better to use explicit types here, too.
//I'll just update the first parameter as an example of how it looks:
cmdB.Parameters.Add("#EngFTAv", SqlDbType.Decimal).Value = Math.Round((engtot / arrayCount), 2));
cmdB.Parameters.AddWithValue("#Session", drpSess.SelectedValue);
cmdB.Parameters.AddWithValue("#Form", drpForm.SelectedValue);
cmdB.Parameters.AddWithValue("#Class", SqlDbTypedrpClass.SelectedValue);
for (int i = 0; i < myClass.Length; i++)
{
cmdB.Parameters[0].Value = Convert.ToInt32(Pos.GetValue(i));
cmdB.Parameters[1].Value = myClass.GetValue(i);
int idd = Convert.ToInt32(cmdB.ExecuteScalar());
}
It would be better in this case to create a stored procedure that accepts a Table Valued Parameter. On the .NET side of things, you create a DataTable object containing a row for each set of values you want to use.
On the SQL Server side of things, you can treat the parameter as another table in a query. So inside the stored proc, you'd have:
UPDATE a
SET
EngPosFT = b.EngPosFT,
EngFTAv=b.EngFTAv
FROM
CumulativeTable a
inner join
@MyParm b
on
a.RegNumber =b.RegNumber AND
a.Session=b.Session AND
a.Form=b.Form AND
a.Class=b.Class
Where @MyParm is your table valued parameter.
This will then be processed as a single round-trip to the server.
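For completeness, here is a minimal sketch of the .NET side, assuming the table type and stored procedure already exist on the server; the names dbo.CumulativeKeysType and dbo.UpdateCumulative (and the column types) are hypothetical stand-ins:
// Sketch only: pass a DataTable to a stored procedure as a table-valued parameter.
// Assumes SQL Server 2008+, and that dbo.UpdateCumulative declares
// @MyParm dbo.CumulativeKeysType READONLY and runs the UPDATE...JOIN shown above.
DataTable tvp = new DataTable();
tvp.Columns.Add("EngPosFT", typeof(int));
tvp.Columns.Add("EngFTAv", typeof(decimal));
tvp.Columns.Add("RegNumber", typeof(string));
tvp.Columns.Add("Session", typeof(string));
tvp.Columns.Add("Form", typeof(string));
tvp.Columns.Add("Class", typeof(string));

for (int i = 0; i < myClass.Length; i++)
{
    // one row per update; the dropdown values repeat on every row
    tvp.Rows.Add(Convert.ToInt32(Pos.GetValue(i)),
                 Math.Round((engtot / arrayCount), 2),
                 myClass.GetValue(i),
                 drpSess.SelectedValue,
                 drpForm.SelectedValue,
                 drpClass.SelectedValue);
}

using (SqlCommand cmd = new SqlCommand("dbo.UpdateCumulative", connection))
{
    cmd.CommandType = CommandType.StoredProcedure;
    SqlParameter p = cmd.Parameters.AddWithValue("@MyParm", tvp);
    p.SqlDbType = SqlDbType.Structured;
    p.TypeName = "dbo.CumulativeKeysType";
    cmd.ExecuteNonQuery(); // one round-trip for all 60 rows
}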
In such scenarios it is best to hand the whole batch to the server in one operation rather than issuing a command per row. The example below builds a DataTable and copies it to SQL Server with SqlBulkCopy:
using System;
using System.Data;
using System.Data.SqlClient;
namespace DataTableExample
{
class Program
{
static void Main(string[] args)
{
DataTable prodSalesData = new DataTable("ProductSalesData");
// Create Column 1: SaleDate
DataColumn dateColumn = new DataColumn();
dateColumn.DataType = Type.GetType("System.DateTime");
dateColumn.ColumnName = "SaleDate";
// Create Column 2: ProductName
DataColumn productNameColumn = new DataColumn();
productNameColumn.ColumnName = "ProductName";
// Create Column 3: TotalSales
DataColumn totalSalesColumn = new DataColumn();
totalSalesColumn.DataType = Type.GetType("System.Int32");
totalSalesColumn.ColumnName = "TotalSales";
// Add the columns to the ProductSalesData DataTable
prodSalesData.Columns.Add(dateColumn);
prodSalesData.Columns.Add(productNameColumn);
prodSalesData.Columns.Add(totalSalesColumn);
// Let's populate the datatable with our stats.
// You can add as many rows as you want here!
// Create a new row
DataRow dailyProductSalesRow = prodSalesData.NewRow();
dailyProductSalesRow["SaleDate"] = DateTime.Now.Date;
dailyProductSalesRow["ProductName"] = "Nike";
dailyProductSalesRow["TotalSales"] = 10;
// Add the row to the ProductSalesData DataTable
prodSalesData.Rows.Add(dailyProductSalesRow);
// Copy the DataTable to SQL Server using SqlBulkCopy
using (SqlConnection dbConnection = new SqlConnection("Data Source=ProductHost;Initial Catalog=dbProduct;Integrated Security=SSPI;Connection Timeout=60;Min Pool Size=2;Max Pool Size=20;"))
{
dbConnection.Open();
using (SqlBulkCopy s = new SqlBulkCopy(dbConnection))
{
s.DestinationTableName = prodSalesData.TableName;
foreach (var column in prodSalesData.Columns)
s.ColumnMappings.Add(column.ToString(), column.ToString());
s.WriteToServer(prodSalesData);
}
}
}
}
}