Tuesday, March 27, 2012

Using EPPlus Library to Convert XLSX to CSV

The EPPlus library for Excel data is great, but it has no built-in functionality for converting .xlsx to .csv.

Here is a sample (posted here because I discovered a pitfall, and it's not a slam dunk) for doing just that.


using System;

using System.Collections.Generic;

using System.Linq;

using OfficeOpenXml;

using System.IO;



namespace KillMeConsole

{



    public static class ListExtension

    {

        public static string ToDelimitedString( this List<string> list, string separator = ":", bool insertSpaces = false )

        {

            var result = string.Empty;

            for ( int i = 0; i < list.Count; i++ )

            {

                var currentString = list[i];

                if ( i < list.Count - 1 )

                {

                    currentString += separator;

                    if ( insertSpaces )

                    {

                        currentString += ' ';

                    }

                }

                result += currentString;

            }

            return result;

        }

    }



    class Program

    {



        static void Main( string[] args )

        {

            var inputFile = new FileInfo( @"C:\temp\killme\Book1.xlsx" );

            var outputFile = @"C:\temp\killme\Book1.csv";

            using ( var doc = new ExcelPackage( inputFile ) )

            {

                var workbook = doc.Workbook;

                if ( workbook != null )

                {

                    if ( workbook.Worksheets.Count > 0 )

                    {

                        var worksheet = workbook.Worksheets[1];

                        int maxColumnNumber = worksheet.Dimension.End.Column;

                        var convertedRecords = new List<List<string>>( worksheet.Dimension.End.Row );

                        var excelRows = worksheet.Cells.GroupBy( c => c.Start.Row ).ToList();

                        excelRows.ForEach( r =>

                        {

                            var currentRecord = new List<string>( maxColumnNumber );

                            var cells = r.OrderBy( cell => cell.Start.Column ).ToList();

                            for ( int i = 1; i <= maxColumnNumber; i++ )

                            {

                                var currentCell = cells.Where( c => c.Start.Column == i ).FirstOrDefault();

                                if ( currentCell == null )

                                {

                                    /// Add a cell value for empty cells to keep data aligned.

                                    AddCellValue( string.Empty, currentRecord );

                                }

                                else

                                {

                                    /// Can't use .Text: http://epplus.codeplex.com/discussions/349696

                                    AddCellValue( currentCell.Value == null ? string.Empty : currentCell.Value.ToString(), currentRecord );

                                }

                            }

                            convertedRecords.Add( currentRecord );

                        } );

                        WriteToFile( convertedRecords, outputFile );

                    }

                }

            }



            Console.WriteLine( "chekkit: {0}", outputFile );

            Console.ReadKey();

        }



        private static void AddCellValue( string s, List<string> record )

        {

            record.Add( string.Format( "{0}{1}{0}", '"', s ) );

        }



        /// <summary>

        /// Assumes file isn't massive

        /// </summary>

        private static void WriteToFile( List<List<string>> records, string path )

        {

            var commaDelimited = new List<string>( records.Count );

            records.ForEach( r => commaDelimited.Add( r.ToDelimitedString( "," ) ) );

            File.WriteAllLines( path, commaDelimited );

        }



    }



}

Wednesday, January 25, 2012

WPF Mapping / Association Control

When importing data from one store to another, many applications allow visual "mapping" of source fields to destination fields where lines are drawn to connect the two.
I tried in vain to find some kind of WPF/Xaml control that would do that, so I created one:

The control (regular user control) would be useful for any kind of association between two sets of data. The user creates associations by dragging from source to destination. It has a mode for one-to-one (a source item can only be mapped to one destination item) and one-to-many. Moving the scroll bars of the lists redraws the association lines appropriately. It's done MVVC style with models for the control as a whole and for the lists that hold the fields.

I was amazed at how easy this was to do with Xaml. I'm only a recent Xaml user, and when I use it I'm so pleased and dismayed at the same time. Pleased because it's such a rich, well thought out technology. Dismayed because MS has been forced to essentially abandon it in favor of the tyranny of HTML. I wonder how much more difficult this same thing would be to do using HTML5.

Source code here. Hope someone else can make use of it.

Labels:

Monday, November 28, 2011

WPF Wizard Control

There doesn't seem to really be anything in the way of a wizard control for WPF. Developer Express has one for Winforms, but when it came to WPF, their recent advice is to look at this project until they create one.

I took that project (not a control; merely an example of a wizard implementation) and made it into a reusable user control. It's available on codeplex. Just as with my last post, I hope someone can make use of it.

Labels:

Thursday, November 03, 2011

Programmatically Creating SSIS Packages

The last post was specific; this one very general.

I created a project on codeplex that is a functional import system using the SSIS runtime. If you are heading down the road of using the SSIS API, the project may be very helpful in helping to figure out just how.

I sure hope it saves somebody a bunch of time.

Thursday, October 13, 2011

Programmatically Creating an SSIS Package with a Flat File Connection Source

I'm using the SSIS API to programmatically create (and execute) a package. The requirement for the first package is to get .csv data from a flat file into a SQL Server table. So I start checking into the MS docs, looks easy enough. Not so fast. When it comes to a flat file connection, there seems to be holes in the API. I ended up having to use one class that (according to the docs) I shouldn't be referencing in my code, but there doesn't seem to be any other way to make the flat file connection work because it simply won't load column metadata from the file. Won't do it.

I found exactly one other sample of doing this (which was helpful), but that sample's method of reading the flat file's columns seemed less than optimal.

Following is a working sample. Sorry for formatting; I simply don't know any way to prevent the software from stripping out my formatting...

The data file looks like the following. Column names in first row.
"this","that"
"data","more data"
"other data","you get the point"

The SQL table is like yay:
CREATE TABLE [dbo].[bubba](
[this] [varchar](max) NULL,
[that] [varchar](max) NULL
) ON [PRIMARY]

The method below uses this helper class which makes use of the TextFieldParser class which seems very useful. I have no idea why it's in the Microsoft.VisualBasic.FileIO namespace, but whatever works:


class FlatFileColumnReader

{

    public List<string> Columns( string path, char delimiter, FieldType ft )

    {

        var tfp = new TextFieldParser( path )

        {

            TextFieldType = ft

        };

        tfp.Delimiters = new string[] {delimiter.ToString()};

        return tfp.ReadFields().ToList();

    }

}



Here is the (big fat, but working) method. Requires following usings. The referenced assemblies were found on my machine in C:\Program Files (x86)\Microsoft SQL Server\100\SDK\Assemblies. Note that they're in the x86 dir. Thus, my application is set to target x86.
using System.Data.Common;
using Microsoft.SqlServer.Dts.Runtime;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.VisualBasic.FileIO;


public Microsoft.SqlServer.Dts.Runtime.Package Generate( Microsoft.SqlServer.Dts.Runtime.Application app )

{

    /// Objects

    Microsoft.SqlServer.Dts.Runtime.Package _package;

    Executable _dataFlowTask;

    IDTSComponentMetaData100 _dataSource;

    IDTSComponentMetaData100 _dataDest;

    CManagedComponentWrapper _sourceInstance;

    CManagedComponentWrapper _destinationInstance;

    ConnectionManager _conMgrSource;

    ConnectionManager _conMgrDest;



    /// Create package and data flow task

    _package = new Microsoft.SqlServer.Dts.Runtime.Package();

    _package.DelayValidation = true;

    _dataFlowTask = _package.Executables.Add( "STOCK:PipelineTask" );  // PipelineTask is a DataFlowTask ??

    var pipe = (MainPipe)( (Microsoft.SqlServer.Dts.Runtime.TaskHost)_dataFlowTask ).InnerObject;

    pipe.Events = DtsConvert.GetExtendedInterface( new ComponentEvents() as IDTSComponentEvents );  // my ComponentEvents() just writes some stuff to debug for now



    /// Create connections

    _conMgrSource = _package.Connections.Add( "FLATFILE" );

    _conMgrSource.Properties["Format"].SetValue( _conMgrSource, "Delimited" );

    _conMgrSource.Properties["Name"].SetValue( _conMgrSource, "Flat File Connection" );

    _conMgrSource.Properties["ConnectionString"].SetValue( _conMgrSource, @"C:\temp\Eeemport\bubba.txt" );

    _conMgrSource.Properties["ColumnNamesInFirstDataRow"].SetValue( _conMgrSource, true );

    _conMgrSource.Properties["HeaderRowDelimiter"].SetValue( _conMgrSource, "\r\n" );

    /// If you set the delimiter like this, it'll look correct if you open the resulting package in the UI, but it won't execute (unless you click "Reset Columns")

    //_conMgrSource.Properties["RowDelimiter"].SetValue( _conMgrSource, "{CR}{LF}" );

    _conMgrSource.Properties["TextQualifier"].SetValue( _conMgrSource, "\"" );

    _conMgrSource.Properties["DataRowsToSkip"].SetValue( _conMgrSource, 0 );



    _conMgrDest = _package.Connections.Add( "OLEDB" );

    // This provider wouldn't work

    //_conMgrDest.ConnectionString = @"Provider=Native OLE DB\SQL Server Native Client 10.0;Data Source=.\SQLEXPRESS;Initial Catalog=FASClient;Integrated Security=True";

    _conMgrDest.ConnectionString = @"Provider=SQLOLEDB.1;Integrated Security=SSPI;Persist Security Info=False;Initial Catalog=FASClient;Data Source=.\SQLEXPRESS";

    _conMgrDest.Name = "OLE DB Connection";

    _conMgrDest.Description = "OLE DB Connection";

    _conMgrDest.Properties["RetainSameConnection"].SetValue( _conMgrDest, true );



    /// Create the columns in the flat file connection

    var flatFileConnection = _conMgrSource.InnerObject as IDTSConnectionManagerFlatFile100;

    var fileColumns = new FlatFileColumnReader().Columns( @"C:\temp\Eeemport\bubba.txt", ',', FieldType.Delimited );

    for ( int i = 0; i < fileColumns.Count; i++ )

    {

        /// This object (IDTSConnectionManagerFlatFileColumn100) is not supposed to be referenced by my code according to doc:

        /// http://msdn.microsoft.com/en-us/library/microsoft.sqlserver.dts.runtime.wrapper.idtsconnectionmanagerflatfilecolumn100.aspx

        var column = flatFileConnection.Columns.Add();

        /// Last column delimiter must be newline.

        /// If you select "," for the column delimiter in the designer for a Flat File Connection, and the row delimiter is newline, it does this same thing...

        column.ColumnDelimiter = ( i == fileColumns.Count - 1 ) ? "\r\n" : ",";

        column.TextQualified = true;

        column.ColumnType = "Delimited";

        /// Here's one benefit of creating my own columns:

        /// My destination column in Sql Server is varchar.  The columns seem to be defaulted to DT_WSTR which won't go into a varchar column w/o being

        /// manually changed or run through a data converter component.

        column.DataType = DataType.DT_TEXT;

        column.DataPrecision = 0;

        column.DataScale = 0;

        ( (IDTSName100)column ).Name = fileColumns[i];

    }



    /// Create Data Flow Components

    _dataSource = pipe.ComponentMetaDataCollection.New();

    _dataSource.Name = "Flat File Source";

    _dataSource.ComponentClassID = app.PipelineComponentInfos["Flat File Source"].CreationName;

    _dataSource.ValidateExternalMetadata = false;



    _dataDest = pipe.ComponentMetaDataCollection.New();

    _dataDest.Name = "Sql Server Destination";

    _dataDest.ComponentClassID = app.PipelineComponentInfos["SQL Server Destination"].CreationName;



    ///// Create design instances

    _sourceInstance = _dataSource.Instantiate();

    _sourceInstance.ProvideComponentProperties();



    /// I think this junk must come after ProvideComponentProperties() above

    _dataSource.RuntimeConnectionCollection[0].ConnectionManager = DtsConvert.GetExtendedInterface( _conMgrSource );

    _dataSource.RuntimeConnectionCollection[0].ConnectionManagerID = _conMgrSource.ID;



    _sourceInstance.AcquireConnections( null );  // do we need to do this since we created our own columns?

    _sourceInstance.ReinitializeMetaData();

    _sourceInstance.ReleaseConnections();



    _destinationInstance = _dataDest.Instantiate();

    _destinationInstance.ProvideComponentProperties();



    /// I know SetComponentProperty can only be called after ProvideComponentProperties()

    /// To see available component properties, open an existing package (the XML) with an existing component of that type

    _destinationInstance.SetComponentProperty( "BulkInsertTableName", "[dbo].[bubba]" );

    _dataDest.RuntimeConnectionCollection[0].ConnectionManager = DtsConvert.GetExtendedInterface( _conMgrDest );

    _dataDest.RuntimeConnectionCollection[0].ConnectionManagerID = _conMgrDest.ID;

    _destinationInstance.AcquireConnections( null );

    _destinationInstance.ReinitializeMetaData();

    _destinationInstance.ReleaseConnections();



    //// Hook the path from source to dest

    var path = pipe.PathCollection.New();

    path.AttachPathAndPropagateNotifications( _dataSource.OutputCollection[0], _dataDest.InputCollection[0] );



    /// Do stuff with the virtual input (whatever the @#$% that is)

    var virtualInput = _dataDest.InputCollection[0].GetVirtualInput();

    foreach ( IDTSVirtualInputColumn100 column in virtualInput.VirtualInputColumnCollection )

    {

        _destinationInstance.SetUsageType( _dataDest.InputCollection[0].ID, virtualInput, column.LineageID, DTSUsageType.UT_READONLY );

    }



    /// MapColumns();

    foreach ( IDTSInputColumn100 inputColumn in _dataDest.InputCollection[0].InputColumnCollection )

    {

        var outputColumn = _dataDest.InputCollection[0].ExternalMetadataColumnCollection[inputColumn.Name];

        outputColumn.Name = inputColumn.Name;

        _destinationInstance.MapInputColumn( _dataDest.InputCollection[0].ID, inputColumn.ID, outputColumn.ID );

    }



    //_package.Validate( _package.Connections, null, null, null );



    return _package;

}

Friday, May 06, 2011

File or Folder Delete Utility

I originally put this utility together for one very specific purpose: to delete all files in a given folder whose name started with a number (cleaning up after other processes). It has since been used for other purposes, so I made it easier to extend by using the "pipeline" pattern just like is commonly done with data filtering. File or folder names from the requested directory are passed through filters (Filters.cs). Those meeting filter requirements are processed (either logged or deleted).

The currently implemented filters are:
  • Date (always taken into account)
  • Starts with numeric
  • Name is a guid
It will target either files or folders. Launch the utility with no parameters to see instructions.

To extend with another filter:
  • Add methods to Filters.cs following same pattern as existing (returns IEnumerable of FileInfo for files; IEnumerable of string for folders).
  • Add parameter metadata for the new filter parameter in AppParams.cs.PARAM_STRINGS
  • Add a "mode" property for the new filter in AppParams.cs (Like existing NumericMode).
  • Add a method to read the new param from the command line and set the new property you created above (like existing SetNumericMode).
  • Update the help text that prints when no params or bad params are passed to the utility in Program.cs

Test before using, of course, but I hope it's of use to somebody. If you just want the utility, get it here; if you want the code, it's here.

Wednesday, December 22, 2010

C# Decimal to English Money Converter



I needed a way to convert a money value (decimal) into the kind of English text that appears on a legal document (like a check or a contract), e.g., 1245.36 becomes "One Thousand Two Hundred Forty Five and 36/100 Dollars."
After searching for awhile, I couldn't find anything. Maybe somebody else will make use of this (took a bit longer than I estimated). Has a limitation on millions (I'm not working on a government contract), but this can easily be changed. Requires .Net 4 (uses Tuples).

Sorry for poor code formatting.

First the decimal extension class that contains a method used by the actual converter class and also has a method to call the converter itself:


public static class DecimalExtension
{

/// <summary>
/// Evaluates just the portion of the value to the right of the decimal place and returns it as a 2 character string.
/// Returns 00 if the value is a whole number.
/// </summary>
/// <returns>The value to the right of the decimal place. Returns 0 if the value is a whole number.</returns>
public static string GetDecimalNumbers( this decimal number )
{
int divint = Convert.ToInt32( Decimal.Floor( number ) );
decimal decValue = number - divint;

var result = decValue.ToString();

if ( result.Length > 1 )
{
result = result.Substring( 2 );
if ( result.Length > 2 )
{
result = result.Substring( 0, 2 );
}
else if ( result.Length < 2 )
{
result += "0";
}
}
else
{
result = "00";
}

return result;
}

public static string ToEnglishMoney( this decimal d )
{
return new DecimalToEnglishMoney( d ).ToString();
}

}

Now for the actual converter class:


public class DecimalToEnglishMoney
{

private enum DigitPlace
{
Ones = 0,
Tens = 1,
Hundreds = 2
}

private string _result;

public DecimalToEnglishMoney( decimal d )
{
_result = FormatWholePortion( d );
_result += FormatDecimalPortion( d );
}

public override string ToString()
{
return _result;
}

private string FormatDecimalPortion( decimal d )
{
var result = string.Empty;
var decimalPortion = d.GetDecimalNumbers();
result += string.Format( " and {0}/100 Dollars", decimalPortion );
return result;
}

private string FormatWholePortion( decimal d )
{
var result = string.Empty;
var wholePortion = (int)Math.Floor( (double)d );
var groups = FormatToNumberGroups( wholePortion );
groups.ForEach( g =>
{
result += (result == string.Empty) ? string.Empty : " ";
var formattedChunk = FormatChunkToEnglish( g.Item2 );
var amountDescriptor = (g.Item1 == string.Empty) ? string.Empty : " " + g.Item1; // "Thousand"
result += formattedChunk;
result += (amountDescriptor == string.Empty) ? string.Empty : amountDescriptor;
} );
return result;
}

/// <summary>
/// Returns the chunk name (blank, Thousand, Million) and the chunk number
/// </summary>
/// <param name="i"></param>
/// <returns>"Million" "NNN"</returns>
private List<Tuple<string, string>> FormatToNumberGroups( int i )
{
var result = new List<Tuple<string, string>>();
var asString = i.ToString();
var count = 1;
while ( asString.Length > 0 )
{
var chunkStartPos = (asString.Length < 3) ? 0 : asString.Length - 3;
var chunkLength = (asString.Length < 3) ? asString.Length : 3;
var chunk = asString.Substring( chunkStartPos, chunkLength );
switch ( count )
{
case 2:
result.Add( new Tuple<string, string>( "Thousand", chunk ) );
break;
case 3:
result.Add( new Tuple<string, string>( "Million", chunk ) );
break;
default:
/// First chunk is blank and we're not expecting anything > than million
result.Add( new Tuple<string, string>( string.Empty, chunk ) );
break;
}
count++;
asString = (asString.Length > 3) ? asString.Substring( 0, asString.Length - 3 ) : string.Empty;
}
result.Reverse();
return result;
}

public string FormatChunkToEnglish( string chunk )
{
Debug.Assert( (chunk.Length <= 3) && (chunk.Length >= 1), "Expecting 1-3 digit portion of a number to format to english." );

var onesDigit = OnesDigit( chunk );
var tensDigit = TensDigit( chunk );
var hundredsDigit = HundredsDigit( chunk );

var onesDigitFormatted = FormatDigit( onesDigit, DigitPlace.Ones, chunk );
var tensDigitFormatted = (tensDigit == string.Empty) ? string.Empty : FormatDigit( tensDigit, DigitPlace.Tens, chunk );
var hundredsDigitFormatted = (hundredsDigit == string.Empty) ? string.Empty : FormatDigit( hundredsDigit, DigitPlace.Hundreds, chunk );

var result = onesDigitFormatted;
if ( tensDigitFormatted != string.Empty )
{
result = ( result == string.Empty ) ? tensDigitFormatted : string.Format( "{0} {1}", tensDigitFormatted, result );
}
if ( hundredsDigitFormatted != string.Empty )
{
result = string.Format( "{0} {1}", hundredsDigitFormatted, result );
}

return result;
}

#region These all deal with the 1-3 digit chunks

private string HundredsDigit( string chunk )
{
return (chunk.Length > 2) ? chunk.Substring( chunk.Length - 3, 1 ) : string.Empty;
}

private string TensDigit( string chunk )
{
return (chunk.Length > 1) ? chunk.Substring( chunk.Length - 2, 1 ) : string.Empty;
}

private string OnesDigit( string chunk )
{
return chunk.Substring( chunk.Length - 1 );
}

private bool ContainsTeen( string allDigits )
{
return TensDigit( allDigits ) == "1";
}

#endregion

private string FormatDigit( string digit, DigitPlace digitPlace, string allDigits )
{
/// Param must be string due to the way it's constructed
Debug.Assert( digit.Length == 1, "Expecting to format single digit while converting number to english, but received multiple digits" );

switch ( digit )
{
case "0":
return string.Empty;
case "1":
switch ( digitPlace )
{
case DigitPlace.Ones:
return ContainsTeen(allDigits) ? string.Empty : "One";
case DigitPlace.Tens:
return FormatTeen(allDigits);
case DigitPlace.Hundreds:
return "One Hundred";
}
break;
case "2":
switch ( digitPlace )
{
case DigitPlace.Ones:
return ContainsTeen(allDigits) ? string.Empty : "Two";
case DigitPlace.Tens:
return "Twenty";
case DigitPlace.Hundreds:
return "Two Hundred";
}
break;
case "3":
switch ( digitPlace )
{
case DigitPlace.Ones:
return ContainsTeen(allDigits) ? string.Empty : "Three";
case DigitPlace.Tens:
return "Thirty";
case DigitPlace.Hundreds:
return "Three Hundred";
}
break;
case "4":
switch ( digitPlace )
{
case DigitPlace.Ones:
return ContainsTeen(allDigits) ? string.Empty : "Four";
case DigitPlace.Tens:
return "Forty";
case DigitPlace.Hundreds:
return "Four Hundred";
}
break;
case "5":
switch ( digitPlace )
{
case DigitPlace.Ones:
return ContainsTeen(allDigits) ? string.Empty : "Five";
case DigitPlace.Tens:
return "Fifty";
case DigitPlace.Hundreds:
return "Five Hundred";
}
break;
case "6":
switch ( digitPlace )
{
case DigitPlace.Ones:
return ContainsTeen(allDigits) ? string.Empty : "Six";
case DigitPlace.Tens:
return "Sixty";
case DigitPlace.Hundreds:
return "Six Hundred";
}
break;
case "7":
switch ( digitPlace )
{
case DigitPlace.Ones:
return ContainsTeen(allDigits) ? string.Empty : "Seven";
case DigitPlace.Tens:
return "Seventy";
case DigitPlace.Hundreds:
return "Seven Hundred";
}
break;
case "8":
switch ( digitPlace )
{
case DigitPlace.Ones:
return ContainsTeen(allDigits) ? string.Empty : "Eight";
case DigitPlace.Tens:
return "Eighty";
case DigitPlace.Hundreds:
return "Eight Hundred";
}
break;
case "9":
switch ( digitPlace )
{
case DigitPlace.Ones:
return ContainsTeen(allDigits) ? string.Empty : "Nine";
case DigitPlace.Tens:
return "Ninety";
case DigitPlace.Hundreds:
return "Nine Hundred";
}
break;
default:
return string.Empty;
}
return string.Empty; // not a good sign when you have to add code just to make it compile
}

/// <summary>
/// </summary>
/// <param name="allDigits">Either 2 or 3 characters long and the tens digit is a one</param>
/// <returns></returns>
private string FormatTeen( string allDigits )
{
Debug.Assert( (allDigits.Length == 2) || (allDigits.Length == 3) );
switch ( OnesDigit(allDigits) )
{
case "0":
return "Ten";
case "1":
return "Eleven";
case "2":
return "Twelve";
case "3":
return "Thirteen";
case "4":
return "Fourteen";
case "5":
return "Fifteen";
case "6":
return "Sixteen";
case "7":
return "Seventeen";
case "8":
return "Eighteen";
case "9":
return "Nineteen";
default:
return string.Empty;
}
}

}