The next thing you need in order to start creating data mining models with DMX in SSMS besides a data source is a data source view. For a UDM cube, it can be very complicated. For data mining models you usually need a singled table or two related tables only. I will create two versions of the procedure, i.e. two overloaded methods in a class. One will create a data source view with a single table, and the other one a data source view with two related tables, with relation limited to a single column only. As I mentioned, this should be enough for most of the cases; anyway, if you need more complex data source view, you can always expand existing methods or add another one.

I will use similar technique as I used for the procedure that creates a data source, which is described in my previous blog. You will have to be in the context of another AS database when executing the procedure. Besides that, the procedure will check whether the data source referred exists.

Data source view is presented in relational way, as collection of related tables. You create the schema very simply – you are actually creating an ADO.NET DataSet. Data source view inherits the schema from the DataSet. So, I will use DataAdapter to fill the schema of the DataTables in the DataSet, and then add a DataRelation to the Relations collection of the DataSet. The code:

 

using System;

using System.Collections.Generic;

using System.Text;

using System.Data;

using System.Data.OleDb;

using AMO = Microsoft.AnalysisServices;

using ADOMD = Microsoft.AnalysisServices.AdomdServer;

 

namespace ServerProcs

{

    public class ASProcs

    {

 

        // variant with a single table

        public static void CreateDataSourceView(string asDbName, string inpRds, string dsvName, string ownName1, string tblName1)

            // Call in DMX:

            // Call ServerProcs.CreateDataSourceView(ASDatabaseName, Existing_DataSource, New_DataSourceView,

            // Table1_Schema, Table1_Name)

            // Example:

            // Call ServerProcs.CreateDataSourceView(‘TestDb’, ‘pubs’, ‘pubs’,

            // ‘dbo’, ‘sales’)

 

        {

            AMO.Server currSvr = new AMO.Server();

            currSvr.CaptureXml = false;

            currSvr.Connect(“DataSource=” + ADOMD.Context.CurrentServerID + “;”);

            if (asDbName == ADOMD.Context.CurrentDatabaseName)

            {

                throw new ArgumentException(” Can’t update current database! “);

            }

            // Data source must exist

            AMO.Database targetDb = currSvr.Databases.GetByName(asDbName);

            if (!targetDb.DataSources.Contains(inpRds))

            {

                throw new ArgumentException(” Create Data Source first! “);

            }

            AMO.RelationalDataSource existingRds = (AMO.RelationalDataSource)targetDb.DataSources.GetByName(inpRds);

            // New data source view

            AMO.DataSourceView newDsv = new AMO.DataSourceView(dsvName, dsvName);

            if (targetDb.DataSourceViews.Contains(dsvName))

            {

                targetDb.DataSourceViews.Remove(dsvName);

            }

            // Schema is created with ADO.NET DataSet

            OleDbDataAdapter relAdapter = new OleDbDataAdapter(“”, existingRds.ConnectionString);

            DataSet myDataSet = new DataSet();

            relAdapter.SelectCommand.CommandText = “SELECT * FROM ” + ownName1 + “.” + tblName1;

            relAdapter.FillSchema(myDataSet, SchemaType.Mapped, tblName1);

            // Schema is then mapped to the data source view

            targetDb.DataSourceViews.Add(newDsv.ID);

            targetDb.DataSourceViews[dsvName].DataSourceID = existingRds.ID;

            targetDb.DataSourceViews[dsvName].Schema = myDataSet;

            // Update the database

            targetDb.Update(AMO.UpdateOptions.ExpandFull);

        }

 

        // overloaded variant with two tables connected with a foreign key on a single column

        public static void CreateDataSourceView(string asDbName, string inpRds, string dsvName,

            string ownName1, string tblName1, string ownName2, string tblName2, string colName)

        // Call in DMX:

        // Call ServerProcs.CreateDataSourceView(ASDatabaseName, Existing_DataSource, New_DataSourceView,

        // Table1_Schema, Table1_Name, Table2_Schema, Table2_Name, FK_Column)

        // Table1 is the parent (one side) table, Table2 is the child (many side) table

        // Example:

        // Call ServerProcs.CreateDataSourceView(‘TestDb’, ‘Northwind’, ‘Northwind’,

        // ‘dbo’, ‘Customers’, ‘dbo’, ‘Orders’, ‘CustomerID’)

        {

            AMO.Server currSvr = new AMO.Server();

            currSvr.CaptureXml = false;

            currSvr.Connect(“DataSource=” + ADOMD.Context.CurrentServerID + “;”);

            if (asDbName == ADOMD.Context.CurrentDatabaseName)

            {

                throw new ArgumentException(” Can’t update current database! “);

            }

            AMO.Database targetDb = currSvr.Databases.GetByName(asDbName);

            if (!targetDb.DataSources.Contains(inpRds))

            {

                throw new ArgumentException(” Create Data Source first! “);

            }

            AMO.RelationalDataSource existingRds = (AMO.RelationalDataSource)targetDb.DataSources.GetByName(inpRds);

            AMO.DataSourceView newDsv = new AMO.DataSourceView(dsvName, dsvName);

            if (targetDb.DataSourceViews.Contains(dsvName))

            {

                targetDb.DataSourceViews.Remove(dsvName);

            }

            OleDbDataAdapter relAdapter = new OleDbDataAdapter(“”, existingRds.ConnectionString);

            DataSet myDataSet = new DataSet();

            relAdapter.SelectCommand.CommandText = “SELECT * FROM ” + ownName1 + “.” + tblName1;

            relAdapter.FillSchema(myDataSet, SchemaType.Mapped, tblName1);

            relAdapter.SelectCommand.CommandText = “SELECT * FROM ” + ownName2 + “.” + tblName2;

            relAdapter.FillSchema(myDataSet, SchemaType.Mapped, tblName2);

            myDataSet.Relations.Add(myDataSet.Tables[tblName1].Columns[colName],

                myDataSet.Tables[tblName2].Columns[colName]);

            targetDb.DataSourceViews.Add(newDsv.ID);

            targetDb.DataSourceViews[dsvName].DataSourceID = existingRds.ID;

            targetDb.DataSourceViews[dsvName].Schema = myDataSet;

            targetDb.Update(AMO.UpdateOptions.ExpandFull);

        }

 

    }

}

 

Now it is very easy to create the data source view. here is the DMX code for both cases:

 

— Create a data source view 

Call ServerProcs.CreateDataSourceView(‘TestDb’,

     ‘AdventureWorksDW’, ‘AdventureWorksDW’,

     ‘dbo’, ‘vTargetMail’);

    

— Another data source view, this time with two tables

Call ServerProcs.CreateDataSourceView(‘TestDb’,

     ‘AdventureWorksDW’, ‘AdventureWorksDW1’,

     ‘dbo’, ‘vAssocSeqOrders’,

     ‘dbo’, ‘vAssocSeqLineItems’,

     ‘OrderNumber’);    

 

Finally, you have use the context of your target database by selecting it from the drop-down list in SSMS, and you can start creating data mining models:

 

— Create a model with structure

CREATE MINING MODEL [NBSample]

(

    CustomerKey LONG KEY,

    Gender TEXT DISCRETE,

    [Number Cars Owned] LONG DISCRETE,

    [Bike Buyer] LONG DISCRETE PREDICT

)

USING Microsoft_Naive_Bayes;

 

— Process the structure

INSERT INTO [NBSample_Structure]

 (CustomerKey, Gender, [Number Cars Owned],[Bike Buyer])

OPENQUERY([AdventureWorksDW],

  ‘SELECT CustomerKey, Gender, [NumberCarsOwned], [BikeBuyer]

     FROM [vTargetMail]’);

 

— Browse the model

SELECT *

  FROM [NBSample].CONTENT;

 

— Prediction Join

— Singleton query

SELECT [NBSample].[Bike Buyer],

        PredictHistogram([Bike Buyer])

  FROM [NBSample] NATURAL PREDICTION JOIN

       (SELECT ‘M’ AS [Gender],

               0 AS [Number Cars Owned]) AS t;

 

— Openquery

SELECT t.[LastName],

       t.[FirstName],

       [NBSample].[Bike Buyer],

       PredictProbability([Bike Buyer]) AS NBPredictProbability,

       t.[Gender],

       t.[NumberCarsOwned]

  FROM [NBSample] PREDICTION JOIN

       OPENQUERY([AdventureWorksDW],

        ‘SELECT [LastName],

                [FirstName],

                [Gender],

                [NumberCarsOwned]

           FROM [dbo].[vTargetMail]’) AS t

        ON [NBSample].[Gender] = t.[Gender] AND

           [NBSample].[Number Cars Owned] = t.[NumberCarsOwned];

 

That’s it – we are done. You can browse your model from SSMS with data mining viewers and you will see the model is perfect, just like it would be created with BIDS.

 

Dejan Sarka