/*------------------------------------------------------------------------ File : csvImport.p Purpose : Provides functionality to create subjects out of CSV files Should be added to the super-procedure stack of the AppServer CSVToTT can then be used to Author(s) : Nick Finch Created : 15/10/09 Notes : ---------------------------------------------------------------------- Copyright 2009 DataPA Limited and other contributors as listed below. All Rights Reserved. ---------------------------------------------------------------------- Amendments ========== Date Author Description ======== ============== ============================================== 26/04/13 Michael Hambley Minor changes to stop missing a line if autogenerating Column Type, and handle empty fields ------------------------------------------------------------------------ */ /* ************************ Function Prototypes ********************** */ FUNCTION CSVToTT RETURNS HANDLE (INPUT filename AS CHARACTER, INPUT autoGenerateTypes AS LOGICAL, INPUT firstColumnContainsHeadings AS LOGICAL, INPUT ColumnHeadings AS CHARACTER, INPUT ColumnTypes AS CHARACTER, INPUT DateFormat AS CHARACTER) FORWARD. FUNCTION GetColumnDataType RETURNS CHARACTER ( INPUT columnVal AS CHARACTER ) FORWARD. FUNCTION GetColumnTypes RETURNS CHARACTER ( INPUT dataline AS CHARACTER ) FORWARD. FUNCTION getValidFieldName RETURNS CHARACTER ( INPUT ipcFieldName AS CHARACTER ) FORWARD. FUNCTION GetValidFieldNames RETURNS CHARACTER ( INPUT LabelList AS CHARACTER ) FORWARD. FUNCTION Tokenise RETURNS CHARACTER ( INPUT iopcstring AS CHARACTER) FORWARD. /* *************************** Main Block *************************** */ /* ************************ Function Implementations ***************** */ FUNCTION CSVToTT RETURNS HANDLE (INPUT filename AS CHARACTER, INPUT autoGenerateTypes AS LOGICAL, INPUT firstColumnContainsHeadings AS LOGICAL, INPUT ColumnHeadings AS CHARACTER, INPUT ColumnTypes AS CHARACTER, INPUT DateFormat AS CHARACTER) : /*------------------------------------------------------------------------------ Purpose: Imports a CSV file into a Dynamic Temp Table Notes: filename is the name of the CSV file If autoGenerateTypes is true, ------------------------------------------------------------------------------*/ DEFINE VARIABLE FieldNames AS CHARACTER NO-UNDO. DEFINE VARIABLE cDataline AS CHARACTER NO-UNDO. DEFINE VARIABLE hTempTable AS HANDLE NO-UNDO. DEFINE VARIABLE iNum AS INTEGER NO-UNDO. DEFINE VARIABLE cFormats AS CHARACTER NO-UNDO. DEFINE VARIABLE hTempTableBuffer AS HANDLE NO-UNDO. DEFINE VARIABLE hFieldBuffer AS HANDLE NO-UNDO. DEFINE VARIABLE cOldDateFormat AS CHARACTER NO-UNDO. DEFINE VARIABLE iNumFields AS INTEGER NO-UNDO. /* Check the csv file exists */ IF SEARCH(filename) = ? THEN RETURN ?. /* If a date format has been specified, make sure the session date format is set appropriately */ IF (DateFormat = "dmy" OR DateFormat = "mdy") AND DateFormat <> SESSION:DATE-FORMAT THEN DO: /* Store the old date format so we can set it back */ cOldDateFormat = SESSION:DATE-FORMAT. SESSION:DATE-FORMAT = DateFormat. END. /* Open the file for input */ INPUT FROM VALUE(filename) NO-ECHO. /* Read the first line */ IMPORT UNFORMATTED cDataline. cDataline = Tokenise(cDataline). /* If first line contains headings, assign them and read the next line */ IF firstColumnContainsHeadings THEN DO: ColumnHeadings = cDataline. IMPORT UNFORMATTED cDataline. cDataline = Tokenise(cDataline). END. /* Otherwise, prepare the list the user supplied */ ELSE ColumnHeadings = REPLACE(ColumnHeadings,",",CHR(1)). /* Store the number of fileds to save doing this repeatedly */ iNumFields = NUM-ENTRIES(ColumnHeadings,CHR(1)). /* Check the first line contains the same number of items as the heading */ IF iNumFields <> NUM-ENTRIES(cDataline,CHR(1)) THEN DO: MESSAGE "ERROR IMPORTING CSV: " + STRING(iNumFields) + " Column Headings, " + STRING(NUM-ENTRIES(cDataline,CHR(1))) + " data entries". /* Reset the old date format if we need to */ IF cOldDatEFORMAT <> ? THEN SESSION:DATE-FORMAT = cOldDateFormat. RETURN ?. END. /* If autoGenerateTypes, generate the headings and data types */ IF autoGenerateTypes THEN DO: ColumnTypes = GetColumnTypes(cDataline). END. /* Not autoGenerateTypes, so just prepare the lists we've been given */ ELSE ColumnTypes = REPLACE(ColumnTypes,",",CHR(1)). /* Check we have the correct number of column types */ IF NUM-ENTRIES(ColumnTypes,CHR(1)) <> iNumFields THEN DO: MESSAGE "ERROR IMPORTING CSV: " + STRING(iNumFields) + " Column Headings, " + STRING(NUM-ENTRIES(cDataline,CHR(1))) + " data entries". /* Reset the old date format if we need to */ IF cOldDatEFORMAT <> ? THEN SESSION:DATE-FORMAT = cOldDateFormat. RETURN ?. END. /* Build a valid list of field names from the column headings */ FieldNames = GetValidFieldNames(ColumnHeadings). /* Create a temp table using the temp table handle. */ CREATE TEMP-TABLE hTempTable. /* Now add each field to the temp table */ REPEAT iNum = 1 TO iNumFields ON ERROR UNDO, LEAVE: CASE ENTRY(iNum,ColumnTypes,CHR(1)): WHEN "DECIMAL" THEN hTempTable:ADD-NEW-FIELD(ENTRY(iNum,FieldNames,CHR(1)), "DECIMAL", 0, ">>,>>>,>>9.99", 0, ENTRY(iNum,ColumnHeadings,CHR(1))). WHEN "INTEGER" THEN hTempTable:ADD-NEW-FIELD(ENTRY(iNum,FieldNames,CHR(1)), "INTEGER", 0, ">>,>>>,>>9", 0, ENTRY(iNum,ColumnHeadings,CHR(1))). WHEN "DATE" THEN hTempTable:ADD-NEW-FIELD(ENTRY(iNum,FieldNames,CHR(1)), "DATE", 0, "99/99/9999", ?, ENTRY(iNum,ColumnHeadings,CHR(1))). WHEN "DATETIME-TZ" THEN hTempTable:ADD-NEW-FIELD(ENTRY(iNum,FieldNames,CHR(1)), "DATETIME-TZ", 0, "99/99/9999 HH:MM:SS.SSS+HH:MM", ?, ENTRY(iNum,ColumnHeadings,CHR(1))). WHEN "DATETIME" THEN hTempTable:ADD-NEW-FIELD(ENTRY(iNum,FieldNames,CHR(1)), "DATETIME", 0, "99/99/9999 HH:MM:SS.SSS", ?, ENTRY(iNum,ColumnHeadings,CHR(1))). WHEN "LOGICAL" THEN hTempTable:ADD-NEW-FIELD(ENTRY(iNum,FieldNames,CHR(1)), "LOGICAL", 0, "yes/no", ?, ENTRY(iNum,ColumnHeadings,CHR(1))). WHEN "CHARACTER" THEN hTempTable:ADD-NEW-FIELD(ENTRY(iNum,FieldNames,CHR(1)), "CHARACTER" , 0, "X(200)", ?, ENTRY(iNum,ColumnHeadings,CHR(1))). END CASE. END. /* No prepare the temp table */ hTempTable:TEMP-TABLE-PREPARE("CSVImport"). /* capture a buffer handle to it. */ hTempTableBuffer = hTempTable:DEFAULT-BUFFER-HANDLE. /* Now, read all the data and populate the temp table */ OuterRepeat: REPEAT: /* Check we have the correct number of data items before processing this row */ IF NUM-ENTRIES(cDataline,CHR(1)) <> iNumFields THEN MESSAGE "ERROR IMPORTING CSV: " + STRING(iNumFields) + " Fields, " + STRING(NUM-ENTRIES(cDataline,CHR(1))) + " data entries". ELSE DO TRANSACTION: hTempTableBuffer:BUFFER-CREATE(). REPEAT iNum = 1 TO iNumFields ON ERROR UNDO, LEAVE OuterRepeat: ASSIGN hFieldBuffer = hTempTableBuffer:BUFFER-FIELD(iNum). ASSIGN hFieldBuffer:BUFFER-VALUE = ENTRY(iNum, cDataline, CHR(1)) NO-ERROR. IF ERROR-STATUS:ERROR THEN MESSAGE "UNABLE TO LOAD LINE """ + cDataline + """ FROM CSV. ERROR: " + ERROR-STATUS:GET-MESSAGE(1). END. END. IMPORT UNFORMATTED cDataline. cDataline = Tokenise(cDataline). END. /* Reset the old date format if we need to */ IF cOldDatEFORMAT <> ? AND cOldDatEFORMAT <> "" THEN SESSION:DATE-FORMAT = cOldDateFormat. RETURN hTempTable. /* Function return value. */ END FUNCTION. FUNCTION GetColumnDataType RETURNS CHARACTER ( INPUT columnVal AS CHARACTER ) : /*------------------------------------------------------------------------------ Purpose: Notes: ------------------------------------------------------------------------------*/ DEFINE VARIABLE iNum AS DECIMAL NO-UNDO INITIAL ?. DEFINE VARIABLE dte AS DATE NO-UNDO INITIAL ?. iNum = DECIMAL(columnVal) NO-ERROR. IF iNum <> ? THEN DO: IF NUM-ENTRIES(columnVal,SESSION:NUMERIC-DECIMAL-POINT) = 2 THEN RETURN "DECIMAL". ELSE RETURN "INTEGER". END. dte = DATE(columnVal) NO-ERROR. IF dte <> ? THEN DO: IF LOOKUP(" ", columnVal) = 0 THEN RETURN "DATE". IF LOOKUP("+", columnVal) <> 0 OR LOOKUP("-", columnVal) <> 0 THEN RETURN "DATETIME-TZ". RETURN "DATETIME". END. IF CAN-DO("yes,no,true,false",columnVal) THEN RETURN "LOGICAL". RETURN "CHARACTER". /* Function return value. */ END FUNCTION. FUNCTION GetColumnTypes RETURNS CHARACTER ( INPUT dataline AS CHARACTER ) : /*------------------------------------------------------------------------------ Purpose: Notes: ------------------------------------------------------------------------------*/ DEFINE VARIABLE iNum AS INTEGER NO-UNDO. DEFINE VARIABLE cDataTypes AS CHARACTER NO-UNDO. REPEAT WITH iNum = 1 TO NUM-ENTRIES(dataline,CHR(1)): IF iNum > 1 THEN cDataTypes = cDataTypes + CHR(1). cDataTypes = cDataTypes + GetColumnDataType(ENTRY(iNum,dataline,CHR(1))). END. /* REPEAT */ RETURN cDataTypes. /* Function return value. */ END FUNCTION. FUNCTION getValidFieldName RETURNS CHARACTER ( INPUT ipcFieldName AS CHARACTER ) : /*------------------------------------------------------------------------------ Purpose: Notes: ------------------------------------------------------------------------------*/ DEFINE VARIABLE outputName AS CHARACTER NO-UNDO. DEFINE VARIABLE iNum AS INTEGER NO-UNDO. DEFINE VARIABLE cThisChar AS CHARACTER NO-UNDO. /* If blank, return arbitary field name */ IF ipcFieldName = "" THEN RETURN "unnamedField". /* Remove invalid characters */ REPEAT WITH iNum = 1 TO LENGTH(ipcFieldName): cThisChar = SUBSTRING(ipcFieldName,iNum,1). IF NOT ((ASC(cThisChar) >= 48 AND ASC(cThisChar) <= 57) OR (ASC(cThisChar) >= 65 AND ASC(cThisChar) <= 90) OR (ASC(cThisChar) >= 97 AND ASC(cThisChar) <= 122)) THEN cThisChar = "_". outputName = outputName + cThisChar. END. /* Make sure it begins with a letter */ cThisChar = SUBSTRING(outputName,1,1). IF NOT ((ASC(cThisChar) >= 65 AND ASC(cThisChar) <= 90) OR (ASC(cThisChar) >= 97 AND ASC(cThisChar) <= 122)) THEN outputName = "abc" + outputName. /* Make sure it's not a reserved word */ IF KEYWORD-ALL(outputName) <> ? THEN outputName = "abc" + outputName. RETURN outputName. /* Function return value. */ END FUNCTION. FUNCTION GetValidFieldNames RETURNS CHARACTER ( INPUT LabelList AS CHARACTER ) : /*------------------------------------------------------------------------------ Purpose: Notes: ------------------------------------------------------------------------------*/ DEFINE VARIABLE iNum AS INTEGER NO-UNDO. DEFINE VARIABLE iUniqueNum AS INTEGER NO-UNDO. DEFINE VARIABLE cThisField AS CHARACTER NO-UNDO. DEFINE VARIABLE cFieldList AS CHARACTER NO-UNDO. /* Builds a valid list of field names from the label list */ REPEAT WITH iNum = 1 TO NUM-ENTRIES(LabelList, CHR(1)): /* get a valid field name */ cThisField = getValidFieldName(ENTRY(iNum,LabelList,CHR(1))). /* ensure it's unique */ IF LOOKUP(cThisField, cFieldList , CHR(1)) <> 0 THEN DO: iUniqueNum = 1. REPEAT WHILE LOOKUP(cThisField + STRING(iUniqueNum), cFieldList , CHR(1)) <> 0: iUniqueNum = iUniqueNum + 1. END. cThisField = cThisField + STRING(iUniqueNum). END. /* Add it to the list */ IF cFieldList <> "" THEN cFieldList = cFieldList + CHR(1). cFieldList = cFieldList + cThisField. END. RETURN cFieldList. /* Function return value. */ END FUNCTION. FUNCTION Tokenise RETURNS CHARACTER ( INPUT iopcstring AS CHARACTER) : /*------------------------------------------------------------------------------ Purpose: Notes: ------------------------------------------------------------------------------*/ DEFINE VARIABLE cTemp AS CHARACTER NO-UNDO. DEFINE VARIABLE cchr AS CHARACTER NO-UNDO. DEFINE VARIABLE cChar AS CHARACTER NO-UNDO. DEFINE VARIABLE lLastDelim AS LOGICAL NO-UNDO. DEFINE VARIABLE iCnt AS INTEGER NO-UNDO. DEFINE VARIABLE lInQuote AS LOGICAL NO-UNDO. DEFINE VARIABLE lInQuote1 AS LOGICAL NO-UNDO. DEFINE VARIABLE lLiteral AS LOGICAL NO-UNDO. cTemp = "". lInQuote = FALSE. DO iCnt = 1 TO LENGTH(iopcstring): cChar = SUBSTRING(iopcstring,icnt,1). IF cChar = CHR(34) THEN DO: IF NOT lInQuote1 AND NOT (lInQuote AND lLiteral) THEN DO: lInQuote = NOT lInQuote. NEXT. END. END. IF cChar = "'" THEN DO: IF NOT lInQuote AND NOT (lInQuote1 AND lLiteral) THEN DO: lInQuote1 = NOT lInQuote1. NEXT. END. END. IF NOT lInQuote AND NOT lInQuote1 AND cChar = "," THEN DO: /* IF lLastDelim THEN NEXT.*/ cTemp = cTemp + CHR(1). lLastDelim = TRUE. END. ELSE DO: cTemp = cTemp + cChar. lLastDelim = FALSE. END. lLiteral = (cChar = CHR(126)). END. RETURN cTemp. END FUNCTION.