'Bundles a folder path and a file name into one value.
'The two fields are passed together to RandomAccessFile.Initialize and File.OpenInput in this module.
Type tFolderAndFile(strFolder As String, _
strFile As String)
'Instance state of the buffered line / CSV reader.
Sub Class_Globals
'Folder and file being read (stored by Initialize, reused by InitRAF and ReadCSVList).
Private mtFF As tFolderAndFile
'Set to True by ReadLine once the last line has been handed out; the read loops stop on it.
Private bFinalLineDone As Boolean
'Character set name passed to BytesToString when decoding a line.
Private mstrCharSet As String
'Byte value that terminates a line.
Private mbtEndOfLineByte As Byte
'Byte value that toggles the "inside quotes" state in ReadLine.
Private mbtEncloserByte As Byte
'Field separator; stays "" until ReadCSVList detects it via GetSeparatorCharFromFile.
Private mstrSeparatorChar As String
'String form of the encloser; only assigned in Initialize when the encloser byte is not -1.
Private mstrEncloser As String
'True once ReadLine has read the first buffer from the file.
Private bFirstArrayDone As Boolean
'File offset from which ReadLine reads the next buffer.
Private lFilePos As Long
'Index in arrBytes where the next line starts.
Private lArrayPos As Long
'Set by ReadLine when the buffer was scanned to its end without returning a line; triggers a refill.
Private bNoEndOfLineFound As Boolean
'Only ever assigned False in this module; ReadLine checks it before refilling the buffer.
Private bFinalArrayDone As Boolean
'Upper bound for the buffer size, supplied to Initialize.
Private mlMaxBytes As Long
'Random access handle on the file.
Private RAF As RandomAccessFile
'File size as reported by RAF.Size.
Private lTotalBytes As Long
'Number of bytes to request for the next buffer; overwritten with the count returned by RAF.ReadBytes.
Private iBytes As Int
'Current read buffer.
Private arrBytes() As Byte
'Most recently read line.
Private strLine As String
'Number of CSV columns; 0 means "not determined yet" (see ReadCSVList).
Private miCols As Int
End Sub
'Configures the reader and opens the file.
'tFF             - folder and file to read.
'strCharset      - character set name used when bytes are decoded to text.
'btEndOfLineByte - byte value that ends a line.
'btEncloserByte  - byte value of the field encloser; -1 leaves the encloser string unset.
'lMaxBytes       - largest buffer to read in one go.
'iColumns        - CSV column count; -1 is stored as 0, which makes ReadCSVList
'                  count the fields of the first non-empty line instead.
Public Sub Initialize(tFF As tFolderAndFile, strCharset As String, btEndOfLineByte As Byte, btEncloserByte As Byte, lMaxBytes As Long, iColumns As Int)
	'Caller-supplied configuration.
	mtFF = tFF
	mstrCharSet = strCharset
	mbtEndOfLineByte = btEndOfLineByte
	mbtEncloserByte = btEncloserByte
	If btEncloserByte <> -1 Then mstrEncloser = Chr(btEncloserByte)
	mlMaxBytes = lMaxBytes

	'Open the file (third argument True, as in InitRAF).
	RAF.Initialize(tFF.strFolder, tFF.strFile, True)

	'Start at the beginning of both the file and the buffer.
	lFilePos = 0
	lArrayPos = 0
	If iColumns = -1 Then miCols = 0 Else miCols = iColumns

	lTotalBytes = RAF.Size

	'Nothing has been read or detected yet.
	bNoEndOfLineFound = False
	bFinalArrayDone = False
	bFinalLineDone = False
	mstrSeparatorChar = ""

	'Never request more than the file holds or the configured maximum.
	iBytes = Min(lTotalBytes, mlMaxBytes)
End Sub
'Re-opens the file stored in mtFF and recomputes the file size and the
'number of bytes to request for the next buffer.
Public Sub InitRAF
	RAF.Initialize(mtFF.strFolder, mtFF.strFile, True)
	Dim lSize As Long = RAF.Size
	lTotalBytes = lSize
	'Request at most mlMaxBytes, or the whole file when it is smaller.
	iBytes = Min(lSize, mlMaxBytes)
End Sub
'Closes the file and puts the reading state back to its starting values.
Public Sub Close
	RAF.Close
	'Replace the buffer with an empty array.
	Dim arrBytes() As Byte

	'Progress flags.
	bFirstArrayDone = False
	bNoEndOfLineFound = False
	bFinalArrayDone = False
	bFinalLineDone = False

	'Positions.
	lFilePos = 0
	lArrayPos = 0

	'CSV layout has to be determined again.
	miCols = 0
	mstrSeparatorChar = ""
End Sub
'Returns the next line of the file, decoded with mstrCharSet.
'
'The file is read in buffers of up to iBytes bytes. A line ends at the first
'mbtEndOfLineByte that is not between two mbtEncloserByte bytes. The end-of-line
'byte is not part of the returned text, and a byte 13 (CR) directly before it is
'dropped as well. When the line being scanned is not complete in the current
'buffer, the buffer is re-read starting at the file position of that line.
'
'Returns "" when bFinalLineDone is already True or becomes True without a
'final line being produced.
'
'NOTE(review): when no line end is found and lArrayPos is 0 (a line longer than
'the buffer, or an encloser that is never closed), lFilePos does not advance and
'the same bytes appear to be read again - confirm and add a guard if needed.
Public Sub ReadLine As String
	Dim i As Long
	Dim lLineLen As Long
	'Every call starts at the beginning of a line, i.e. outside any enclosers.
	Dim bInsideQuotes As Boolean = False
	Do While bFinalLineDone = False
		If bFinalArrayDone = False Then
			If bFirstArrayDone = False Then
				'First buffer: read from the start of the file.
				Dim arrBytes(iBytes) As Byte
				iBytes = RAF.ReadBytes(arrBytes, 0, iBytes, 0)
				bNoEndOfLineFound = False
				bFirstArrayDone = True
			Else If bNoEndOfLineFound Then
				'The previous scan ran off the end of the buffer: refill from lFilePos.
				Dim arrBytes(iBytes) As Byte
				iBytes = RAF.ReadBytes(arrBytes, 0, iBytes, lFilePos)
				lArrayPos = 0
				bNoEndOfLineFound = False
			End If
		End If
		For i = lArrayPos To iBytes - 1
			If arrBytes(i) = mbtEncloserByte Then
				bInsideQuotes = bInsideQuotes = False
			End If
			If bInsideQuotes = False Then
				If arrBytes(i) = mbtEndOfLineByte Then
					lLineLen = i - lArrayPos
					'Strip a trailing CR only when it belongs to this line. Checking the
					'line length (rather than i > 0) also lets an empty line at buffer
					'index 0 be returned instead of being merged into the next line,
					'and never yields a negative length.
					If lLineLen > 0 Then
						If arrBytes(i - 1) = 13 Then lLineLen = lLineLen - 1
					End If
					strLine = BytesToString(arrBytes, lArrayPos, lLineLen, mstrCharSet)
					lArrayPos = i + 1
					Return strLine
				End If
			End If
		Next
		'The scan reached the end of the buffer without completing a line.
		bNoEndOfLineFound = True
		If bInsideQuotes Then
			'Restart the unfinished line at the beginning of the next buffer.
			lFilePos = lFilePos + lArrayPos
			bInsideQuotes = False
		Else
			If iBytes < mlMaxBytes Then
				'A buffer shorter than the maximum is treated as the last one.
				If lFilePos < lTotalBytes - 1 Then
					'Whatever is left after the last end-of-line byte is the final line.
					strLine = BytesToString(arrBytes, lArrayPos, iBytes - lArrayPos, mstrCharSet)
					bFinalLineDone = True
					Return strLine
				Else
					bFinalLineDone = True
				End If
			Else
				'More data may follow: continue at the start of the unfinished line.
				lFilePos = lFilePos + lArrayPos
			End If
		End If
	Loop
End Sub
'Reads lines with ReadLine into a List of strings.
'bIgnoreEmptyLines - True: empty lines are dropped and not counted.
'                    False: empty lines are added and arrEmptyLines(0) is incremented for each.
'iMaxLines         - stop as soon as the list holds this many lines; 0 is turned into -1,
'                    a size the list never has, so reading continues to the end.
'arrEmptyLines     - element 0 is used as a counter (see bIgnoreEmptyLines).
'Returns the List of lines.
Public Sub ReadList(bIgnoreEmptyLines As Boolean, iMaxLines As Int, arrEmptyLines() As Int) As ResumableSub
	Dim lstResult As List
	If iMaxLines = 0 Then iMaxLines = -1
	lstResult.Initialize
	Do While bFinalLineDone = False
		strLine = ReadLine
		Dim bKeep As Boolean = True
		If strLine.Length = 0 Then
			If bIgnoreEmptyLines Then
				bKeep = False
			Else
				arrEmptyLines(0) = arrEmptyLines(0) + 1
			End If
		End If
		If bKeep Then lstResult.Add(strLine)
		'Checked after every line, whether or not it was added.
		If lstResult.Size = iMaxLines Then Exit
	Loop
	Return lstResult
End Sub
'Reads lines with ReadLine and splits each one into an array of field values.
'
'bIgnoreEmptyLines - False: an empty line is added as an array of miCols Null values.
'                    True: empty lines are not added.
'iMaxLines         - stop as soon as the list holds this many rows. 0 means "no limit",
'                    the same convention ReadList uses.
'arrEmptyLines     - element 0 is incremented for every empty line, whether it is added or not.
'Returns a List with one array per row.
'
'The separator is detected from the first 4 lines of the file when it is not known yet.
'The column count is taken from the first non-empty line when miCols is 0.
'Non-empty lines whose length is not greater than miCols are skipped.
Public Sub ReadCSVList(bIgnoreEmptyLines As Boolean, iMaxLines As Int, arrEmptyLines() As Int) As ResumableSub
	Dim c As Int
	Dim lstLines As List
	Dim bEncloser As Boolean = mstrEncloser.Length > 0
	'Without this, 0 would match the size of a still-empty list and end the read early.
	If iMaxLines = 0 Then iMaxLines = -1
	If mstrSeparatorChar.Length = 0 Then
		Dim rs3 As ResumableSub = GetSeparatorCharFromFile(mtFF, mstrCharSet, mstrEncloser, 4)
		Wait For (rs3) Complete (oSeparatorChar As Object)
		mstrSeparatorChar = CStr(oSeparatorChar)
	End If
	lstLines.Initialize
	Do While bFinalLineDone = False
		strLine = ReadLine
		If strLine.Length > 0 Then
			If miCols = 0 Then
				miCols = CountFieldsInTextLine(strLine, mstrSeparatorChar, mstrEncloser)
			End If
			If strLine.Length > miCols Then
				'The encloser-aware parser is only needed when an encloser is configured.
				If bEncloser Then
					Dim arr() As Object = ParseCSVLine(strLine, miCols, mstrSeparatorChar, mstrEncloser)
					lstLines.Add(arr)
				Else
					Dim arr() As Object = ParseCSVLineNoEnclosers(strLine, miCols, mstrSeparatorChar)
					lstLines.Add(arr)
				End If
			End If
		Else
			arrEmptyLines(0) = arrEmptyLines(0) + 1
			If bIgnoreEmptyLines = False Then
				'Represent the empty line as a row of Null values.
				Dim arrNulls(miCols) As Object
				For c = 0 To miCols - 1
					arrNulls(c) = Null
				Next
				lstLines.Add(arrNulls)
			End If
		End If
		If lstLines.Size = iMaxLines Then
			Return lstLines
		End If
	Loop
	Return lstLines
End Sub
'Counts the fields of one CSV line: the number of separators found plus one.
'strCSVLine   - the line to inspect.
'strSeparator - field separator; when empty the result is always 1.
'strEncloser  - encloser; when not empty, every occurrence toggles a "quoted" state
'               and separators seen while quoted are not counted.
Sub CountFieldsInTextLine(strCSVLine As String, strSeparator As String, strEncloser As String) As Int
	If strSeparator.Length = 0 Then Return 1
	Dim bHasEncloser As Boolean = strEncloser.Length > 0
	Dim bQuoted As Boolean = False
	Dim iSeparators As Int = 0
	Dim iPos As Int
	Dim chCurrent As Char
	For iPos = 0 To strCSVLine.Length - 1
		chCurrent = strCSVLine.CharAt(iPos)
		If bHasEncloser And (chCurrent = strEncloser) Then
			bQuoted = Not(bQuoted)
		Else If (chCurrent = strSeparator) And (bQuoted = False) Then
			iSeparators = iSeparators + 1
		End If
	Next
	Return iSeparators + 1
End Sub
'Builds a Map from each character of strCSVLine to the number of times it occurs.
'When strEncloser is not empty, every occurrence of it toggles a "quoted" state;
'the encloser itself and all characters seen while quoted are left out of the Map.
Sub GetCharCounts(strCSVLine As String, strEncloser As String) As Map
	Dim mapTally As Map
	mapTally.Initialize
	Dim bHasEncloser As Boolean = strEncloser.Length > 0
	Dim bQuoted As Boolean = False
	Dim iPos As Int
	Dim iSeen As Int
	Dim chCurrent As Char
	For iPos = 0 To strCSVLine.Length - 1
		chCurrent = strCSVLine.CharAt(iPos)
		If bHasEncloser And (chCurrent = strEncloser) Then
			bQuoted = Not(bQuoted)
		Else If bQuoted = False Then
			'A missing key counts as 0 occurrences so far.
			iSeen = mapTally.GetDefault(chCurrent, 0)
			mapTally.Put(chCurrent, iSeen + 1)
		End If
	Next
	Return mapTally
End Sub
'Tries to work out the field separator of a text file.
'
'Reads up to iLines lines (fewer when the file is shorter; iLines is then lowered to the
'number actually read), counts the characters of each line with GetCharCounts, and looks
'for a character that occurs the same number of times in every line read.
'Characters listed in strExclude (space, quotes, digits, letters) are not accepted.
'
'tFF         - folder and file to inspect.
'strCharSet  - character set used to read the file.
'strEncloser - encloser passed on to GetCharCounts.
'iLines      - number of lines to sample.
'Returns the first accepted character, or "" when there is none.
Public Sub GetSeparatorCharFromFile(tFF As tFolderAndFile, strCharSet As String, strEncloser As String, iLines As Int) As ResumableSub
	Dim strLine As String
	Dim strExclude As String = " ' "" 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

	'Pass 1: one character-count Map per sampled line.
	Dim lstPerLine As List
	lstPerLine.Initialize
	Dim iRead As Int = 0
	Dim oReader As TextReader
	oReader.Initialize2(File.OpenInput(tFF.strFolder, tFF.strFile), strCharSet)
	Do While True
		strLine = oReader.ReadLine
		If strLine = Null Then
			'End of file reached before iLines lines: judge on what was read.
			iLines = iRead
			Exit
		End If
		lstPerLine.Add(GetCharCounts(strLine, strEncloser))
		iRead = iRead + 1
		If iRead = iLines Then Exit
	Loop
	oReader.Close

	'Pass 2: count in how many lines each "character + occurrence count" pair appears.
	Dim mapVotes As Map
	mapVotes.Initialize
	For Each mapLine As Map In lstPerLine
		For Each oChar As Object In mapLine.Keys
			Dim strKey As String = oChar & mapLine.Get(oChar)
			Dim iVotes As Int = 0
			If mapVotes.ContainsKey(strKey) Then iVotes = mapVotes.Get(strKey)
			mapVotes.Put(strKey, iVotes + 1)
		Next
	Next

	'Pass 3: a pair seen in every sampled line is a candidate; its first character is the separator.
	For Each oCandidate As Object In mapVotes.Keys
		If iLines = mapVotes.Get(oCandidate) Then
			Dim chCandidate As Char = CStr(oCandidate).CharAt(0)
			If strExclude.Contains(chCandidate) = False Then Return chCandidate
		End If
	Next
	Return ""
End Sub
'Returns the argument as a String. Because the parameter is typed String,
'a value passed in is converted by the call itself.
Sub CStr(o As String) As String
	Dim strResult As String = o
	Return strResult
End Sub
'Splits one CSV line into an array of iCols values, honouring an encloser.
'
'Every occurrence of strEncloser toggles a "quoted" state; separators seen while
'quoted do not split. Enclosers stay part of the stored values.
'Once iCols values have been stored the rest of the line is ignored.
'When the line has fewer fields, the remaining elements are left unassigned.
'An empty strSeparator puts the whole line in element 0.
Sub ParseCSVLine(strCSVLine As String, iCols As Int, strSeparator As String, strEncloser As String) As Object()
	Dim arrFields(iCols) As Object
	If strSeparator.Length = 0 Then
		arrFields(0) = strCSVLine
		Return arrFields
	End If
	Dim iField As Int = 0
	Dim iFieldStart As Int = 0
	Dim iPos As Int = 0
	Dim bQuoted As Boolean = False
	Dim chCurrent As Char
	Do While iPos < strCSVLine.Length
		chCurrent = strCSVLine.CharAt(iPos)
		If chCurrent = strEncloser Then
			bQuoted = Not(bQuoted)
		Else If (chCurrent = strSeparator) And (bQuoted = False) Then
			arrFields(iField) = strCSVLine.SubString2(iFieldStart, iPos)
			iFieldStart = iPos + 1
			iField = iField + 1
			'All requested columns are filled: drop whatever follows.
			If iField = iCols Then Return arrFields
		End If
		iPos = iPos + 1
	Loop
	'The text after the last separator is the final field.
	arrFields(iField) = strCSVLine.SubString2(iFieldStart, strCSVLine.Length)
	Return arrFields
End Sub
'Splits one CSV line into an array of iCols strings at every separator character.
'
'Once iCols values have been stored the rest of the line is ignored.
'When the line has fewer fields, the remaining elements are left unassigned.
'An empty strSeparator puts the whole line in element 0.
Sub ParseCSVLineNoEnclosers(strCSVLine As String, iCols As Int, strSeparator As String) As String()
	Dim arrFields(iCols) As String
	If strSeparator.Length = 0 Then
		arrFields(0) = strCSVLine
		Return arrFields
	End If
	Dim iField As Int = 0
	Dim iFieldStart As Int = 0
	Dim iPos As Int = 0
	Do While iPos < strCSVLine.Length
		If strCSVLine.CharAt(iPos) = strSeparator Then
			arrFields(iField) = strCSVLine.SubString2(iFieldStart, iPos)
			iFieldStart = iPos + 1
			iField = iField + 1
			'All requested columns are filled: drop whatever follows.
			If iField = iCols Then Return arrFields
		End If
		iPos = iPos + 1
	Loop
	'The text after the last separator is the final field.
	arrFields(iField) = strCSVLine.SubString2(iFieldStart, strCSVLine.Length)
	Return arrFields
End Sub
'Counts the lines of a file by counting end-of-line bytes, reading the file in buffers.
'
'tFF              - folder and file to count.
'btEndOfLineByte  - byte value that ends a line.
'iMaxRows         - when > 0, counting stops as soon as this many line ends were seen
'                   and exactly iMaxRows is returned.
'iMaxBytesInArray - buffer size used for reading.
'bCloseRAFAfter   - True: RAF is closed before returning.
'Returns 0 for an empty file. Otherwise returns the number of end-of-line bytes,
'plus one when the last byte of the file is not an end-of-line byte (an unterminated last line).
'
'This sub re-initialises RAF and uses the names iBytes and arrBytes, which are also
'class globals. NOTE(review): presumably reader state has to be restored (e.g. with InitRAF)
'before ReadLine is used afterwards - confirm against the callers.
Public Sub GetLineCount(tFF As tFolderAndFile, btEndOfLineByte As Byte, _
		iMaxRows As Int, iMaxBytesInArray As Int, bCloseRAFAfter As Boolean) As Int
	Dim i As Int
	Dim iBytes As Int
	Dim lPosition As Long
	Dim iLines As Int
	Dim bCapReached As Boolean = False
	Dim bAnyBytes As Boolean = False
	Dim btLast As Byte
	RAF.Initialize(tFF.strFolder, tFF.strFile, True)
	iBytes = iMaxBytesInArray
	'Stop at the end of the file or as soon as the row cap is hit. The flag is needed
	'because Exit inside the For loop only leaves that loop.
	Do While (lPosition < RAF.Size) And (bCapReached = False)
		Dim arrBytes(iBytes) As Byte
		iBytes = RAF.ReadBytes(arrBytes, 0, iBytes, lPosition)
		'Nothing read: leave instead of looping without progress.
		If iBytes <= 0 Then Exit
		For i = 0 To iBytes - 1
			If arrBytes(i) = btEndOfLineByte Then
				iLines = iLines + 1
				If iMaxRows > 0 Then
					If iLines = iMaxRows Then
						bCapReached = True
						Exit
					End If
				End If
			End If
		Next
		'Remember the last byte seen so the final check does not depend on the buffer.
		btLast = arrBytes(iBytes - 1)
		bAnyBytes = True
		lPosition = lPosition + iBytes
	Loop
	If bCloseRAFAfter Then
		RAF.Close
	End If
	If bCapReached Then
		Return iLines
	End If
	If bAnyBytes = False Then
		Return 0
	End If
	If btLast = btEndOfLineByte Then
		Return iLines
	Else
		'The last line has no terminating end-of-line byte but still counts.
		Return iLines + 1
	End If
End Sub
'Property getter: True once ReadLine has set bFinalLineDone, i.e. no further lines will be returned.
Sub getFinalLineDone As Boolean
	Dim bDone As Boolean = bFinalLineDone
	Return bDone
End Sub
'Property getter: the file size in bytes as stored in lTotalBytes (taken from RAF.Size).
Sub getTotalFileBytes As Long
	Dim lSize As Long = lTotalBytes
	Return lSize
End Sub