'xml|数据
'我在设计这个类时,认为处理数据的最好方式还是dataset,而dataset可以从xml文件中打开。但dataset将xml打开时,要将全部文件读入到内存中,这种情况在单机上可以容忍,但是在服务器上就是一件非常龌龊的事情了,所以我设计了一个能够处理大型xml文件的类,它只返回指定的datatable,并且对内存的占用很小。我把它发布出来的目的就是希望做xml数据服务的朋友一起来优化。
''' <summary>
''' Reads and writes one DataTable stored inside a (potentially very large) XML file
''' without loading the whole file into a DataSet.
''' The XML file is streamed node by node; only the rows of the requested table are
''' buffered. The table schema lives in a separate XSD file.
''' Rows are the elements whose name equals the table name (the layout produced by
''' DataSet.WriteXml), matched at any depth, as in the original implementation.
''' </summary>
Public Class LargeXmlFile
    Implements ILargeXmlFile

    Protected mXmlFile As String
    Protected mXsdFile As String
    Protected Const mRootElement As String = "Data"
    ' Last temporary-file number handed out; shared by every instance.
    Protected Shared mFileNo As Int32
    ' Serializes the merge-and-replace phase of SaveTable across all instances in this
    ' process. A private object is used instead of SyncLock Me, which neither protects
    ' Shared state nor is safe against callers locking on the instance.
    Private Shared ReadOnly sWriteLock As Object = New Object

    ''' <summary>
    ''' Binds the instance to an XML data file and the XSD file describing the table.
    ''' </summary>
    ''' <param name="xmlfile">Path of the XML file holding the table rows.</param>
    ''' <param name="xsdfile">Path of the XSD schema file for the table.</param>
    ''' <exception cref="FileNotFoundException">Either file does not exist.</exception>
    Public Sub New(ByVal xmlfile As String, ByVal xsdfile As String)
        ' FileNotFoundException derives from Exception, so existing handlers keep working;
        ' the messages are unchanged.
        If Not File.Exists(xmlfile) Then Throw New FileNotFoundException(xmlfile & "文件不存在", xmlfile)
        If Not File.Exists(xsdfile) Then Throw New FileNotFoundException(xsdfile & "文件不存在", xsdfile)
        mXmlFile = xmlfile
        mXsdFile = xsdfile
    End Sub

    ''' <summary>
    ''' Extracts every row element named <paramref name="tbName"/> from the XML file and
    ''' loads them into a DataTable using the schema from the XSD file.
    ''' </summary>
    ''' <param name="tbName">Element (table) name to extract.</param>
    ''' <returns>
    ''' A detached copy of the first table of the loaded DataSet, or Nothing when the
    ''' table is not present or any file cannot be read or parsed.
    ''' </returns>
    Public Function GetTable(ByVal tbName As String) As System.Data.DataTable Implements ILargeXmlFile.GetTable
        If tbName Is Nothing OrElse tbName.Length = 0 Then Return Nothing

        Dim source As FileStream = Nothing
        Dim sourceReader As XmlTextReader = Nothing
        Dim buffer As MemoryStream = Nothing
        Dim bufferWriter As XmlTextWriter = Nothing
        Dim bufferReader As XmlTextReader = Nothing
        Dim dst As DataSet = Nothing
        Try
            ' Read-only access with read sharing so concurrent GetTable calls do not
            ' lock each other out.
            source = New FileStream(mXmlFile, FileMode.Open, FileAccess.Read, FileShare.Read)
            sourceReader = CreateReader(source)
            buffer = New MemoryStream
            bufferWriter = CreateWriter(buffer)

            bufferWriter.WriteStartElement(Nothing, mRootElement, Nothing)
            Dim found As Boolean = False
            sourceReader.Read()
            While Not sourceReader.EOF
                If sourceReader.NodeType = XmlNodeType.Element AndAlso sourceReader.Name = tbName Then
                    ' CopySubtree leaves the reader on the node after the row, so no
                    ' extra Read() here.
                    CopySubtree(sourceReader, bufferWriter)
                    found = True
                Else
                    sourceReader.Read()
                End If
            End While
            If Not found Then Return Nothing
            bufferWriter.WriteEndElement()
            bufferWriter.Flush()

            ' Load the schema from the XSD file directly instead of pasting its text into
            ' the buffer: that broke whenever the XSD started with an XML declaration.
            dst = New DataSet
            dst.ReadXmlSchema(mXsdFile)
            buffer.Position = 0
            bufferReader = CreateReader(buffer)
            dst.ReadXml(bufferReader, XmlReadMode.IgnoreSchema)
            Return dst.Tables(0).Copy
        Catch ex As Exception
            ' Failure is reported as Nothing (unchanged contract); the detail goes to the
            ' trace listeners rather than a message box, which must not appear on a server.
            System.Diagnostics.Trace.WriteLine(ex.ToString())
            Return Nothing
        Finally
            CloseQuietly(bufferReader)
            CloseQuietly(bufferWriter)
            CloseQuietly(buffer)
            CloseQuietly(sourceReader)
            CloseQuietly(source)
            If Not dst Is Nothing Then dst.Dispose()
        End Try
    End Function

    ''' <summary>
    ''' Replaces all rows of the table named tb.TableName in the XML file with the rows
    ''' of <paramref name="tb"/>. Every other element of the file is copied unchanged and
    ''' the new rows are appended just before the end of the root element.
    ''' </summary>
    ''' <param name="tb">Table to store; it is left detached from any internal DataSet.</param>
    ''' <returns>
    ''' True when the file was rewritten; False on any failure, in which case the
    ''' original XML file is left in place.
    ''' </returns>
    ''' <exception cref="ArgumentNullException"><paramref name="tb"/> is Nothing.</exception>
    Public Function SaveTable(ByVal tb As System.Data.DataTable) As Boolean Implements ILargeXmlFile.SaveTable
        If tb Is Nothing Then Throw New ArgumentNullException("tb")
        Dim tbName As String = tb.TableName
        ' A nameless table has no row element name to match or write.
        If tbName Is Nothing OrElse tbName.Length = 0 Then Return False

        ' Atomic increment: mFileNo is Shared, so a per-instance lock would not protect it.
        Dim fileNo As Int32 = System.Threading.Interlocked.Increment(mFileNo)
        Dim rowsFile As String = Nothing      ' temporary file holding the new rows
        Dim mergedFile As String = Nothing    ' temporary file holding the updated XML file
        Try
            ' Temporary files live next to the data file (not in the process working
            ' directory) and carry a GUID so other processes cannot collide with them.
            Dim workDir As String = Path.GetDirectoryName(Path.GetFullPath(mXmlFile))
            Dim stem As String = "_tmp" & CStr(fileNo) & "_" & Guid.NewGuid().ToString("N")
            rowsFile = Path.Combine(workDir, stem)
            mergedFile = Path.Combine(workDir, stem & "_save")

            WriteRows(tb, rowsFile)
            SyncLock sWriteLock
                MergeRows(rowsFile, mergedFile, tbName)
                ReplaceFile(mergedFile, mXmlFile)
            End SyncLock
            Return True
        Catch ex As Exception
            System.Diagnostics.Trace.WriteLine(ex.ToString())
            Return False
        Finally
            DeleteQuietly(rowsFile)
            DeleteQuietly(mergedFile)
        End Try
    End Function

    ' Serializes the rows of tb (wrapped in the root element) to fileName.
    Private Shared Sub WriteRows(ByVal tb As DataTable, ByVal fileName As String)
        Dim dst As New DataSet(mRootElement)
        ' A table can belong to only one DataSet: borrow a free table and give it back
        ' afterwards, otherwise work on a copy. Leaving the caller's table attached made
        ' a second SaveTable call on the same table fail.
        Dim borrowed As Boolean = (tb.DataSet Is Nothing)
        Dim target As FileStream = Nothing
        Dim writer As XmlTextWriter = Nothing
        Try
            If borrowed Then
                dst.Tables.Add(tb)
            Else
                dst.Tables.Add(tb.Copy())
            End If
            target = New FileStream(fileName, FileMode.Create)
            writer = CreateWriter(target)
            dst.WriteXml(writer, XmlWriteMode.IgnoreSchema)
            writer.Flush()
        Finally
            CloseQuietly(writer)
            CloseQuietly(target)
            If borrowed AndAlso tb.DataSet Is dst Then dst.Tables.Remove(tb)
            dst.Dispose()
        End Try
    End Sub

    ' Streams the current XML file into mergedFile, dropping every element named tbName
    ' and appending the rows stored in rowsFile before the root element is closed.
    Private Sub MergeRows(ByVal rowsFile As String, ByVal mergedFile As String, ByVal tbName As String)
        Dim source As FileStream = Nothing
        Dim reader As XmlTextReader = Nothing
        Dim target As FileStream = Nothing
        Dim writer As XmlTextWriter = Nothing
        Try
            source = New FileStream(mXmlFile, FileMode.Open, FileAccess.Read, FileShare.None)
            reader = CreateReader(source)
            target = New FileStream(mergedFile, FileMode.Create)
            writer = CreateWriter(target)

            ' MoveToContent skips an XML declaration or leading comments; taking the first
            ' node as the root would pick up "xml" for files that start with a declaration.
            If reader.MoveToContent() <> XmlNodeType.Element Then
                Throw New InvalidOperationException("The XML file has no root element.")
            End If
            Dim rootDepth As Integer = reader.Depth
            Dim rootIsEmpty As Boolean = reader.IsEmptyElement
            writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI)
            writer.WriteAttributes(reader, False)
            reader.Read()

            If Not rootIsEmpty Then
                While Not reader.EOF
                    ' Stop on the root's own end tag, identified by depth rather than by
                    ' name so a nested element sharing the root's name cannot end the copy.
                    If reader.NodeType = XmlNodeType.EndElement AndAlso reader.Depth = rootDepth Then Exit While
                    If reader.NodeType = XmlNodeType.Element AndAlso reader.Name = tbName Then
                        ' Skip() consumes the whole old row, including nested or empty elements.
                        reader.Skip()
                    Else
                        CopyCurrentNode(reader, writer)
                        reader.Read()
                    End If
                End While
            End If

            AppendRows(rowsFile, writer)
            writer.WriteEndElement()
            writer.Flush()
        Finally
            CloseQuietly(writer)
            CloseQuietly(target)
            CloseQuietly(reader)
            CloseQuietly(source)
        End Try
    End Sub

    ' Copies every child element of the root of rowsFile to writer.
    Private Shared Sub AppendRows(ByVal rowsFile As String, ByVal writer As XmlWriter)
        Dim source As FileStream = Nothing
        Dim reader As XmlTextReader = Nothing
        Try
            source = New FileStream(rowsFile, FileMode.Open, FileAccess.Read, FileShare.Read)
            reader = CreateReader(source)
            If reader.MoveToContent() <> XmlNodeType.Element Then Return
            Dim rootDepth As Integer = reader.Depth
            ' A table without rows is written as an empty root element: nothing to append.
            Dim rootIsEmpty As Boolean = reader.IsEmptyElement
            reader.Read()
            If rootIsEmpty Then Return
            While Not reader.EOF
                If reader.NodeType = XmlNodeType.EndElement AndAlso reader.Depth = rootDepth Then Exit While
                If reader.NodeType = XmlNodeType.Element Then
                    CopySubtree(reader, writer)
                Else
                    reader.Read()
                End If
            End While
        Finally
            CloseQuietly(reader)
            CloseQuietly(source)
        End Try
    End Sub

    ' Copies the element the reader is positioned on, with all its descendants, and
    ' leaves the reader on the node that follows the element.
    Private Shared Sub CopySubtree(ByVal reader As XmlReader, ByVal writer As XmlWriter)
        Dim startDepth As Integer = reader.Depth
        Dim isEmpty As Boolean = reader.IsEmptyElement
        CopyCurrentNode(reader, writer)
        reader.Read()
        If isEmpty Then Return
        While Not reader.EOF
            Dim closesSubtree As Boolean = (reader.NodeType = XmlNodeType.EndElement AndAlso reader.Depth = startDepth)
            CopyCurrentNode(reader, writer)
            reader.Read()
            If closesSubtree Then Exit While
        End While
    End Sub

    ' Writes the node the reader is positioned on (without descendants) to writer.
    ' The reader is not advanced.
    Private Shared Sub CopyCurrentNode(ByVal reader As XmlReader, ByVal writer As XmlWriter)
        Select Case reader.NodeType
            Case XmlNodeType.Element
                ' An empty element (<a />) produces no EndElement node, so it has to be
                ' closed here or every following node would be nested inside it.
                Dim isEmpty As Boolean = reader.IsEmptyElement
                writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI)
                writer.WriteAttributes(reader, False)
                If isEmpty Then writer.WriteEndElement()
            Case XmlNodeType.EndElement
                writer.WriteEndElement()
            Case XmlNodeType.Text
                writer.WriteString(reader.Value)
            Case XmlNodeType.CDATA
                writer.WriteCData(reader.Value)
            Case XmlNodeType.SignificantWhitespace
                writer.WriteWhitespace(reader.Value)
            Case XmlNodeType.Comment
                writer.WriteComment(reader.Value)
            Case XmlNodeType.ProcessingInstruction
                writer.WriteProcessingInstruction(reader.Name, reader.Value)
        End Select
    End Sub

    ' Moves newFile over target, keeping the old target as a backup until the move has
    ' succeeded, so a failed move cannot lose the data file.
    Private Shared Sub ReplaceFile(ByVal newFile As String, ByVal target As String)
        Dim backup As String = newFile & "_bak"
        File.Move(target, backup)
        Try
            File.Move(newFile, target)
        Catch ex As Exception
            File.Move(backup, target)
            Throw
        End Try
        DeleteQuietly(backup)
    End Sub

    ' Creates a UTF-8 (encoding auto-detected) XML reader that drops insignificant whitespace.
    Private Shared Function CreateReader(ByVal source As Stream) As XmlTextReader
        Dim reader As New XmlTextReader(New StreamReader(source, System.Text.Encoding.UTF8, True, 1024))
        reader.WhitespaceHandling = WhitespaceHandling.None
        Return reader
    End Function

    ' Creates a UTF-8 XML writer that indents by four spaces.
    Private Shared Function CreateWriter(ByVal target As Stream) As XmlTextWriter
        Dim writer As New XmlTextWriter(New StreamWriter(target, System.Text.Encoding.UTF8, 1024))
        writer.Formatting = Formatting.Indented
        writer.Indentation = 4
        Return writer
    End Function

    ' Cleanup helpers: they never throw, so a cleanup failure cannot mask the error that
    ' caused it or escape from a Finally block.
    Private Shared Sub CloseQuietly(ByVal reader As XmlReader)
        If reader Is Nothing Then Return
        Try
            reader.Close()
        Catch ex As Exception
        End Try
    End Sub

    Private Shared Sub CloseQuietly(ByVal writer As XmlWriter)
        If writer Is Nothing Then Return
        Try
            writer.Close()
        Catch ex As Exception
        End Try
    End Sub

    Private Shared Sub CloseQuietly(ByVal stream As Stream)
        If stream Is Nothing Then Return
        Try
            stream.Close()
        Catch ex As Exception
        End Try
    End Sub

    Private Shared Sub DeleteQuietly(ByVal fileName As String)
        If fileName Is Nothing Then Return
        Try
            File.Delete(fileName)
        Catch ex As Exception
        End Try
    End Sub
End Class