通过大型xml文件来提供数据服务

80酷酷网    80kuku.com

  xml|数据

'我在设计这个类时，认为处理数据的最好方式还是 DataSet，而 DataSet 可以直接从 XML 文件中加载。但 DataSet 打开 XML 时，需要把整个文件读入内存；这在单机上尚可容忍，在服务器上却是难以接受的开销。因此我设计了一个能够处理大型 XML 文件的类：它只返回指定的 DataTable，并且占用的内存很小。我把它发布出来，是希望做 XML 数据服务的朋友一起来优化。

' Reads and writes individual DataTables stored in one large XML file by
' streaming it with XmlTextReader/XmlTextWriter, so that only the rows of the
' requested table are ever materialised in memory.
'
' The data file uses the layout SaveTable produces through DataSet.WriteXml:
' a root element whose descendants named after a table are that table's rows.
Public Class LargeXmlFile
    Implements ILargeXmlFile

    ' Path of the XML data file tables are read from and saved to.
    Protected mXmlFile As String
    ' Path of the XSD schema that GetTable inlines ahead of the extracted rows.
    Protected mXsdFile As String
    ' Root element name of the in-memory document GetTable builds.
    Protected Const mRootElement As String = "Data"
    ' Last temporary-file number handed out; shared by all instances.
    Protected Shared mFileNo As Int32

    ' Private lock for the final delete-and-rename step of SaveTable
    ' (replaces locking on Me, which outside code could also lock on).
    Private ReadOnly mSwapLock As New Object

    ' Creates a reader/writer over the given data file and schema file.
    '   xmlfile - path of the XML data file.
    '   xsdfile - path of the XSD schema describing the table.
    ' Throws FileNotFoundException (an Exception, as before) when either file
    ' does not exist.
    Public Sub New(ByVal xmlfile As String, ByVal xsdfile As String)
        If Not File.Exists(xmlfile) Then Throw New FileNotFoundException(xmlfile & "文件不存在", xmlfile)
        If Not File.Exists(xsdfile) Then Throw New FileNotFoundException(xsdfile & "文件不存在", xsdfile)

        mXmlFile = xmlfile
        mXsdFile = xsdfile
    End Sub

    ' Extracts every element named tbName from the data file and loads them,
    ' together with the inlined schema, into a DataTable.
    '   tbName - element (table) name to extract.
    ' Returns a detached copy of the loaded table, or Nothing when a file
    ' cannot be opened, the schema cannot be read, no element named tbName
    ' exists, or the DataSet rejects the data.
    ' An XmlException raised while parsing the data file propagates to the
    ' caller; all streams are released in every case.
    Public Function GetTable(ByVal tbName As String) As System.Data.DataTable Implements ILargeXmlFile.GetTable
        Dim xsdStream As FileStream = Nothing
        Dim xmlStream As FileStream = Nothing
        Dim buffer As MemoryStream = Nothing
        Dim dst As DataSet = Nothing

        Try
            ' Open read-only and shareable so concurrent readers do not
            ' lock each other out.
            Try
                xsdStream = New FileStream(Me.mXsdFile, FileMode.Open, FileAccess.Read, FileShare.Read)
                xmlStream = New FileStream(Me.mXmlFile, FileMode.Open, FileAccess.Read, FileShare.Read)
            Catch ex As Exception
                Return Nothing
            End Try

            ' In-memory document: <Data> + inline schema + matching rows.
            buffer = New MemoryStream
            Dim bufferWriter As New XmlTextWriter(New StreamWriter(buffer, System.Text.Encoding.UTF8, 1024))
            bufferWriter.Formatting = Formatting.Indented
            bufferWriter.Indentation = 4
            bufferWriter.WriteStartElement(Nothing, mRootElement, Nothing)

            ' Copy the schema's root element rather than its raw text, so an
            ' XML declaration at the top of the XSD cannot end up in the
            ' middle of the buffer and make it ill-formed.
            Try
                Dim xsdReader As New XmlTextReader(New StreamReader(xsdStream, System.Text.Encoding.UTF8, True, 1024))
                xsdReader.MoveToContent()
                bufferWriter.WriteNode(xsdReader, False)
            Catch ex As XmlException
                System.Diagnostics.Trace.WriteLine(ex.ToString())
                Return Nothing
            End Try

            Dim source As New XmlTextReader(New StreamReader(xmlStream, System.Text.Encoding.UTF8, True, 1024))
            source.WhitespaceHandling = WhitespaceHandling.None

            ' Copy every matching element, not just the first one: each row
            ' of the table is its own element. WriteNode copies the whole
            ' subtree (attributes, CDATA, empty elements) and leaves the
            ' reader on the following node, so Read is only called when
            ' nothing was copied.
            Dim found As Boolean = False
            source.Read()
            While Not source.EOF
                If source.NodeType = XmlNodeType.Element AndAlso source.Name = tbName Then
                    bufferWriter.WriteNode(source, False)
                    found = True
                Else
                    source.Read()
                End If
            End While

            If Not found Then Return Nothing

            bufferWriter.WriteEndElement()
            bufferWriter.Flush()
            buffer.Position = 0

            Try
                dst = New DataSet
                Dim loader As New XmlTextReader(New StreamReader(buffer, System.Text.Encoding.UTF8, True, 1024))
                loader.WhitespaceHandling = WhitespaceHandling.None
                dst.ReadXml(loader, XmlReadMode.ReadSchema)

                ' Prefer the table that was asked for; fall back to the first
                ' table, which is what was always returned previously.
                If dst.Tables.Contains(tbName) Then Return dst.Tables(tbName).Copy()
                Return dst.Tables(0).Copy()
            Catch ex As Exception
                ' Trace instead of MsgBox: this class is meant to run inside
                ' a service, where a modal dialog is not acceptable.
                System.Diagnostics.Trace.WriteLine(ex.ToString())
                Return Nothing
            End Try
        Finally
            Release(xsdStream)
            Release(xmlStream)
            Release(buffer)
            Release(dst)
        End Try
    End Function

    ' Replaces all elements named tb.TableName in the data file with the
    ' current rows of tb. The new file is built in a temporary file next to
    ' the data file and then swapped in.
    '   tb - table to save; it is neither modified nor attached to a DataSet.
    ' Returns True on success, False when merging or swapping fails.
    ' Throws ArgumentNullException when tb is Nothing. Failures while
    ' serialising tb or opening the files propagate, as before.
    Public Function SaveTable(ByVal tb As System.Data.DataTable) As Boolean Implements ILargeXmlFile.SaveTable
        If tb Is Nothing Then Throw New ArgumentNullException("tb")

        ' The counter is Shared, so it must be advanced atomically across
        ' all instances, not under a per-instance lock.
        Dim fileNo As Int32 = System.Threading.Interlocked.Increment(mFileNo)

        ' Keep temporary files beside the data file instead of in whatever
        ' the process's current directory happens to be.
        Dim workDir As String = Path.GetDirectoryName(Path.GetFullPath(Me.mXmlFile))
        Dim tmpFileTb As String = Path.Combine(workDir, "_tmp" & CStr(fileNo)) ' serialised table
        Dim tmpSavedXmlFile As String = Path.Combine(workDir, "_tmp" & CStr(fileNo) & "_save") ' merged data file

        Dim tbName As String = tb.TableName

        Dim tableStream As FileStream = Nothing
        Dim sourceStream As FileStream = Nothing
        Dim targetStream As FileStream = Nothing
        Dim dst As DataSet = Nothing
        Dim succeeded As Boolean = False

        Try
            ' Stage 1: serialise the table to a temporary file. A copy is
            ' used because a DataTable can belong to only one DataSet;
            ' adding tb itself would fail for tables that already have an
            ' owner and would leave tb attached to this throwaway DataSet.
            tableStream = New FileStream(tmpFileTb, FileMode.Create)
            Dim tableWriter As New XmlTextWriter(New StreamWriter(tableStream, System.Text.Encoding.UTF8, 1024))
            tableWriter.Formatting = Formatting.Indented
            tableWriter.Indentation = 4

            dst = New DataSet("Data")
            dst.Tables.Add(tb.Copy())
            dst.WriteXml(tableWriter, XmlWriteMode.IgnoreSchema)
            tableWriter.Flush()
            tableStream.Position = 0

            Dim tableReader As New XmlTextReader(New StreamReader(tableStream, System.Text.Encoding.UTF8, True, 1024))
            tableReader.WhitespaceHandling = WhitespaceHandling.None

            ' Stage 2: stream the existing file into a new one.
            sourceStream = New FileStream(Me.mXmlFile, FileMode.Open, FileAccess.Read, FileShare.None)
            Dim source As New XmlTextReader(New StreamReader(sourceStream, System.Text.Encoding.UTF8, True, 1024))
            source.WhitespaceHandling = WhitespaceHandling.None

            targetStream = New FileStream(tmpSavedXmlFile, FileMode.Create)
            Dim target As New XmlTextWriter(New StreamWriter(targetStream, System.Text.Encoding.UTF8, 1024))
            target.Formatting = Formatting.Indented
            target.Indentation = 4

            Try
                ' MoveToContent skips an XML declaration or leading comments,
                ' so the root name is taken from the real root element.
                If source.MoveToContent() <> XmlNodeType.Element Then Return False

                Dim rootDepth As Integer = source.Depth
                Dim rootIsEmpty As Boolean = source.IsEmptyElement
                target.WriteStartElement(source.Prefix, source.LocalName, source.NamespaceURI)
                target.WriteAttributes(source, False)

                ' Copy everything except the stale rows of this table.
                If Not rootIsEmpty Then
                    source.Read()
                    While Not source.EOF
                        If source.NodeType = XmlNodeType.Element AndAlso source.Name = tbName Then
                            ' Skip drops the whole subtree and moves to the
                            ' node after it (also correct for <tb />).
                            source.Skip()
                        ElseIf source.NodeType = XmlNodeType.EndElement AndAlso source.Depth = rootDepth Then
                            Exit While
                        Else
                            CopyCurrentNode(source, target)
                            source.Read()
                        End If
                    End While
                End If

                ' Append the fresh rows just before the root is closed.
                tableReader.Read()
                While Not tableReader.EOF
                    If tableReader.NodeType = XmlNodeType.Element AndAlso tableReader.Name = tbName Then
                        target.WriteNode(tableReader, False)
                    Else
                        tableReader.Read()
                    End If
                End While

                target.WriteEndElement()
                target.Flush()

                ' Both files must be closed before they can be swapped.
                sourceStream.Close()
                sourceStream = Nothing
                targetStream.Close()
                targetStream = Nothing

                SyncLock mSwapLock
                    File.Delete(Me.mXmlFile)
                    File.Move(tmpSavedXmlFile, Me.mXmlFile)
                End SyncLock

                succeeded = True
                Return True
            Catch ex As Exception
                System.Diagnostics.Trace.WriteLine(ex.ToString())
                Return False
            End Try
        Finally
            ' Runs on every path, so no stream stays open (the data file is
            ' opened with FileShare.None) and no temporary file is left over.
            Release(tableStream)
            Release(sourceStream)
            Release(targetStream)
            Release(dst)
            DeleteQuietly(tmpFileTb)
            If Not succeeded Then DeleteQuietly(tmpSavedXmlFile)
        End Try
    End Function

    ' Writes the node the reader is positioned on (not its children) to the
    ' writer. Elements keep their attributes, and an empty element (<a />)
    ' is closed immediately because the reader reports no EndElement for it.
    Private Shared Sub CopyCurrentNode(ByVal reader As XmlReader, ByVal writer As XmlWriter)
        Select Case reader.NodeType
            Case XmlNodeType.Element
                ' Capture before WriteAttributes moves over the attributes.
                Dim isEmpty As Boolean = reader.IsEmptyElement
                writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI)
                writer.WriteAttributes(reader, False)
                If isEmpty Then writer.WriteEndElement()
            Case XmlNodeType.EndElement
                writer.WriteEndElement()
            Case XmlNodeType.Text
                writer.WriteString(reader.Value)
            Case XmlNodeType.CDATA
                writer.WriteCData(reader.Value)
            Case XmlNodeType.Whitespace
                writer.WriteWhitespace(" ")
            Case XmlNodeType.SignificantWhitespace
                writer.WriteWhitespace(reader.Value)
        End Select
    End Sub

    ' Disposes a resource if it was created. Cleanup is best effort: the
    ' method's result or exception has already been decided by the caller.
    Private Shared Sub Release(ByVal resource As IDisposable)
        If resource Is Nothing Then Return
        Try
            resource.Dispose()
        Catch ex As Exception
            System.Diagnostics.Trace.WriteLine(ex.ToString())
        End Try
    End Sub

    ' Deletes a temporary file, tracing instead of throwing on failure.
    Private Shared Sub DeleteQuietly(ByVal fileName As String)
        Try
            File.Delete(fileName)
        Catch ex As Exception
            System.Diagnostics.Trace.WriteLine(ex.ToString())
        End Try
    End Sub
End Class

 



分享到
  • 微信分享
  • 新浪微博
  • QQ好友
  • QQ空间
点击: