Skip to content

Commit 15733e6

Browse files
committed
Merge PR #123
2 parents a5f9024 + 121dfc1 commit 15733e6

6 files changed

Lines changed: 182 additions & 78 deletions

File tree

cached_reader.go

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,45 +5,48 @@ import (
55
)
66

77
type cachedReader struct {
8-
buffer *bufio.Reader
9-
cache []byte
10-
cacheCap int
11-
cacheLen int
8+
buffer *bufio.Reader
9+
cache []byte
1210
caching bool
1311
}
1412

1513
func newCachedReader(r *bufio.Reader) *cachedReader {
1614
return &cachedReader{
17-
buffer: r,
18-
cache: make([]byte, 4096),
19-
cacheCap: 4096,
20-
cacheLen: 0,
21-
caching: false,
15+
buffer: r,
16+
cache: make([]byte, 0, 4096),
17+
caching: false,
2218
}
2319
}
2420

2521
func (c *cachedReader) StartCaching() {
26-
c.cacheLen = 0
22+
c.cache = c.cache[:0]
2723
c.caching = true
2824
}
2925

30-
func (c *cachedReader) ReadByte() (byte, error) {
31-
if !c.caching {
32-
return c.buffer.ReadByte()
33-
}
34-
b, err := c.buffer.ReadByte()
26+
func (c *cachedReader) ReadByte() (b byte, err error) {
27+
b, err = c.buffer.ReadByte()
3528
if err != nil {
36-
return b, err
29+
return
3730
}
38-
if c.cacheLen < c.cacheCap {
39-
c.cache[c.cacheLen] = b
40-
c.cacheLen++
31+
if c.caching {
32+
c.cacheByte(b)
4133
}
42-
return b, err
34+
return
4335
}
4436

4537
func (c *cachedReader) Cache() []byte {
46-
return c.cache[:c.cacheLen]
38+
return c.cache
39+
}
40+
41+
func (c *cachedReader) CacheWithLimit(n int) []byte {
42+
if n < 1 {
43+
return nil
44+
}
45+
l := len(c.cache)
46+
if n > l {
47+
n = l
48+
}
49+
return c.cache[:n]
4750
}
4851

4952
func (c *cachedReader) StopCaching() {
@@ -55,15 +58,22 @@ func (c *cachedReader) Read(p []byte) (int, error) {
5558
if err != nil {
5659
return n, err
5760
}
58-
if c.caching && c.cacheLen < c.cacheCap {
61+
if c.caching {
5962
for i := 0; i < n; i++ {
60-
c.cache[c.cacheLen] = p[i]
61-
c.cacheLen++
62-
if c.cacheLen >= c.cacheCap {
63+
if !c.cacheByte(p[i]) {
6364
break
6465
}
6566
}
6667
}
6768
return n, err
6869
}
6970

71+
func (c *cachedReader) cacheByte(b byte) bool {
72+
n := len(c.cache)
73+
if n == cap(c.cache) {
74+
return false
75+
}
76+
c.cache = c.cache[:n+1]
77+
c.cache[n] = b
78+
return true
79+
}

cached_reader_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,19 @@ func TestCaching(t *testing.T) {
3939
if !bytes.Equal(cached, []byte("BCDEF")) {
4040
t.Fatalf("Incorrect cached buffer value")
4141
}
42+
43+
cached = cachedReader.CacheWithLimit(-1)
44+
if cached != nil {
45+
t.Fatalf("Incorrect cached buffer value")
46+
}
47+
48+
cached = cachedReader.CacheWithLimit(3)
49+
if !bytes.Equal(cached, []byte("BCD")) {
50+
t.Fatalf("Incorrect cached buffer value")
51+
}
52+
53+
cached = cachedReader.CacheWithLimit(1000)
54+
if !bytes.Equal(cached, []byte("BCDEF")) {
55+
t.Fatalf("Incorrect cached buffer value")
56+
}
4257
}

node.go

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ func WithPreserveSpace() OutputOption {
9292
}
9393
}
9494

95+
// WithoutPreserveSpace will not preserve spaces in output
96+
func WithoutPreserveSpace() OutputOption {
97+
return func(oc *outputConfiguration) {
98+
oc.preserveSpaces = false
99+
}
100+
}
101+
95102
// WithIndentation sets the indentation string used for formatting the output.
96103
func WithIndentation(indentation string) OutputOption {
97104
return func(oc *outputConfiguration) {
@@ -328,7 +335,9 @@ func (n *Node) Write(writer io.Writer, self bool) error {
328335

329336
// WriteWithOptions writes xml with given options to given writer.
330337
func (n *Node) WriteWithOptions(writer io.Writer, opts ...OutputOption) (err error) {
331-
config := &outputConfiguration{}
338+
config := &outputConfiguration{
339+
preserveSpaces: true,
340+
}
332341
// Set the options
333342
for _, opt := range opts {
334343
opt(config)
@@ -400,11 +409,7 @@ func AddChild(parent, n *Node) {
400409
parent.LastChild = n
401410
}
402411

403-
// AddSibling adds a new node 'n' as a sibling of a given node 'sibling'.
404-
// Note it is not necessarily true that the new node 'n' would be added
405-
// immediately after 'sibling'. If 'sibling' isn't the last child of its
406-
// parent, then the new node 'n' will be added at the end of the sibling
407-
// chain of their parent.
412+
// AddSibling adds a new node 'n' as the last node of the sibling chain of a given node 'sibling'.
408413
func AddSibling(sibling, n *Node) {
409414
for t := sibling.NextSibling; t != nil; t = t.NextSibling {
410415
sibling = t
@@ -418,6 +423,19 @@ func AddSibling(sibling, n *Node) {
418423
}
419424
}
420425

426+
// AddImmediateSibling adds a new node 'n' as immediate sibling a given node 'sibling'.
427+
func AddImmediateSibling(sibling, n *Node) {
428+
n.Parent = sibling.Parent
429+
n.NextSibling = sibling.NextSibling
430+
sibling.NextSibling = n
431+
n.PrevSibling = sibling
432+
if n.NextSibling != nil {
433+
n.NextSibling.PrevSibling = n
434+
} else if n.Parent != nil {
435+
sibling.Parent.LastChild = n
436+
}
437+
}
438+
421439
// RemoveFromTree removes a node and its subtree from the document
422440
// tree it is in. If the node is the root of the tree, then it's no-op.
423441
func RemoveFromTree(n *Node) {
@@ -445,3 +463,15 @@ func RemoveFromTree(n *Node) {
445463
n.PrevSibling = nil
446464
n.NextSibling = nil
447465
}
466+
467+
// GetRoot returns a root of the tree where 'n' is a node.
468+
func GetRoot(n *Node) *Node {
469+
if n == nil {
470+
return nil
471+
}
472+
root := n
473+
for root.Parent != nil {
474+
root = root.Parent
475+
}
476+
return root
477+
}

node_test.go

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ func TestRemoveFromTree(t *testing.T) {
250250
testTrue(t, n != nil)
251251
RemoveFromTree(n)
252252
verifyNodePointers(t, doc)
253-
testValue(t, doc.OutputXML(false),
253+
testValue(t, doc.OutputXMLWithOptions(WithoutPreserveSpace()),
254254
`<?procinst?><!--comment--><aaa><bbb></bbb><ddd></ddd><ggg></ggg></aaa>`)
255255
})
256256

@@ -260,7 +260,7 @@ func TestRemoveFromTree(t *testing.T) {
260260
testTrue(t, n != nil)
261261
RemoveFromTree(n)
262262
verifyNodePointers(t, doc)
263-
testValue(t, doc.OutputXML(false),
263+
testValue(t, doc.OutputXMLWithOptions(WithoutPreserveSpace()),
264264
`<?procinst?><!--comment--><aaa><ddd><eee><fff></fff></eee></ddd><ggg></ggg></aaa>`)
265265
})
266266

@@ -270,7 +270,7 @@ func TestRemoveFromTree(t *testing.T) {
270270
testTrue(t, n != nil)
271271
RemoveFromTree(n)
272272
verifyNodePointers(t, doc)
273-
testValue(t, doc.OutputXML(false),
273+
testValue(t, doc.OutputXMLWithOptions(WithoutPreserveSpace()),
274274
`<?procinst?><!--comment--><aaa><bbb></bbb><ggg></ggg></aaa>`)
275275
})
276276

@@ -280,7 +280,7 @@ func TestRemoveFromTree(t *testing.T) {
280280
testTrue(t, n != nil)
281281
RemoveFromTree(n)
282282
verifyNodePointers(t, doc)
283-
testValue(t, doc.OutputXML(false),
283+
testValue(t, doc.OutputXMLWithOptions(WithoutPreserveSpace()),
284284
`<?procinst?><!--comment--><aaa><bbb></bbb><ddd><eee><fff></fff></eee></ddd></aaa>`)
285285
})
286286

@@ -290,7 +290,7 @@ func TestRemoveFromTree(t *testing.T) {
290290
testValue(t, procInst.Type, DeclarationNode)
291291
RemoveFromTree(procInst)
292292
verifyNodePointers(t, doc)
293-
testValue(t, doc.OutputXML(false),
293+
testValue(t, doc.OutputXMLWithOptions(WithoutPreserveSpace()),
294294
`<!--comment--><aaa><bbb></bbb><ddd><eee><fff></fff></eee></ddd><ggg></ggg></aaa>`)
295295
})
296296

@@ -300,19 +300,44 @@ func TestRemoveFromTree(t *testing.T) {
300300
testValue(t, commentNode.Type, CommentNode)
301301
RemoveFromTree(commentNode)
302302
verifyNodePointers(t, doc)
303-
testValue(t, doc.OutputXML(false),
303+
testValue(t, doc.OutputXMLWithOptions(WithoutPreserveSpace()),
304304
`<?procinst?><aaa><bbb></bbb><ddd><eee><fff></fff></eee></ddd><ggg></ggg></aaa>`)
305305
})
306306

307307
t.Run("remove call on root does nothing", func(t *testing.T) {
308308
doc := parseXML()
309309
RemoveFromTree(doc)
310310
verifyNodePointers(t, doc)
311-
testValue(t, doc.OutputXML(false),
311+
testValue(t, doc.OutputXMLWithOptions(WithoutPreserveSpace()),
312312
`<?procinst?><!--comment--><aaa><bbb></bbb><ddd><eee><fff></fff></eee></ddd><ggg></ggg></aaa>`)
313313
})
314314
}
315315

316+
func TestAddImmediateSibling(t *testing.T) {
317+
s := `<?xml version="1.0" encoding="UTF-8"?>
318+
<AAA>
319+
<BBB id="1"/>
320+
<CCC id="2">
321+
<DDD/>
322+
</CCC>
323+
<CCC id="3">
324+
<DDD/>
325+
</CCC>
326+
</AAA>`
327+
root, err := Parse(strings.NewReader(s))
328+
if err != nil {
329+
t.Error(err)
330+
}
331+
332+
aaa := findNode(root, "AAA")
333+
n := aaa.SelectElement("BBB")
334+
if n == nil {
335+
t.Fatalf("n is nil")
336+
}
337+
AddImmediateSibling(n, &Node{Type: ElementNode, Data: "r"})
338+
testValue(t, root.OutputXMLWithOptions(WithoutPreserveSpace()), `<?xml version="1.0" encoding="UTF-8"?><AAA><BBB id="1"></BBB><r></r><CCC id="2"><DDD></DDD></CCC><CCC id="3"><DDD></DDD></CCC></AAA>`)
339+
}
340+
316341
func TestSelectElement(t *testing.T) {
317342
s := `<?xml version="1.0" encoding="UTF-8"?>
318343
<AAA>
@@ -497,7 +522,6 @@ func TestWriteWithNamespacePrefix(t *testing.T) {
497522
}
498523
}
499524

500-
501525
func TestQueryWithPrefix(t *testing.T) {
502526
s := `<?xml version="1.0" encoding="UTF-8"?><S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/"><S:Body test="1"><ns2:Fault xmlns:ns2="http://schemas.xmlsoap.org/soap/envelope/" xmlns:ns3="http://www.w3.org/2003/05/soap-envelope"><faultcode>ns2:Client</faultcode><faultstring>This is a client fault</faultstring></ns2:Fault></S:Body></S:Envelope>`
503527
doc, _ := Parse(strings.NewReader(s))
@@ -582,7 +606,7 @@ func TestOutputXMLWithSpaceDirect(t *testing.T) {
582606
t.Errorf(`expected "%s", obtained "%s"`, expected, g)
583607
}
584608

585-
output := html.UnescapeString(doc.OutputXML(true))
609+
output := html.UnescapeString(doc.OutputXMLWithOptions(WithOutputSelf(), WithoutPreserveSpace()))
586610
if strings.Contains(output, "\n") {
587611
t.Errorf("the outputted xml contains newlines")
588612
}
@@ -606,7 +630,7 @@ func TestOutputXMLWithSpaceOverwrittenToPreserve(t *testing.T) {
606630
t.Errorf(`expected "%s", obtained "%s"`, expected, g)
607631
}
608632

609-
output := html.UnescapeString(doc.OutputXML(true))
633+
output := html.UnescapeString(doc.OutputXMLWithOptions(WithOutputSelf(), WithoutPreserveSpace()))
610634
if strings.Contains(output, "\n") {
611635
t.Errorf("the outputted xml contains newlines")
612636
}
@@ -680,8 +704,8 @@ func TestOutputXMLWithPreserveSpaceOption(t *testing.T) {
680704
</student>
681705
</class_list>`
682706
doc, _ := Parse(strings.NewReader(s))
683-
resultWithSpace := doc.OutputXMLWithOptions(WithPreserveSpace())
684-
resultWithoutSpace := doc.OutputXMLWithOptions()
707+
resultWithSpace := doc.OutputXMLWithOptions()
708+
resultWithoutSpace := doc.OutputXMLWithOptions(WithoutPreserveSpace())
685709
if !strings.Contains(resultWithSpace, "> Robert <") {
686710
t.Errorf("output was not expected. expected %v but got %v", " Robert ", resultWithSpace)
687711
}

parse.go

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package xmlquery
22

33
import (
44
"bufio"
5+
"bytes"
56
"encoding/xml"
67
"fmt"
78
"io"
@@ -39,15 +40,31 @@ func Parse(r io.Reader) (*Node, error) {
3940
func ParseWithOptions(r io.Reader, options ParserOptions) (*Node, error) {
4041
p := createParser(r)
4142
options.apply(p)
42-
for {
43-
_, err := p.parse()
44-
if err == io.EOF {
45-
return p.doc, nil
43+
var err error
44+
for err == nil {
45+
_, err = p.parse()
46+
}
47+
48+
if err == io.EOF {
49+
// additional check for validity
50+
// according to: https://www.w3.org/TR/xml
51+
// the document MUST contain at least ONE element
52+
valid := false
53+
for doc := p.doc; doc != nil; doc = doc.NextSibling {
54+
for node := doc.FirstChild; node != nil; node = node.NextSibling {
55+
if node.Type == ElementNode {
56+
valid = true
57+
break
58+
}
59+
}
4660
}
47-
if err != nil {
48-
return nil, err
61+
if !valid {
62+
return nil, fmt.Errorf("xmlquery: invalid XML document")
4963
}
64+
return p.doc, nil
5065
}
66+
67+
return nil, err
5168
}
5269

5370
type parser struct {
@@ -168,7 +185,7 @@ func (p *parser) parse() (*Node, error) {
168185

169186
if node.NamespaceURI != "" {
170187
if v, ok := p.space2prefix[node.NamespaceURI]; ok {
171-
cached := string(p.reader.Cache())
188+
cached := string(p.reader.CacheWithLimit(len(v.name) + len(node.Data) + 2))
172189
if strings.HasPrefix(cached, fmt.Sprintf("%s:%s", v.name, node.Data)) || strings.HasPrefix(cached, fmt.Sprintf("<%s:%s", v.name, node.Data)) {
173190
node.Prefix = v.name
174191
}
@@ -228,12 +245,11 @@ func (p *parser) parse() (*Node, error) {
228245
}
229246
case xml.CharData:
230247
// First, normalize the cache...
231-
cached := strings.ToUpper(string(p.reader.Cache()))
248+
cached := bytes.ToUpper(p.reader.CacheWithLimit(9))
232249
nodeType := TextNode
233-
if strings.HasPrefix(cached, "<![CDATA[") || strings.HasPrefix(cached, "![CDATA[") {
250+
if bytes.HasPrefix(cached, []byte("<![CDATA[")) || bytes.HasPrefix(cached, []byte("![CDATA[")) {
234251
nodeType = CharDataNode
235252
}
236-
237253
node := &Node{Type: nodeType, Data: string(tok), level: p.level}
238254
if p.level == p.prev.level {
239255
AddSibling(p.prev, node)

0 commit comments

Comments
 (0)