-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmdict_def.go
More file actions
192 lines (169 loc) · 5.71 KB
/
mdict_def.go
File metadata and controls
192 lines (169 loc) · 5.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
//
// Copyright (C) 2023 Quan Chen <chenquan_act@163.com>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package mdx
// MdictType identifies which kind of MDict dictionary file is being handled
// (MDX or MDD).
type MdictType int

// Dictionary file kinds. Numbering starts at 1, so the zero value of
// MdictType equals neither constant.
const (
	// MdictTypeMdd marks an MDD file (value 1).
	MdictTypeMdd MdictType = iota + 1
	// MdictTypeMdx marks an MDX file (value 2).
	MdictTypeMdx
)

// Encryption modes. These are untyped integer constants.
const (
	// EncryptNoEnc means nothing is encrypted (value 0).
	EncryptNoEnc = iota
	// EncryptRecordEnc means the record block is encrypted (value 1).
	EncryptRecordEnc
	// EncryptKeyInfoEnc means the key info block is encrypted (value 2).
	EncryptKeyInfoEnc
)

// Number formats. These are untyped integer constants.
const (
	// NumfmtBe8bytesq selects big-endian 8-byte unsigned integers (value 0).
	NumfmtBe8bytesq = iota
	// NumfmtBe4bytesi selects big-endian 4-byte unsigned integers (value 1).
	NumfmtBe4bytesi
)

// Text encodings. These are untyped integer constants.
const (
	// EncodingUtf8 is UTF-8 (value 0).
	EncodingUtf8 = iota
	// EncodingUtf16 is UTF-16 (value 1).
	EncodingUtf16
	// EncodingBig5 is Big5 (value 2).
	EncodingBig5
	// EncodingGbk is GBK (value 3).
	EncodingGbk
	// EncodingGb2312 is GB2312 (value 4).
	EncodingGb2312
	// EncodingGb18030 is GB18030 (value 5).
	EncodingGb18030
)
// MdictBase is the root structure used while parsing an MDict file. It keeps
// the file location and type next to every piece of metadata and every data
// structure read from that file.
type MdictBase struct {
	// filePath is the path of the dictionary file.
	filePath string
	// fileType tells whether the file is an MDX or an MDD.
	fileType MdictType

	// meta holds dictionary-wide settings and descriptive attributes.
	meta *mdictMeta
	// header holds the header section of the file.
	header *mdictHeader

	// keyBlockMeta describes the key-block part (counts, sizes, offset).
	keyBlockMeta *mdictKeyBlockMeta
	// keyBlockInfo lists the per-key-block information items.
	keyBlockInfo *mdictKeyBlockInfo
	// keyBlockData holds the keyword entries.
	keyBlockData *mdictKeyBlockData

	// recordBlockMeta describes the record-block part (counts, sizes, offsets).
	recordBlockMeta *mdictRecordBlockMeta
	// recordBlockInfo lists the per-record-block information items.
	recordBlockInfo *mdictRecordBlockInfo
	//RecordBlockData *MDictRecordBlockData

	// rangeTreeRoot is the root node of a tree over record blocks.
	// NOTE(review): presumably used to locate the block containing a given
	// record offset — confirm against the tree implementation.
	rangeTreeRoot *RecordBlockRangeTreeNode

	// Keyword lookup tables; each maps a string key to its keyword entry.
	// NOTE(review): how the keys of each table are derived (exact vs.
	// normalized forms) is not visible in this file — confirm at the
	// place where the tables are filled.
	exactLookup, comparableLookup, resourceComparableLookup map[string]*MDictKeywordEntry
}
/********************************
 *      private data types      *
 ********************************/
// mdictHeader holds the header section of an MDict file together with the
// size and checksum values that accompany it.
// NOTE(review): the field meanings below are inferred from the field names —
// confirm against the code that reads the header.
type mdictHeader struct {
	// headerBytesSize is presumably the byte length of the header info.
	headerBytesSize uint32
	// headerInfoBytes is the header info as raw bytes.
	headerInfoBytes []byte
	// headerInfo is the header info as a string (presumably the decoded
	// form of headerInfoBytes).
	headerInfo string
	// adler32Checksum is presumably the Adler-32 checksum stored for the
	// header info.
	adler32Checksum uint32
	// dictionaryHeaderByteSize is presumably the total size in bytes of the
	// whole header section — TODO confirm.
	dictionaryHeaderByteSize int64
}
// mdictMeta collects dictionary-wide settings and descriptive attributes.
// NOTE(review): presumably filled in from the parsed header — confirm.
type mdictMeta struct {
	// encryptType is presumably one of the Encrypt* constants.
	encryptType int
	// version is the version number of the dictionary format.
	version float32
	// numberWidth, numberFormat and encoding describe how numbers and text
	// are stored. NOTE(review): numberFormat presumably holds a Numfmt*
	// constant and encoding an Encoding* constant — confirm.
	numberWidth, numberFormat, encoding int
	// keyBlockMetaStartOffset is the byte offset in the mdx/mdd file at
	// which the key-block part starts.
	keyBlockMetaStartOffset int64
	// Descriptive attributes of the dictionary.
	description, title, creationDate, generatedByEngineVersion string
}
// mdictKeyBlockMeta holds the counts, sizes and offset that describe the
// key-block part of an mdx/mdd file.
type mdictKeyBlockMeta struct {
	// keyBlockNum is the number of key blocks; entriesNum is the number of
	// entries.
	keyBlockNum, entriesNum int64
	// Size of the key-block information: decompressed first, compressed
	// second.
	keyBlockInfoDecompressSize, keyBlockInfoCompressedSize int64
	// keyBlockDataTotalSize is the size of the key-block data (decompressed).
	keyBlockDataTotalSize int64
	// keyBlockInfoStartOffset is the position in the mdx/mdd file at which
	// the key-block information starts.
	keyBlockInfoStartOffset int64
}
// mdictKeyBlockInfo holds the key-block information section: one item per
// key block plus an offset.
type mdictKeyBlockInfo struct {
	// keyBlockEntriesStartOffset is presumably the file offset at which the
	// key-block entries begin — TODO confirm against the reader.
	keyBlockEntriesStartOffset int64
	// keyBlockInfoList holds the information items, one per key block.
	keyBlockInfoList []*mdictKeyBlockInfoItem
}
// mdictKeyBlockInfoItem describes a single key block: its first and last
// keys, its index, and its compressed and decompressed sizes.
type mdictKeyBlockInfoItem struct {
	// firstKey is the first key of the block.
	firstKey string
	// firstKeySize is the size of firstKey.
	firstKeySize int
	// lastKey is the last key of the block.
	lastKey string
	// lastKeySize is the size of lastKey.
	lastKeySize int
	// keyBlockInfoIndex is the index of this item.
	// NOTE(review): presumably its position in keyBlockInfoList — confirm.
	keyBlockInfoIndex int
	// Compressed size of the block and the matching accumulator.
	// NOTE(review): the accumulator is presumably the running total of the
	// sizes of the preceding blocks — confirm.
	keyBlockCompressSize, keyBlockCompAccumulator int64
	// Decompressed size of the block and the matching accumulator (same
	// caveat as above).
	keyBlockDeCompressSize, keyBlockDeCompressAccumulator int64
}
// mdictKeyBlockData holds the keyword entries together with a size and an
// offset value.
type mdictKeyBlockData struct {
	// keyEntries holds the keyword entries.
	keyEntries []*MDictKeywordEntry
	// keyEntriesSize is presumably the number of entries in keyEntries —
	// TODO confirm (it could also be a byte size).
	keyEntriesSize int64
	// recordBlockMetaStartOffset is presumably the file offset at which the
	// record-block metadata begins — TODO confirm.
	recordBlockMetaStartOffset int64
}
// mdictRecordBlockMeta holds the offsets, counts and sizes that describe the
// record-block part of the file.
// NOTE(review): the field meanings are inferred from the field names —
// confirm against the code that reads the record-block metadata.
type mdictRecordBlockMeta struct {
	// Start and end offsets of the record metadata.
	keyRecordMetaStartOffset, keyRecordMetaEndOffset int64
	// recordBlockNum is the number of record blocks; entriesNum is the
	// number of entries.
	recordBlockNum, entriesNum int64
	// Size of the record-block information and of the record blocks.
	// NOTE(review): "Comp" presumably stands for "compressed" — confirm.
	recordBlockInfoCompSize, recordBlockCompSize int64
}
// mdictRecordBlockInfo holds the record-block information section: one list
// item per record block plus the offsets that delimit the section.
// NOTE(review): the offset meanings are inferred from the field names —
// confirm against the reader.
type mdictRecordBlockInfo struct {
	// recordInfoList holds the information items, one per record block.
	recordInfoList []*MdictRecordBlockInfoListItem
	// Start and end offsets of the record-block information.
	recordBlockInfoStartOffset, recordBlockInfoEndOffset int64
	// recordBlockDataStartOffset is the offset at which the record-block
	// data starts.
	recordBlockDataStartOffset int64
}
// MdictRecordBlockInfoListItem describes a single record block by its
// compressed and decompressed sizes and the matching accumulator offsets.
type MdictRecordBlockInfoListItem struct {
	// Size of the block: compressed first, decompressed second.
	compressSize, deCompressSize int64
	// Accumulator offsets for the compressed and the decompressed data.
	// NOTE(review): presumably the running totals of the sizes of the
	// preceding blocks — confirm against the code that fills this list.
	compressAccumulatorOffset, deCompressAccumulatorOffset int64
}
/********************************
 *      public data types       *
 ********************************/
// MDictKeywordEntry is one keyword entry taken from the key block.
type MDictKeywordEntry struct {
	// RecordStartOffset and RecordEndOffset delimit the record that belongs
	// to the keyword.
	// NOTE(review): presumably offsets into the decompressed record data —
	// confirm against the lookup code.
	RecordStartOffset, RecordEndOffset int64
	// KeyWord is the keyword text.
	KeyWord string
	// KeyBlockIdx is presumably the index of the key block this entry was
	// read from — TODO confirm.
	KeyBlockIdx int64
}
// MDictKeywordIndex provides a detailed index for a keyword,
// linking it to its specific location within a record block.
// Both parts are stored by value, so copying an MDictKeywordIndex copies
// the entry and the record-block description with it.
type MDictKeywordIndex struct {
	// The two fields below are currently disabled.
	//encoding int
	//encryptType int
	// KeywordEntry is the keyword entry this index refers to.
	KeywordEntry MDictKeywordEntry
	// RecordBlock describes the record block and the part of it that
	// belongs to the keyword.
	RecordBlock MDictKeywordIndexRecordBlock
}
// MDictKeywordIndexRecordBlock describes the record block that stores the
// definition of one particular keyword.
// NOTE(review): the offset semantics below are inferred from the field
// names — confirm against the code that builds the index.
type MDictKeywordIndexRecordBlock struct {
	// DataStartOffset is the offset at which the data of the record block
	// starts.
	DataStartOffset int64
	// Size of the record block: compressed first, decompressed second.
	CompressSize, DeCompressSize int64
	// Start and end offsets of the part that belongs to the keyword.
	KeyWordPartStartOffset, KeyWordPartDataEndOffset int64
}