-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathupload.go
More file actions
249 lines (235 loc) · 6.6 KB
/
upload.go
File metadata and controls
249 lines (235 loc) · 6.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
// These functions will upload data to a database
package dbIO
import (
"bufio"
"bytes"
"database/sql"
"fmt"
"math"
"os"
"strconv"
"strings"
"unicode/utf8"
)
// Insert executes the given INSERT command
func (d *DBIO) Insert(table, command string) error {
var err error
cmd, err := d.DB.Prepare(command)
if err != nil {
d.logger.Printf("[Error] Formatting command for upload to %s: %v\n", table, err)
}
_, err = cmd.Exec()
cmd.Close()
if err != nil {
d.logger.Printf("[Error] Uploading to %s: %v\n", table, err)
}
return err
}
func getDenominator(list [][]string) int {
// Returns denominator for subsetting upload slice (size in bytes / 16Mb)
max := 10000000.0
size := 0
for _, i := range list {
for _, j := range i {
size += len([]byte(j))
}
}
return int(math.Ceil(float64(size*8) / max))
}
// UploadSlice formats two-dimensional string slice for upload to database and splits uploads into chunks if it exceeds SQL size limit.
func (d *DBIO) UploadSlice(table string, values [][]string) error {
var err error
if len(values) > 0 {
// Upload in chunks
idx := len(values) / getDenominator(values)
var start, end int
for start < len(values) {
// Advance indeces
end = start + idx
if end > len(values) {
// Get last less than idx rows
end = len(values)
}
vals, _ := FormatSlice(values[start:end])
err = d.Insert(table, fmt.Sprintf("INSERT INTO %s (%s) VALUES %s;", table, d.Columns[table], vals))
if err == nil {
fmt.Printf("\r\tUploaded %d of %d rows to %s.", end, len(values), table)
} else {
break
}
start += idx
}
fmt.Println()
}
return err
}
// UpdateDB adds new rows to table. Values must be formatted using FormatMap or FormatSlice.
func (d *DBIO) UpdateDB(table, values string, l int) int {
cmd := fmt.Sprintf("INSERT INTO %s (%s) VALUES %s;", table, d.Columns[table], values)
err := d.Insert(table, cmd)
if err != nil {
return 0
}
fmt.Printf("\tUploaded %d rows to %s.\n", l, table)
return 1
}
// Returns value with any reserved characters escaped and standarizes NAs.
func escapeChars(v string) string {
chars := []string{"'", "\"", "_"}
na := []string{" na ", " Na ", "N/A"}
// Reset backslashes to dashes
v = strings.Replace(v, `\`, "-", -1)
for _, i := range chars {
idx := 0
for strings.Contains(v[idx:], i) == true {
// Escape each occurance of a character
ind := strings.Index(v[idx:], i)
idx = idx + ind
v = v[:idx] + `\` + v[idx:]
idx++
idx++
}
}
for _, i := range na {
// Standardize NA values
if strings.Contains(v, i) == true {
v = strings.Replace(v, i, "NA", -1)
} else if strings.TrimSpace(i) == strings.TrimSpace(v) {
v = "NA"
}
}
return v
}
// Returns valid string for upload to database.
func validateString(v string) string {
if _, err := strconv.Atoi(v); err != nil {
// Avoid assigning NA to numerical value
if utf8.ValidString(v) == false || strings.Contains(v, `\xEF\xBF\xBD`) == true {
v = "NA"
}
}
return escapeChars(v)
}
// FormatMap converts a map of string slices to a string formatted with parentheses, commas, and appostrophe's where needed. Returns the number of rows formatted.
func FormatMap(data map[string][]string) (string, int) {
buffer := bytes.NewBufferString("")
first := true
count := 0
for _, val := range data {
f := true
if first == false {
// Add sepearating comma
buffer.WriteByte(',')
}
buffer.WriteByte('(')
for _, v := range val {
if f == false {
buffer.WriteByte(',')
}
// Wrap in apostrophes to preserve spaces and reserved characters
buffer.WriteByte('\'')
buffer.WriteString(validateString(v))
buffer.WriteByte('\'')
f = false
}
buffer.WriteByte(')')
first = false
count++
}
return buffer.String(), count
}
// FormatSlice converts a two-dimensional string slice to a string formatted with parentheses, commas, and appostrophe's where needed. Returns the number of rows formatted.
func FormatSlice(data [][]string) (string, int) {
// Organizes input data into n rows for upload
buffer := bytes.NewBufferString("")
count := 0
for idx, row := range data {
if idx != 0 {
buffer.WriteByte(',')
}
buffer.WriteByte('(')
for i, v := range row {
if i != 0 {
buffer.WriteByte(',')
}
// Wrap in apostrophes to preserve spaces and reserved characters
buffer.WriteByte('\'')
buffer.WriteString(validateString(v))
buffer.WriteByte('\'')
}
buffer.WriteByte(')')
count++
}
return buffer.String(), count
}
// Converts sql query result to map of strings.
func (d *DBIO) columnMap(rows *sql.Rows) {
columns, _ := rows.Columns()
count := len(columns)
values := make([]interface{}, count)
pointers := make([]interface{}, count)
for rows.Next() {
for i := range columns {
pointers[i] = &values[i]
}
// Maps items to values via pointers
rows.Scan(pointers...)
// Use Sprintf to convert interface to string
k := fmt.Sprintf("%s", values[0])
v := fmt.Sprintf("%s", values[1])
d.Columns[k] = v
}
}
// GetTableColumns extracts table and column names from the database and stores them in the Columns map.
func (d *DBIO) GetTableColumns() {
d.Columns = make(map[string]string)
cmd := `SELECT table_name,GROUP_CONCAT(column_name ORDER BY ordinal_position) FROM information_schema.columns
WHERE table_schema = DATABASE() GROUP BY table_name ORDER BY table_name;`
rows, err := d.DB.Query(cmd)
if err != nil {
d.logger.Printf("[ERROR] Extracting table and column names: %v\n\n", err)
}
defer rows.Close()
d.columnMap(rows)
}
// ReadColumns builds a map of column statements with types from infile. See README for infile formatting.
func (d *DBIO) ReadColumns(infile string) []string {
var ret []string
var table string
f, err := os.Open(infile)
if err != nil {
d.logger.Fatalf("[ERROR] Reading %s: %v\n\n", infile, err)
}
defer f.Close()
input := bufio.NewScanner(f)
for input.Scan() {
line := strings.TrimSpace(string(input.Text()))
if len(line) >= 1 {
table += line + " "
if strings.Contains(line, ";") {
ret = append(ret, strings.TrimSpace(table))
table = ""
}
}
}
return ret
}
// NewTables executes new table commands from infile. See README for infile formatting.
func (d *DBIO) NewTables(infile string) {
fmt.Println("\n\tInitializing new tables...")
for _, i := range d.ReadColumns(infile) {
// Table name is last word before (
cmd, err := d.DB.Prepare(i)
if err != nil {
d.logger.Fatalf("[Error] Formatting command %s. %v\n\n", i, err)
}
_, err = cmd.Exec()
if err != nil {
d.logger.Println(i)
d.logger.Fatalf("[Error] Executing %s. %v\n\n", i, err)
} else {
d.logger.Printf("Successfully executed %s...\n", i[:20])
}
}
d.GetTableColumns()
}