-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathLexer.php
More file actions
140 lines (122 loc) · 4.42 KB
/
Lexer.php
File metadata and controls
140 lines (122 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
<?php
/**
* TechDivision\Import\Adapter\Goodby\Lexer
*
* PHP version 7
*
* @author Tim Wagner <t.wagner@techdivision.com>
* @copyright 2016 TechDivision GmbH <info@techdivision.com>
* @license https://opensource.org/licenses/MIT
* @link https://github.com/techdivision/import
* @link http://www.techdivision.com
*/
namespace TechDivision\Import\Adapter\Goodby;
use Goodby\CSV\Import\Protocol\LexerInterface;
use Goodby\CSV\Import\Protocol\InterpreterInterface;
use Goodby\CSV\Import\Standard\LexerConfig;
use Goodby\CSV\Import\Standard\StreamFilter\ConvertMbstringEncoding;
/**
* Custom lexer implementation that reads a CSV file and hands each row to an interpreter.
*
* @author Tim Wagner <t.wagner@techdivision.com>
* @copyright 2016 TechDivision GmbH <info@techdivision.com>
* @license https://opensource.org/licenses/MIT
* @link https://github.com/techdivision/import
* @link http://www.techdivision.com
*/
class Lexer implements LexerInterface
{

    /**
     * The lexer configuration.
     *
     * @var \Goodby\CSV\Import\Standard\LexerConfig
     */
    private $config;

    /**
     * Initialize the instance with the passed configuration.
     *
     * When no configuration is passed, a default LexerConfig instance is created.
     * The mbstring encoding stream filter is registered as a side effect.
     *
     * @param \Goodby\CSV\Import\Standard\LexerConfig|null $config The lexer configuration
     */
    public function __construct(?LexerConfig $config = null)
    {

        // use the passed configuration or fall back to a default one
        $this->config = $config ?? new LexerConfig();

        // register the encoding filter
        ConvertMbstringEncoding::register();
    }

    /**
     * Returns the lexer configuration.
     *
     * @return \Goodby\CSV\Import\Standard\LexerConfig The configuration instance
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * Parse the passed CSV file and pass every relevant row to the interpreter.
     *
     * The header line is skipped when the configuration says so, as are rows that
     * consist of a single empty cell. While the file is processed the locale is
     * switched to en_US.UTF-8; the previous locale is restored afterwards, even
     * when the interpreter or the CSV iteration throws an exception.
     *
     * @param string                                           $filename    The filename to parse
     * @param \Goodby\CSV\Import\Protocol\InterpreterInterface $interpreter The interpreter instance
     *
     * @return void
     * @throws \RuntimeException Is thrown by \SplFileObject if the file can not be opened
     */
    public function parse($filename, InterpreterInterface $interpreter)
    {

        // for mac's office excel csv
        @ini_set('auto_detect_line_endings', true);

        // initialize the configuration
        $delimiter = $this->config->getDelimiter();
        $enclosure = $this->config->getEnclosure();
        $escape = $this->config->getEscape();
        // an empty escape character is replaced by a NUL byte
        $escape = empty($escape) ? "\0" : $escape;
        $fromCharset = $this->config->getFromCharset();
        $toCharset = $this->config->getToCharset();
        $flags = $this->config->getFlags();
        $ignoreHeader = $this->config->getIgnoreHeaderLine();

        // query whether or not the charset has to be converted
        if ($fromCharset === null) {
            $url = $filename;
        } else {
            $url = ConvertMbstringEncoding::getFilterURL($filename, $fromCharset, $toCharset);
        }

        // initialize the CSV file object
        $csv = new \SplFileObject($url);
        $csv->setCsvControl($delimiter, $enclosure, $escape);
        $csv->setFlags($flags);

        // backup current locale ('0' only queries the setting, it does not change it)
        $originalLocale = setlocale(LC_ALL, '0');
        setlocale(LC_ALL, 'en_US.UTF-8');

        try {
            // http://en.wikipedia.org/wiki/Byte_order_mark#UTF-8
            $bom = pack('CCC', 0xEF, 0xBB, 0xBF);

            // process each line of the CSV file
            foreach ($csv as $lineNumber => $line) {
                if ($lineNumber === 0 && isset($line[0])) {
                    // remove the UTF-8 BOM (as written by Windows tools) if it exists
                    if (substr($line[0], 0, 3) === $bom) {
                        $line[0] = substr($line[0], 3);
                    }
                    // remove quotes in the first cell of the first row
                    if (strpos($line[0], '"') !== false) {
                        $line[0] = str_replace('"', '', $line[0]);
                    }
                }

                // skip the header line (if configured) and rows with a single empty cell
                if ($ignoreHeader && $lineNumber === 0
                    || (count($line) === 1
                        && ($line[0] === null || trim($line[0]) === '')
                    )
                ) {
                    continue;
                }

                $interpreter->interpret($line);
            }
        } finally {
            // reset the locale, no matter whether processing succeeded or failed
            $this->restoreLocale($originalLocale);
        }
    }

    /**
     * Restore the locale from the value returned by setlocale(LC_ALL, '0').
     *
     * The queried string is handed back to setlocale() unchanged first. If that is
     * rejected and the string has the composite form "LC_CTYPE=...;LC_NUMERIC=...",
     * every category known to PHP is restored individually.
     *
     * @param string|false $locale The locale string to restore
     *
     * @return void
     */
    private function restoreLocale($locale)
    {

        // nothing to restore if the locale could not be queried
        if (!is_string($locale) || $locale === '') {
            return;
        }

        // the string returned by a locale query can be passed back as is; the
        // error suppression covers the warning PHP raises for very long names
        if (@setlocale(LC_ALL, $locale) !== false) {
            return;
        }

        // fallback: restore the categories of a composite locale string one by one
        foreach (explode(';', $locale) as $assignment) {
            $parts = explode('=', $assignment, 2);
            if (count($parts) === 2 && defined($parts[0])) {
                setlocale(constant($parts[0]), $parts[1]);
            }
        }
    }
}