-
-
Notifications
You must be signed in to change notification settings - Fork 253
Expand file tree
/
Copy pathSettingsExamples.java
More file actions
200 lines (159 loc) · 8.26 KB
/
SettingsExamples.java
File metadata and controls
200 lines (159 loc) · 8.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/*******************************************************************************
* Copyright 2014 Univocity Software Pty Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package com.univocity.parsers.examples;
import com.univocity.parsers.common.processor.*;
import com.univocity.parsers.csv.*;
import com.univocity.parsers.fixed.*;
import org.testng.annotations.*;
import java.util.*;
/**
 * Examples demonstrating how individual parser settings change the rows produced
 * when the example input files are parsed.
 *
 * NOTE(review): the {@code //##CODE_START} and {@code //##CODE_END} comments look like
 * delimiters for snippets extracted by documentation tooling - confirm before moving or
 * removing them.
 */
public class SettingsExamples extends Example {

    /**
     * Parses the example CSV keeping only the fields "Price", "Year" and "Make",
     * selected by name.
     */
    @Test
    public void example001ColumnSelection() {
        CsvParserSettings settings = new CsvParserSettings();

        // The example file separates its lines with '\n'. Declaring that explicitly lets
        // systems such as MacOS and Windows process the file correctly
        // (MacOS uses '\r'; and Windows uses '\r\n').
        settings.getFormat().setLineSeparator("\n");

        //##CODE_START
        // Only the columns "Price", "Year" and "Make" are selected.
        // Every other field is simply skipped by the parser.
        settings.selectFields("Price", "Year", "Make");

        // Parse with these settings to obtain the rows that will be printed.
        List<String[]> rows = parseWithSettings(settings);
        //##CODE_END

        printAndValidate(rows);
    }

    /**
     * Parses the example CSV with the fields "Price", "Year" and "Make" selected by name
     * and with column reordering switched off.
     */
    @Test
    public void example002ColumnSelectionWithNoReordering() {
        CsvParserSettings settings = new CsvParserSettings();

        // The example file separates its lines with '\n'. Declaring that explicitly lets
        // systems such as MacOS and Windows process the file correctly
        // (MacOS uses '\r'; and Windows uses '\r\n').
        settings.getFormat().setLineSeparator("\n");

        //##CODE_START
        // Only the columns "Price", "Year" and "Make" are selected.
        // Every other field is simply skipped by the parser.
        settings.selectFields("Price", "Year", "Make");

        // Column reordering is on by default. With it turned off, columns come out
        // in the order in which the file defines them, and any field that was not selected
        // is null because the parser never processes it.
        settings.setColumnReorderingEnabled(false);

        // Parse with these settings to obtain the rows that will be printed.
        List<String[]> rows = parseWithSettings(settings);
        //##CODE_END

        printAndValidate(rows);
    }

    /**
     * Parses the example CSV keeping only the columns at indexes 4, 0 and 1.
     */
    @Test
    public void example003ColumnSelectionByIndex() {
        CsvParserSettings settings = new CsvParserSettings();

        // The example file separates its lines with '\n'. Declaring that explicitly lets
        // systems such as MacOS and Windows process the file correctly
        // (MacOS uses '\r'; and Windows uses '\r\n').
        settings.getFormat().setLineSeparator("\n");

        //##CODE_START
        // Columns are picked by position this time.
        // Values found in any other column are simply skipped by the parser.
        settings.selectIndexes(4, 0, 1);

        // Parse with these settings to obtain the rows that will be printed.
        List<String[]> rows = parseWithSettings(settings);
        //##CODE_END

        printAndValidate(rows);
    }

    /**
     * Parses the example CSV after adjusting a broad set of parser settings:
     * line separator detection, null/empty substitutes, headers, field selection,
     * whitespace handling, record limit, empty-line handling, size limits and input buffering.
     */
    @Test
    public void example004LotsOfDifferentSettings() {
        CsvParserSettings settings = new CsvParserSettings();

        //##CODE_START
        // The parser can work out by itself which line separator sequence the input uses.
        settings.setLineSeparatorDetectionEnabled(true);

        // Value to use in place of a parsed value that is null.
        settings.setNullValue("<NULL>");

        // Value to use in place of a parsed value that is empty.
        settings.setEmptyValue("<EMPTY>"); // for CSV only

        // Defines the headers of the parsed file. With headers defined,
        // 'setHeaderExtractionEnabled(true)' makes the parser just ignore the first input row.
        settings.setHeaders("a", "b", "c", "d", "e");

        // Produces the columns in reverse order.
        // NOTE: once fields are selected, every row produced has exactly the same number of columns.
        settings.selectFields("e", "d", "c", "b", "a");

        // Leading whitespaces are kept.
        settings.setIgnoreLeadingWhitespaces(false);

        // Trailing whitespaces are kept.
        settings.setIgnoreTrailingWhitespaces(false);

        // Stops after a fixed number of records has been read, closing any resources.
        settings.setNumberOfRecordsToRead(9);

        // Empty lines are kept.
        settings.setSkipEmptyLines(false);

        // Maximum number of characters to read in each column (the default is 4096 characters).
        // This guards against OutOfMemoryErrors when a file does not have a valid format:
        // in such cases the parser might keep reading until the input ends or memory is exhausted.
        // The limit avoids unwanted JVM crashes.
        settings.setMaxCharsPerColumn(100);

        // For the same reasons, a hard limit on how many columns an input row can have.
        // The default is 512.
        settings.setMaxColumns(10);

        // Number of characters the parser's buffer holds at any given time.
        settings.setInputBufferSize(1000);

        // Turns off the separate thread that loads the input buffer. By default the input is loaded
        // incrementally on a separate thread if more than 1 processor is available. Leave this
        // enabled to get better performance when parsing big files (> 100 Mb).
        settings.setReadInputOnSeparateThread(false);

        // Parse with these settings to obtain the rows that will be printed.
        List<String[]> rows = parseWithSettings(settings);
        //##CODE_END

        printAndValidate(rows);
    }

    /**
     * Parses the fixed-width example file (/examples/example.txt) using field lengths
     * 4, 5, 40 and 40, together with the settings that are specific to fixed-width parsing.
     */
    @Test
    public void example005FixedWidthSettings() {
        //##CODE_START
        // For the sake of the example, the last 8 characters (the Year column) are not read.
        // The padding character is also NOT set to '_', so the output makes more sense for reading
        // and you can see which characters are being processed.
        FixedWidthFields fieldLengths = new FixedWidthFields(4, 5, 40, 40 /*, 8*/);
        FixedWidthParserSettings settings = new FixedWidthParserSettings(fieldLengths);

        // The example file separates its lines with '\n'. Declaring that explicitly lets
        // systems such as MacOS and Windows process the file correctly
        // (MacOS uses '\r'; and Windows uses '\r\n').
        settings.getFormat().setLineSeparator("\n");

        // The fixed width parser settings share most of the settings available for CSV.
        // The only extra settings you need are the two below.

        // When a row has more characters than defined, skip them until the end of the line.
        settings.setSkipTrailingCharsUntilNewline(true);

        // When a record has fewer characters than expected and a new line is found,
        // the record is considered parsed. Data in the next row is parsed as a new record.
        settings.setRecordEndsOnNewline(true);

        RowListProcessor collector = new RowListProcessor();
        settings.setProcessor(collector);
        settings.setHeaderExtractionEnabled(true);

        new FixedWidthParser(settings).parse(getReader("/examples/example.txt"));

        List<String[]> parsed = collector.getRows();
        //##CODE_END

        printAndValidate(parsed);
    }

    /**
     * Parses the example input file (/examples/example.csv) using the supplied settings.
     * The given settings object is modified: a row-collecting processor is attached to it
     * and header extraction is enabled.
     *
     * @param parserSettings settings used to parse the example.csv file
     * @return a list with all parsed rows.
     */
    private List<String[]> parseWithSettings(CsvParserSettings parserSettings) {
        RowListProcessor collector = new RowListProcessor();
        parserSettings.setProcessor(collector);
        parserSettings.setHeaderExtractionEnabled(true);

        new CsvParser(parserSettings).parse(getReader("/examples/example.csv"));

        return collector.getRows();
    }
}