diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index 36f80ec90a95..d81f1afee8ee 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -215,7 +215,12 @@ impl InferredDataType { self.packed |= if string.starts_with('"') { 1 << 8 // Utf8 } else if let Some(m) = REGEX_SET.matches(string).into_iter().next() { - 1 << m + if m == 1 && string.len() >= 19 && string.parse::<i64>().is_err() { + // if overflow i64, fallback to utf8 + 1 << 8 + } else { + 1 << m + } } else { 1 << 8 // Utf8 } @@ -1819,6 +1824,8 @@ mod tests { infer_field_schema("2021-12-19T13:12:30.123456789"), DataType::Timestamp(TimeUnit::Nanosecond, None) ); + assert_eq!(infer_field_schema("–9223372036854775809"), DataType::Utf8); + assert_eq!(infer_field_schema("9223372036854775808"), DataType::Utf8); } #[test]