 import unittest
-from davidkhala.data.format.avro import read, is_avro
+
+from pyarrow import table, array, int32, string, list_, float32
+
+from davidkhala.data.format.avro import read, is_avro, write
+from davidkhala.data.format.parquet import Parquet
+from davidkhala.data.format.transform import Arrow2Avro
 
 
 class AvroTestCase(unittest.TestCase):
+    _path = 'fixtures/gcp-data-davidkhala.dbt_davidkhala.country_codes.avro'
+
+    def setUp(self):
+        # Regenerate the Avro fixture from the Parquet fixture before each test.
+        parquet = Parquet('fixtures/gcp-data-davidkhala.dbt_davidkhala.country_codes.parquet')
+
+        t = Arrow2Avro(parquet.read_batch())
+        with open(self._path, 'wb') as output_stream:
+            write(output_stream, t.schema, t.records)
+
+    def test_transform(self):
+        # Smoke test: convert an in-memory Arrow table and write it out as Avro.
+        sample_table = table({
+            "id": array([1, 2, 3], type=int32()),
+            "name": array(["Alice", "Bob", "Charlie"], type=string()),
+            "scores": array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], type=list_(float32()))
+        })
+        t = Arrow2Avro(sample_table)
+
+        with open("artifacts/dummy.avro", "wb") as out:
+            write(out, t.schema, t.records)
+
     def test_read(self):
-        _path = 'fixtures/gcp-data-davidkhala.dbt_davidkhala.country_codes.avro'
-        self.assertTrue(is_avro(_path))
-        with open(_path, 'rb') as file:
+        self.assertTrue(is_avro(self._path))
+        with open(self._path, 'rb') as file:
             for record in read(file):
                 print(record)
 
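The tests only rely on Arrow2Avro exposing a .schema and a .records attribute, and on avro.write(stream, schema, records) serializing them. For orientation, here is a minimal sketch of how such an adapter could be built on top of pyarrow and fastavro; this is an assumption, not the library's actual implementation, and the helper names arrow_to_avro_schema and write_avro are hypothetical.

# Hedged sketch: derive an Avro schema from an Arrow schema and materialise
# rows as plain dicts, then hand both to an Avro writer such as fastavro.
# The real davidkhala.data.format.transform.Arrow2Avro may differ.
import pyarrow as pa
from fastavro import writer, parse_schema

def arrow_to_avro_schema(arrow_schema: pa.Schema, name: str = "Record") -> dict:
    # Hypothetical helper: map a few common Arrow types to Avro types.
    def field_type(t: pa.DataType):
        if pa.types.is_int32(t):
            return "int"
        if pa.types.is_float32(t):
            return "float"
        if pa.types.is_string(t):
            return "string"
        if pa.types.is_list(t):
            return {"type": "array", "items": field_type(t.value_type)}
        raise NotImplementedError(t)
    return {
        "type": "record",
        "name": name,
        "fields": [{"name": f.name, "type": field_type(f.type)} for f in arrow_schema],
    }

def write_avro(out, arrow_table: pa.Table):
    # Rows become a list of dicts, which fastavro can write directly.
    schema = parse_schema(arrow_to_avro_schema(arrow_table.schema))
    writer(out, schema, arrow_table.to_pylist())

The sample table in test_transform (int32, string, list of float32 columns) exercises exactly the scalar and nested cases such a mapping has to cover.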
|
|