diff --git a/package-lock.json b/package-lock.json index d6c490d9035..218918db832 100644 --- a/package-lock.json +++ b/package-lock.json @@ -49700,10 +49700,13 @@ "@types/mocha": "^9.0.0", "@types/react": "^17.0.5", "@types/sinon-chai": "^3.2.5", + "autoevals": "^0.0.130", + "braintrust": "^0.2.4", "chai": "^4.3.6", "depcheck": "^1.4.1", "electron-mocha": "^12.2.0", "mocha": "^10.2.0", + "mongodb-ns": "^3.1.0", "nyc": "^15.1.0", "p-queue": "^7.4.1", "sinon": "^9.2.3", @@ -49808,6 +49811,13 @@ "integrity": "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==", "dev": true }, + "packages/compass-generative-ai/node_modules/mongodb-ns": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mongodb-ns/-/mongodb-ns-3.1.0.tgz", + "integrity": "sha512-oe5gL723bQ+C4purCXIQTeDlAfendC2hs4KqlPPat8DozQZU6vtg3TgIJOJKVpcMgmAVyt39FRolOJh4htnsTg==", + "dev": true, + "license": "Apache-2.0" + }, "packages/compass-generative-ai/node_modules/p-queue": { "version": "7.4.1", "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-7.4.1.tgz", @@ -62656,6 +62666,8 @@ "@types/react": "^17.0.5", "@types/sinon-chai": "^3.2.5", "ai": "^6.0.0-beta.151", + "autoevals": "^0.0.130", + "braintrust": "^0.2.4", "bson": "^6.10.4", "chai": "^4.3.6", "compass-preferences-model": "^2.66.3", @@ -62663,6 +62675,7 @@ "electron-mocha": "^12.2.0", "mocha": "^10.2.0", "mongodb": "^6.19.0", + "mongodb-ns": "^3.1.0", "mongodb-query-parser": "^4.6.0", "mongodb-schema": "^12.6.3", "nyc": "^15.1.0", @@ -62729,6 +62742,12 @@ "integrity": "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==", "dev": true }, + "mongodb-ns": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mongodb-ns/-/mongodb-ns-3.1.0.tgz", + "integrity": "sha512-oe5gL723bQ+C4purCXIQTeDlAfendC2hs4KqlPPat8DozQZU6vtg3TgIJOJKVpcMgmAVyt39FRolOJh4htnsTg==", + "dev": true + }, "p-queue": { "version": "7.4.1", "resolved": 
"https://registry.npmjs.org/p-queue/-/p-queue-7.4.1.tgz", diff --git a/packages/compass-generative-ai/package.json b/packages/compass-generative-ai/package.json index 6a723500b9f..c9f07414b67 100644 --- a/packages/compass-generative-ai/package.json +++ b/packages/compass-generative-ai/package.json @@ -49,7 +49,8 @@ "test-watch": "npm run test -- --watch", "test-ci": "npm run test-cov", "test-ci-electron": "npm run test-electron", - "reformat": "npm run eslint . -- --fix && npm run prettier -- --write ." + "reformat": "npm run eslint . -- --fix && npm run prettier -- --write .", + "eval": "braintrust eval tests/evals" }, "dependencies": { "@mongodb-js/atlas-service": "^0.73.0", @@ -84,10 +85,13 @@ "@types/mocha": "^9.0.0", "@types/react": "^17.0.5", "@types/sinon-chai": "^3.2.5", + "autoevals": "^0.0.130", + "braintrust": "^0.2.4", "chai": "^4.3.6", "depcheck": "^1.4.1", "electron-mocha": "^12.2.0", "mocha": "^10.2.0", + "mongodb-ns": "^3.1.0", "nyc": "^15.1.0", "p-queue": "^7.4.1", "sinon": "^9.2.3", diff --git a/packages/compass-generative-ai/tests/evals/chatbot-api.ts b/packages/compass-generative-ai/tests/evals/chatbot-api.ts new file mode 100644 index 00000000000..0dc4e271191 --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/chatbot-api.ts @@ -0,0 +1,42 @@ +import { streamText } from 'ai'; +import { createOpenAI } from '@ai-sdk/openai'; +import type { + ConversationEvalCaseInput, + ConversationTaskOutput, +} from './types'; + +export async function makeChatbotCall( + input: ConversationEvalCaseInput +): Promise { + const openai = createOpenAI({ + baseURL: + process.env.COMPASS_ASSISTANT_BASE_URL_OVERRIDE ?? 
+ 'https://eval.knowledge-dev.mongodb.com/api/v1', + apiKey: '', + headers: { + 'X-Request-Origin': 'compass-gen-ai-braintrust', + 'User-Agent': 'mongodb-compass/x.x.x', + }, + }); + const result = streamText({ + model: openai.responses('mongodb-slim-latest'), + temperature: undefined, + prompt: input.messages, + providerOptions: { + openai: { + instructions: input.instructions.content, + store: false, + }, + }, + }); + + const chunks: string[] = []; + for await (const chunk of result.toUIMessageStream()) { + if (chunk.type === 'text-delta') { + chunks.push(chunk.delta); + } + } + return { + messages: [{ content: chunks.join('') }], + }; +} diff --git a/packages/compass-generative-ai/tests/evals/fixtures/airbnb.listingsAndReviews.ts b/packages/compass-generative-ai/tests/evals/fixtures/airbnb.listingsAndReviews.ts new file mode 100644 index 00000000000..7810ac92e72 --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/fixtures/airbnb.listingsAndReviews.ts @@ -0,0 +1,1150 @@ +export default [ + { + _id: '10117617', + listing_url: 'https://www.airbnb.com/rooms/10117617', + name: 'A Casa Alegre é um apartamento T1.', + notes: '', + property_type: 'Apartment', + room_type: 'Entire home/apt', + bed_type: 'Real Bed', + minimum_nights: '7', + maximum_nights: '180', + cancellation_policy: 'moderate', + last_scraped: { + $date: '2019-02-16T05:00:00.000Z', + }, + calendar_last_scraped: { + $date: '2019-02-16T05:00:00.000Z', + }, + first_review: { + $date: '2016-04-19T04:00:00.000Z', + }, + last_review: { + $date: '2017-08-27T04:00:00.000Z', + }, + accommodates: 2, + bedrooms: 1, + beds: 1, + number_of_reviews: 12, + bathrooms: { + $numberDecimal: '1.0', + }, + amenities: [ + 'TV', + 'Kitchen', + 'Elevator', + 'Buzzer/wireless intercom', + 'Heating', + 'Family/kid friendly', + 'Washer', + 'First aid kit', + 'Safety card', + 'Fire extinguisher', + 'Essentials', + 'Shampoo', + 'Hangers', + 'Iron', + 'Laptop friendly workspace', + 'translation missing: 
en.hosting_amenity_49', + 'Bathtub', + 'Beachfront', + ], + price: { + $numberDecimal: '40.00', + }, + security_deposit: { + $numberDecimal: '250.00', + }, + cleaning_fee: { + $numberDecimal: '15.00', + }, + extra_people: { + $numberDecimal: '0.00', + }, + guests_included: { + $numberDecimal: '2', + }, + images: { + thumbnail_url: '', + medium_url: '', + picture_url: + 'https://a0.muscache.com/im/pictures/8845f3f6-9775-4c14-9486-fe0997611bda.jpg?aki_policy=large', + xl_picture_url: '', + }, + host: { + host_id: '51920973', + host_url: 'https://www.airbnb.com/users/show/51920973', + host_name: 'Manuela', + host_location: 'Porto, Porto District, Portugal', + host_about: + 'Sou uma pessoa que gosta de viajar, conhecer museus, visitar exposições e cinema.\r\nTambém gosto de passear pelas zonas históricas das cidades e contemplar as suas edificações.', + host_response_time: 'within a day', + host_thumbnail_url: + 'https://a0.muscache.com/im/pictures/bb526001-78b2-472d-9663-c3d02a27f4ce.jpg?aki_policy=profile_small', + host_picture_url: + 'https://a0.muscache.com/im/pictures/bb526001-78b2-472d-9663-c3d02a27f4ce.jpg?aki_policy=profile_x_medium', + host_neighbourhood: '', + host_response_rate: 100, + host_is_superhost: false, + host_has_profile_pic: true, + host_identity_verified: true, + host_listings_count: 1, + host_total_listings_count: 1, + host_verifications: [ + 'email', + 'phone', + 'reviews', + 'jumio', + 'government_id', + ], + }, + address: { + street: 'Vila do Conde, Porto, Portugal', + suburb: '', + government_area: 'Vila do Conde', + market: 'Porto', + country: 'Portugal', + country_code: 'PT', + location: { + type: 'Point', + coordinates: [-8.75383, 41.3596], + is_location_exact: false, + }, + }, + availability: { + availability_30: 0, + availability_60: 0, + availability_90: 0, + availability_365: 46, + }, + review_scores: { + review_scores_accuracy: 10, + review_scores_cleanliness: 10, + review_scores_checkin: 10, + review_scores_communication: 10, + 
review_scores_location: 9, + review_scores_value: 10, + review_scores_rating: 96, + }, + }, + { + _id: '10108388', + listing_url: 'https://www.airbnb.com/rooms/10108388', + name: 'Sydney Hyde Park City Apartment (checkin from 6am)', + notes: + 'IMPORTANT: Our apartment is privately owned and serviced. It is not part of the hotel that is operated from within the building. Internet: Our internet connection is wifi and dedicated to our apartment. So there is no sharing with other guests and no need to pay additional fees for internet usage.', + property_type: 'Apartment', + room_type: 'Entire home/apt', + bed_type: 'Real Bed', + minimum_nights: '2', + maximum_nights: '30', + cancellation_policy: 'moderate', + last_scraped: { + $date: '2019-03-07T05:00:00.000Z', + }, + calendar_last_scraped: { + $date: '2019-03-07T05:00:00.000Z', + }, + first_review: { + $date: '2016-06-30T04:00:00.000Z', + }, + last_review: { + $date: '2019-03-06T05:00:00.000Z', + }, + accommodates: 2, + bedrooms: 1, + beds: 1, + number_of_reviews: 109, + bathrooms: { + $numberDecimal: '1.0', + }, + amenities: [ + 'TV', + 'Wifi', + 'Air conditioning', + 'Pool', + 'Kitchen', + 'Gym', + 'Elevator', + 'Heating', + 'Washer', + 'Dryer', + 'Smoke detector', + 'Carbon monoxide detector', + 'First aid kit', + 'Fire extinguisher', + 'Essentials', + 'Shampoo', + 'Hangers', + 'Hair dryer', + 'Iron', + 'Laptop friendly workspace', + 'translation missing: en.hosting_amenity_49', + 'translation missing: en.hosting_amenity_50', + 'Self check-in', + 'Building staff', + 'Private living room', + 'Hot water', + 'Bed linens', + 'Extra pillows and blankets', + 'Microwave', + 'Coffee maker', + 'Refrigerator', + 'Dishwasher', + 'Dishes and silverware', + 'Cooking basics', + 'Oven', + 'Stove', + 'Patio or balcony', + 'Cleaning before checkout', + 'Step-free access', + 'Flat path to front door', + 'Well-lit path to entrance', + 'Step-free access', + ], + price: { + $numberDecimal: '185.00', + }, + security_deposit: { + 
$numberDecimal: '800.00', + }, + cleaning_fee: { + $numberDecimal: '120.00', + }, + extra_people: { + $numberDecimal: '0.00', + }, + guests_included: { + $numberDecimal: '1', + }, + images: { + thumbnail_url: '', + medium_url: '', + picture_url: + 'https://a0.muscache.com/im/pictures/a2e7de4a-6349-4515-acd3-c788d6f2abcf.jpg?aki_policy=large', + xl_picture_url: '', + }, + host: { + host_id: '16187044', + host_url: 'https://www.airbnb.com/users/show/16187044', + host_name: 'Desireé', + host_location: 'Australia', + host_about: + "At the centre of my life is my beautiful family...home is wherever my family is.\r\n\r\nI enjoy filling my life with positive experiences and love to travel and experience new cultures, to meet new people, to read and just enjoy the beauty and wonders of the 'littlest' things in the world around me and my family. \r\n", + host_response_time: 'within an hour', + host_thumbnail_url: + 'https://a0.muscache.com/im/users/16187044/profile_pic/1402737505/original.jpg?aki_policy=profile_small', + host_picture_url: + 'https://a0.muscache.com/im/users/16187044/profile_pic/1402737505/original.jpg?aki_policy=profile_x_medium', + host_neighbourhood: 'Darlinghurst', + host_response_rate: 100, + host_is_superhost: true, + host_has_profile_pic: true, + host_identity_verified: true, + host_listings_count: 1, + host_total_listings_count: 1, + host_verifications: [ + 'email', + 'phone', + 'reviews', + 'jumio', + 'government_id', + ], + }, + address: { + street: 'Darlinghurst, NSW, Australia', + suburb: 'Darlinghurst', + government_area: 'Sydney', + market: 'Sydney', + country: 'Australia', + country_code: 'AU', + location: { + type: 'Point', + coordinates: [151.21346, -33.87603], + is_location_exact: false, + }, + }, + availability: { + availability_30: 5, + availability_60: 16, + availability_90: 35, + availability_365: 265, + }, + review_scores: { + review_scores_accuracy: 10, + review_scores_cleanliness: 10, + review_scores_checkin: 10, + 
review_scores_communication: 10, + review_scores_location: 10, + review_scores_value: 10, + review_scores_rating: 100, + }, + }, + { + _id: '10057826', + listing_url: 'https://www.airbnb.com/rooms/10057826', + name: 'Deluxe Loft Suite', + notes: '', + property_type: 'Apartment', + room_type: 'Entire home/apt', + bed_type: 'Real Bed', + minimum_nights: '3', + maximum_nights: '1125', + cancellation_policy: 'strict_14_with_grace_period', + last_scraped: { + $date: '2019-03-07T05:00:00.000Z', + }, + calendar_last_scraped: { + $date: '2019-03-07T05:00:00.000Z', + }, + first_review: { + $date: '2016-01-03T05:00:00.000Z', + }, + last_review: { + $date: '2018-02-18T05:00:00.000Z', + }, + accommodates: 4, + bedrooms: 0, + beds: 2, + number_of_reviews: 5, + bathrooms: { + $numberDecimal: '1.0', + }, + amenities: [ + 'TV', + 'Cable TV', + 'Internet', + 'Wifi', + 'Air conditioning', + 'Kitchen', + 'Doorman', + 'Gym', + 'Elevator', + 'Heating', + 'Family/kid friendly', + 'Washer', + 'Dryer', + 'Smoke detector', + 'Carbon monoxide detector', + 'First aid kit', + 'Fire extinguisher', + 'Essentials', + 'Shampoo', + '24-hour check-in', + 'Hangers', + 'Hair dryer', + 'Iron', + ], + price: { + $numberDecimal: '205.00', + }, + extra_people: { + $numberDecimal: '0.00', + }, + guests_included: { + $numberDecimal: '1', + }, + images: { + thumbnail_url: '', + medium_url: '', + picture_url: + 'https://a0.muscache.com/im/pictures/40ace1e3-4917-46e5-994f-30a5965f5159.jpg?aki_policy=large', + xl_picture_url: '', + }, + host: { + host_id: '47554473', + host_url: 'https://www.airbnb.com/users/show/47554473', + host_name: 'Mae', + host_location: 'US', + host_about: '', + host_response_time: 'within a few hours', + host_thumbnail_url: + 'https://a0.muscache.com/im/pictures/c680ce22-d6ec-4b00-8ef3-b5b7fc0d76f2.jpg?aki_policy=profile_small', + host_picture_url: + 'https://a0.muscache.com/im/pictures/c680ce22-d6ec-4b00-8ef3-b5b7fc0d76f2.jpg?aki_policy=profile_x_medium', + host_neighbourhood: 
'Greenpoint', + host_response_rate: 100, + host_is_superhost: false, + host_has_profile_pic: true, + host_identity_verified: false, + host_listings_count: 13, + host_total_listings_count: 13, + host_verifications: [ + 'email', + 'phone', + 'google', + 'reviews', + 'jumio', + 'government_id', + ], + }, + address: { + street: 'Brooklyn, NY, United States', + suburb: 'Greenpoint', + government_area: 'Greenpoint', + market: 'New York', + country: 'United States', + country_code: 'US', + location: { + type: 'Point', + coordinates: [-73.94472, 40.72778], + is_location_exact: true, + }, + }, + availability: { + availability_30: 30, + availability_60: 31, + availability_90: 31, + availability_365: 243, + }, + review_scores: { + review_scores_accuracy: 9, + review_scores_cleanliness: 10, + review_scores_checkin: 10, + review_scores_communication: 8, + review_scores_location: 9, + review_scores_value: 9, + review_scores_rating: 88, + }, + }, + { + _id: '10133350', + listing_url: 'https://www.airbnb.com/rooms/10133350', + name: '2 bedroom Upper east side', + notes: '', + property_type: 'Apartment', + room_type: 'Entire home/apt', + bed_type: 'Real Bed', + minimum_nights: '2', + maximum_nights: '7', + cancellation_policy: 'strict_14_with_grace_period', + last_scraped: { + $date: '2019-03-06T05:00:00.000Z', + }, + calendar_last_scraped: { + $date: '2019-03-06T05:00:00.000Z', + }, + first_review: { + $date: '2016-05-28T04:00:00.000Z', + }, + last_review: { + $date: '2017-08-19T04:00:00.000Z', + }, + accommodates: 5, + bedrooms: 2, + beds: 2, + number_of_reviews: 9, + bathrooms: { + $numberDecimal: '1.0', + }, + amenities: [ + 'TV', + 'Cable TV', + 'Internet', + 'Wifi', + 'Air conditioning', + 'Kitchen', + 'Pets allowed', + 'Pets live on this property', + 'Buzzer/wireless intercom', + 'Heating', + 'Family/kid friendly', + 'Essentials', + 'Shampoo', + 'Hair dryer', + 'Iron', + 'Laptop friendly workspace', + ], + price: { + $numberDecimal: '275.00', + }, + security_deposit: { + 
$numberDecimal: '0.00', + }, + cleaning_fee: { + $numberDecimal: '35.00', + }, + extra_people: { + $numberDecimal: '0.00', + }, + guests_included: { + $numberDecimal: '1', + }, + images: { + thumbnail_url: '', + medium_url: '', + picture_url: + 'https://a0.muscache.com/im/pictures/d9886a79-0633-4ab4-b03a-7686bab13d71.jpg?aki_policy=large', + xl_picture_url: '', + }, + host: { + host_id: '52004369', + host_url: 'https://www.airbnb.com/users/show/52004369', + host_name: 'Chelsea', + host_location: 'Sea Cliff, New York, United States', + host_about: '', + host_thumbnail_url: + 'https://a0.muscache.com/im/pictures/4d361f57-f65e-4885-b934-0e92eebf288d.jpg?aki_policy=profile_small', + host_picture_url: + 'https://a0.muscache.com/im/pictures/4d361f57-f65e-4885-b934-0e92eebf288d.jpg?aki_policy=profile_x_medium', + host_neighbourhood: '', + host_is_superhost: false, + host_has_profile_pic: true, + host_identity_verified: true, + host_listings_count: 2, + host_total_listings_count: 2, + host_verifications: [ + 'email', + 'phone', + 'reviews', + 'jumio', + 'offline_government_id', + 'government_id', + ], + }, + address: { + street: 'New York, NY, United States', + suburb: 'Upper East Side', + government_area: 'Upper East Side', + market: 'New York', + country: 'United States', + country_code: 'US', + location: { + type: 'Point', + coordinates: [-73.95854, 40.7664], + is_location_exact: false, + }, + }, + availability: { + availability_30: 0, + availability_60: 0, + availability_90: 0, + availability_365: 0, + }, + review_scores: { + review_scores_accuracy: 9, + review_scores_cleanliness: 8, + review_scores_checkin: 10, + review_scores_communication: 9, + review_scores_location: 9, + review_scores_value: 9, + review_scores_rating: 90, + }, + }, + { + _id: '10133554', + listing_url: 'https://www.airbnb.com/rooms/10133554', + name: 'Double and triple rooms Blue mosque', + notes: '', + property_type: 'Bed and breakfast', + room_type: 'Private room', + bed_type: 'Real Bed', + 
minimum_nights: '1', + maximum_nights: '1125', + cancellation_policy: 'moderate', + last_scraped: { + $date: '2019-02-18T05:00:00.000Z', + }, + calendar_last_scraped: { + $date: '2019-02-18T05:00:00.000Z', + }, + first_review: { + $date: '2017-05-04T04:00:00.000Z', + }, + last_review: { + $date: '2018-05-07T04:00:00.000Z', + }, + accommodates: 3, + bedrooms: 1, + beds: 2, + number_of_reviews: 29, + bathrooms: { + $numberDecimal: '1.0', + }, + amenities: [ + 'Internet', + 'Wifi', + 'Air conditioning', + 'Free parking on premises', + 'Smoking allowed', + 'Heating', + 'Family/kid friendly', + 'Suitable for events', + 'Washer', + 'Dryer', + 'Fire extinguisher', + 'Essentials', + 'Shampoo', + 'Hangers', + 'Hair dryer', + 'Iron', + 'Laptop friendly workspace', + 'Self check-in', + 'Building staff', + ], + price: { + $numberDecimal: '121.00', + }, + extra_people: { + $numberDecimal: '0.00', + }, + guests_included: { + $numberDecimal: '1', + }, + images: { + thumbnail_url: '', + medium_url: '', + picture_url: + 'https://a0.muscache.com/im/pictures/68de30b5-ece5-42ab-8152-c1834d5e25fd.jpg?aki_policy=large', + xl_picture_url: '', + }, + host: { + host_id: '52004703', + host_url: 'https://www.airbnb.com/users/show/52004703', + host_name: 'Mehmet Emin', + host_location: 'Istanbul, İstanbul, Turkey', + host_about: '', + host_response_time: 'within a few hours', + host_thumbnail_url: + 'https://a0.muscache.com/im/pictures/user/4cb6be34-659b-42cc-a93d-77a5d3501e7a.jpg?aki_policy=profile_small', + host_picture_url: + 'https://a0.muscache.com/im/pictures/user/4cb6be34-659b-42cc-a93d-77a5d3501e7a.jpg?aki_policy=profile_x_medium', + host_neighbourhood: '', + host_response_rate: 100, + host_is_superhost: false, + host_has_profile_pic: true, + host_identity_verified: true, + host_listings_count: 2, + host_total_listings_count: 2, + host_verifications: [ + 'email', + 'phone', + 'facebook', + 'reviews', + 'jumio', + 'offline_government_id', + 'government_id', + ], + }, + address: { + 
street: 'Fatih , İstanbul, Turkey', + suburb: 'Fatih', + government_area: 'Fatih', + market: 'Istanbul', + country: 'Turkey', + country_code: 'TR', + location: { + type: 'Point', + coordinates: [28.98009, 41.0062], + is_location_exact: false, + }, + }, + availability: { + availability_30: 30, + availability_60: 60, + availability_90: 90, + availability_365: 365, + }, + review_scores: { + review_scores_accuracy: 9, + review_scores_cleanliness: 9, + review_scores_checkin: 10, + review_scores_communication: 10, + review_scores_location: 10, + review_scores_value: 9, + review_scores_rating: 92, + }, + }, + { + _id: '10115921', + listing_url: 'https://www.airbnb.com/rooms/10115921', + name: 'GOLF ROYAL RESİDENCE TAXİM(1+1):3', + notes: '', + property_type: 'Serviced apartment', + room_type: 'Entire home/apt', + bed_type: 'Real Bed', + minimum_nights: '1', + maximum_nights: '1125', + cancellation_policy: 'strict_14_with_grace_period', + last_scraped: { + $date: '2019-02-18T05:00:00.000Z', + }, + calendar_last_scraped: { + $date: '2019-02-18T05:00:00.000Z', + }, + first_review: { + $date: '2016-02-01T05:00:00.000Z', + }, + last_review: { + $date: '2017-08-07T04:00:00.000Z', + }, + accommodates: 4, + bedrooms: 2, + beds: 4, + number_of_reviews: 3, + bathrooms: { + $numberDecimal: '1.0', + }, + amenities: [ + 'TV', + 'Cable TV', + 'Internet', + 'Wifi', + 'Air conditioning', + 'Wheelchair accessible', + 'Kitchen', + 'Paid parking off premises', + 'Smoking allowed', + 'Doorman', + 'Elevator', + 'Buzzer/wireless intercom', + 'Heating', + 'Family/kid friendly', + 'Suitable for events', + 'Dryer', + 'Smoke detector', + 'Carbon monoxide detector', + 'First aid kit', + 'Safety card', + 'Fire extinguisher', + 'Essentials', + 'Shampoo', + '24-hour check-in', + 'Hangers', + 'Hair dryer', + 'Iron', + 'Laptop friendly workspace', + 'Self check-in', + 'Building staff', + 'Crib', + 'Hot water', + 'Luggage dropoff allowed', + 'Long term stays allowed', + ], + price: { + $numberDecimal: 
'838.00', + }, + extra_people: { + $numberDecimal: '0.00', + }, + guests_included: { + $numberDecimal: '1', + }, + images: { + thumbnail_url: '', + medium_url: '', + picture_url: + 'https://a0.muscache.com/im/pictures/fbdaf067-9682-48a6-9838-f51589d4791a.jpg?aki_policy=large', + xl_picture_url: '', + }, + host: { + host_id: '51471538', + host_url: 'https://www.airbnb.com/users/show/51471538', + host_name: 'Ahmet', + host_location: 'Istanbul, İstanbul, Turkey', + host_about: '', + host_response_time: 'within an hour', + host_thumbnail_url: + 'https://a0.muscache.com/im/pictures/user/d8c830d0-16da-455c-818a-790864132e0a.jpg?aki_policy=profile_small', + host_picture_url: + 'https://a0.muscache.com/im/pictures/user/d8c830d0-16da-455c-818a-790864132e0a.jpg?aki_policy=profile_x_medium', + host_neighbourhood: 'Şişli', + host_response_rate: 100, + host_is_superhost: false, + host_has_profile_pic: true, + host_identity_verified: false, + host_listings_count: 16, + host_total_listings_count: 16, + host_verifications: ['email', 'phone', 'reviews'], + }, + address: { + street: 'Şişli, İstanbul, Turkey', + suburb: 'Şişli', + government_area: 'Sisli', + market: 'Istanbul', + country: 'Turkey', + country_code: 'TR', + location: { + type: 'Point', + coordinates: [28.98713, 41.04841], + is_location_exact: false, + }, + }, + availability: { + availability_30: 30, + availability_60: 60, + availability_90: 90, + availability_365: 365, + }, + review_scores: { + review_scores_accuracy: 7, + review_scores_cleanliness: 7, + review_scores_checkin: 8, + review_scores_communication: 8, + review_scores_location: 10, + review_scores_value: 7, + review_scores_rating: 67, + }, + }, + { + _id: '10116256', + listing_url: 'https://www.airbnb.com/rooms/10116256', + name: 'GOLF ROYAL RESIDENCE SUİTES(2+1)-2', + notes: '', + property_type: 'Serviced apartment', + room_type: 'Entire home/apt', + bed_type: 'Real Bed', + minimum_nights: '1', + maximum_nights: '1125', + cancellation_policy: 'moderate', + 
last_scraped: { + $date: '2019-02-18T05:00:00.000Z', + }, + calendar_last_scraped: { + $date: '2019-02-18T05:00:00.000Z', + }, + accommodates: 6, + bedrooms: 2, + beds: 5, + number_of_reviews: 0, + bathrooms: { + $numberDecimal: '2.0', + }, + amenities: [ + 'TV', + 'Internet', + 'Wifi', + 'Air conditioning', + 'Wheelchair accessible', + 'Kitchen', + 'Paid parking off premises', + 'Smoking allowed', + 'Doorman', + 'Elevator', + 'Buzzer/wireless intercom', + 'Heating', + 'Family/kid friendly', + 'Suitable for events', + 'Washer', + 'Dryer', + 'Smoke detector', + 'Carbon monoxide detector', + 'Fire extinguisher', + 'Essentials', + 'Shampoo', + '24-hour check-in', + 'Hangers', + 'Hair dryer', + 'Iron', + 'Laptop friendly workspace', + 'Self check-in', + 'Building staff', + 'Hot water', + 'Luggage dropoff allowed', + 'Long term stays allowed', + ], + price: { + $numberDecimal: '997.00', + }, + extra_people: { + $numberDecimal: '0.00', + }, + guests_included: { + $numberDecimal: '1', + }, + images: { + thumbnail_url: '', + medium_url: '', + picture_url: + 'https://a0.muscache.com/im/pictures/79955df9-923e-44ee-bc3c-5e88041a8c53.jpg?aki_policy=large', + xl_picture_url: '', + }, + host: { + host_id: '51471538', + host_url: 'https://www.airbnb.com/users/show/51471538', + host_name: 'Ahmet', + host_location: 'Istanbul, İstanbul, Turkey', + host_about: '', + host_response_time: 'within an hour', + host_thumbnail_url: + 'https://a0.muscache.com/im/pictures/user/d8c830d0-16da-455c-818a-790864132e0a.jpg?aki_policy=profile_small', + host_picture_url: + 'https://a0.muscache.com/im/pictures/user/d8c830d0-16da-455c-818a-790864132e0a.jpg?aki_policy=profile_x_medium', + host_neighbourhood: 'Şişli', + host_response_rate: 100, + host_is_superhost: false, + host_has_profile_pic: true, + host_identity_verified: false, + host_listings_count: 16, + host_total_listings_count: 16, + host_verifications: ['email', 'phone', 'reviews'], + }, + address: { + street: 'Şişli, İstanbul, Turkey', + 
suburb: 'Şişli', + government_area: 'Sisli', + market: 'Istanbul', + country: 'Turkey', + country_code: 'TR', + location: { + type: 'Point', + coordinates: [28.98818, 41.04772], + is_location_exact: false, + }, + }, + availability: { + availability_30: 30, + availability_60: 60, + availability_90: 90, + availability_365: 365, + }, + review_scores: {}, + }, + { + _id: '10047964', + listing_url: 'https://www.airbnb.com/rooms/10047964', + name: 'Charming Flat in Downtown Moda', + notes: '', + property_type: 'House', + room_type: 'Entire home/apt', + bed_type: 'Real Bed', + minimum_nights: '2', + maximum_nights: '1125', + cancellation_policy: 'flexible', + last_scraped: { + $date: '2019-02-18T05:00:00.000Z', + }, + calendar_last_scraped: { + $date: '2019-02-18T05:00:00.000Z', + }, + first_review: { + $date: '2016-04-02T04:00:00.000Z', + }, + last_review: { + $date: '2016-04-02T04:00:00.000Z', + }, + accommodates: 6, + bedrooms: 2, + beds: 6, + number_of_reviews: 1, + bathrooms: { + $numberDecimal: '1.0', + }, + amenities: [ + 'TV', + 'Cable TV', + 'Internet', + 'Wifi', + 'Kitchen', + 'Free parking on premises', + 'Pets allowed', + 'Pets live on this property', + 'Cat(s)', + 'Heating', + 'Family/kid friendly', + 'Washer', + 'Essentials', + 'Shampoo', + '24-hour check-in', + 'Hangers', + 'Hair dryer', + 'Iron', + 'Laptop friendly workspace', + ], + price: { + $numberDecimal: '527.00', + }, + cleaning_fee: { + $numberDecimal: '211.00', + }, + extra_people: { + $numberDecimal: '211.00', + }, + guests_included: { + $numberDecimal: '1', + }, + images: { + thumbnail_url: '', + medium_url: '', + picture_url: + 'https://a0.muscache.com/im/pictures/231120b6-e6e5-4514-93cd-53722ac67de1.jpg?aki_policy=large', + xl_picture_url: '', + }, + host: { + host_id: '1241644', + host_url: 'https://www.airbnb.com/users/show/1241644', + host_name: 'Zeynep', + host_location: 'Istanbul, Istanbul, Turkey', + host_about: 'Z.', + host_thumbnail_url: + 
'https://a0.muscache.com/im/users/1241644/profile_pic/1426581715/original.jpg?aki_policy=profile_small', + host_picture_url: + 'https://a0.muscache.com/im/users/1241644/profile_pic/1426581715/original.jpg?aki_policy=profile_x_medium', + host_neighbourhood: 'Moda', + host_is_superhost: false, + host_has_profile_pic: true, + host_identity_verified: true, + host_listings_count: 2, + host_total_listings_count: 2, + host_verifications: [ + 'email', + 'phone', + 'facebook', + 'reviews', + 'jumio', + 'government_id', + ], + }, + address: { + street: 'Kadıköy, İstanbul, Turkey', + suburb: 'Moda', + government_area: 'Kadikoy', + market: 'Istanbul', + country: 'Turkey', + country_code: 'TR', + location: { + type: 'Point', + coordinates: [29.03133, 40.98585], + is_location_exact: true, + }, + }, + availability: { + availability_30: 27, + availability_60: 57, + availability_90: 87, + availability_365: 362, + }, + review_scores: { + review_scores_accuracy: 10, + review_scores_cleanliness: 10, + review_scores_checkin: 10, + review_scores_communication: 10, + review_scores_location: 10, + review_scores_value: 10, + review_scores_rating: 100, + }, + }, + { + _id: '1003530', + listing_url: 'https://www.airbnb.com/rooms/1003530', + name: 'New York City - Upper West Side Apt', + notes: + 'My cat, Samantha, are in and out during the summer. 
The apt is layed out in such a way that each bedroom is very private.', + property_type: 'Apartment', + room_type: 'Private room', + bed_type: 'Real Bed', + minimum_nights: '12', + maximum_nights: '360', + cancellation_policy: 'strict_14_with_grace_period', + last_scraped: { + $date: '2019-03-07T05:00:00.000Z', + }, + calendar_last_scraped: { + $date: '2019-03-07T05:00:00.000Z', + }, + first_review: { + $date: '2013-04-29T04:00:00.000Z', + }, + last_review: { + $date: '2018-08-12T04:00:00.000Z', + }, + accommodates: 2, + bedrooms: 1, + beds: 1, + number_of_reviews: 70, + bathrooms: { + $numberDecimal: '1.0', + }, + amenities: [ + 'Internet', + 'Wifi', + 'Air conditioning', + 'Kitchen', + 'Elevator', + 'Buzzer/wireless intercom', + 'Heating', + 'Family/kid friendly', + 'Washer', + 'Dryer', + 'translation missing: en.hosting_amenity_50', + ], + price: { + $numberDecimal: '135.00', + }, + security_deposit: { + $numberDecimal: '0.00', + }, + cleaning_fee: { + $numberDecimal: '135.00', + }, + extra_people: { + $numberDecimal: '0.00', + }, + guests_included: { + $numberDecimal: '1', + }, + images: { + thumbnail_url: '', + medium_url: '', + picture_url: + 'https://a0.muscache.com/im/pictures/15074036/a97119ed_original.jpg?aki_policy=large', + xl_picture_url: '', + }, + host: { + host_id: '454250', + host_url: 'https://www.airbnb.com/users/show/454250', + host_name: 'Greta', + host_location: 'New York, New York, United States', + host_about: + 'By now I have lived longer in the city than the country however I feel equally at home in each. 
I like to keep one foot in each and help others to do the same!', + host_response_time: 'within an hour', + host_thumbnail_url: + 'https://a0.muscache.com/im/pictures/f1022be4-e72a-4b35-b6d2-3d2736ddaff9.jpg?aki_policy=profile_small', + host_picture_url: + 'https://a0.muscache.com/im/pictures/f1022be4-e72a-4b35-b6d2-3d2736ddaff9.jpg?aki_policy=profile_x_medium', + host_neighbourhood: '', + host_response_rate: 100, + host_is_superhost: true, + host_has_profile_pic: true, + host_identity_verified: true, + host_listings_count: 3, + host_total_listings_count: 3, + host_verifications: [ + 'email', + 'phone', + 'reviews', + 'jumio', + 'offline_government_id', + 'government_id', + ], + }, + address: { + street: 'New York, NY, United States', + suburb: 'Manhattan', + government_area: 'Upper West Side', + market: 'New York', + country: 'United States', + country_code: 'US', + location: { + type: 'Point', + coordinates: [-73.96523, 40.79962], + is_location_exact: false, + }, + }, + availability: { + availability_30: 0, + availability_60: 0, + availability_90: 0, + availability_365: 93, + }, + review_scores: { + review_scores_accuracy: 10, + review_scores_cleanliness: 9, + review_scores_checkin: 10, + review_scores_communication: 10, + review_scores_location: 10, + review_scores_value: 10, + review_scores_rating: 94, + }, + }, +]; diff --git a/packages/compass-generative-ai/tests/evals/fixtures/berlin.cocktailbars.ts b/packages/compass-generative-ai/tests/evals/fixtures/berlin.cocktailbars.ts new file mode 100644 index 00000000000..2b2b7ffd441 --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/fixtures/berlin.cocktailbars.ts @@ -0,0 +1,99 @@ +export default [ + { + _id: { + $oid: '5ca652bf56618187558b4de3', + }, + name: 'Bar Zentral', + strasse: 'Lotte-Lenya-Bogen', + hausnummer: 551, + plz: 10623, + webseite: 'barzentralde', + koordinaten: [13.3269283, 52.5050862], + }, + { + _id: { + $oid: '5ca6544a97aed3878f9b090f', + }, + name: 'Hefner Bar', + strasse: 
'Kantstr.', + hausnummer: 146, + plz: 10623, + koordinaten: [13.3213093, 52.5055506], + }, + { + _id: { + $oid: '5ca654ec97aed3878f9b0910', + }, + name: 'Bar am Steinplatz', + strasse: 'Steinplatz', + hausnummer: 4, + plz: 10623, + webseite: 'barsteinplatz.com', + koordinaten: [13.3241804, 52.5081672], + }, + { + _id: { + $oid: '5ca6559e97aed3878f9b0911', + }, + name: 'Rum Trader', + strasse: 'Fasanenstr.', + hausnummer: 40, + plz: 10719, + koordinaten: [13.3244667, 52.4984012], + }, + { + _id: { + $oid: '5ca655f597aed3878f9b0912', + }, + name: 'Stairs', + strasse: 'Uhlandstr.', + hausnummer: 133, + plz: 10717, + webseite: 'stairsbar-berlin.com', + koordinaten: [13.3215159, 52.49256], + }, + { + _id: { + $oid: '5ca656a697aed3878f9b0913', + }, + name: 'Green Door', + strasse: 'Winterfeldtstr.', + hausnummer: 50, + plz: 10781, + webseite: 'greendoor.de', + koordinaten: [13.3507105, 52.4970952], + }, + { + _id: { + $oid: '5ca6570597aed3878f9b0914', + }, + name: 'Mister Hu', + strasse: 'Goltzstr.', + hausnummer: 39, + plz: 10781, + webseite: 'misterhu.de', + koordinaten: [13.3511185, 52.4927243], + }, + { + _id: { + $oid: '5ca6576f97aed3878f9b0915', + }, + name: 'Salut!', + strasse: 'Goltzstr.', + hausnummer: 7, + plz: 10781, + webseite: 'salut-berlin.de', + koordinaten: [13.3513021, 52.4911044], + }, + { + _id: { + $oid: '5ca6581197aed3878f9b0916', + }, + name: 'Lebensstern', + strasse: 'Kurfürstenstr.', + hausnummer: 58, + plz: 10785, + webseite: 'lebens-stern.de', + koordinaten: [13.3524999, 52.502059], + }, +]; diff --git a/packages/compass-generative-ai/tests/evals/fixtures/netflix.comments.ts b/packages/compass-generative-ai/tests/evals/fixtures/netflix.comments.ts new file mode 100644 index 00000000000..8afda595db3 --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/fixtures/netflix.comments.ts @@ -0,0 +1,128 @@ +export default [ + { + _id: { + $oid: '5a9427648b0beebeb69579f3', + }, + name: 'Jorah Mormont', + email: 'iain_glen@gameofthron.es', + 
movie_id: { + $oid: '573a1390f29313caabcd44d3', + }, + text: 'Minus sequi incidunt cum magnam. Quam voluptatum vitae ab voluptatum cum. Autem perferendis nisi nulla dolores aut recusandae.', + date: { + $date: '1994-02-18T18:52:31.000Z', + }, + }, + { + _id: { + $oid: '5a9427648b0beebeb6957a21', + }, + name: "Jaqen H'ghar", + email: 'tom_wlaschiha@gameofthron.es', + movie_id: { + $oid: '573a1390f29313caabcd516c', + }, + text: 'Minima odit officiis minima nam. Aspernatur id reprehenderit eius inventore amet laudantium. Eos unde enim recusandae fugit sint.', + date: { + $date: '1981-11-08T04:32:25.000Z', + }, + }, + { + _id: { + $oid: '5a9427648b0beebeb6957a32', + }, + name: 'Megan Richards', + email: 'megan_richards@fakegmail.com', + movie_id: { + $oid: '573a1390f29313caabcd56c3', + }, + text: 'Mollitia ducimus consequatur excepturi corrupti expedita fugit rem aut. Nisi repellendus non velit tempora maxime. Ducimus recusandae perspiciatis hic vel voluptates.', + date: { + $date: '1976-02-15T11:21:57.000Z', + }, + }, + { + _id: { + $oid: '5a9427648b0beebeb6957a78', + }, + name: 'Mercedes Tyler', + email: 'mercedes_tyler@fakegmail.com', + movie_id: { + $oid: '573a1390f29313caabcd6399', + }, + text: 'Voluptate odio minima pariatur recusandae. Architecto illum dicta repudiandae. Nobis aperiam exercitationem placeat repellat dolorum laborum ea. Est impedit totam facilis incidunt itaque facere.', + date: { + $date: '2007-10-17T06:50:56.000Z', + }, + }, + { + _id: { + $oid: '5a9427648b0beebeb69579e7', + }, + name: 'Mercedes Tyler', + email: 'mercedes_tyler@fakegmail.com', + movie_id: { + $oid: '573a1390f29313caabcd4323', + }, + text: 'Eius veritatis vero facilis quaerat fuga temporibus. Praesentium expedita sequi repellat id. Corporis minima enim ex. 
Provident fugit nisi dignissimos nulla nam ipsum aliquam.', + date: { + $date: '2002-08-18T04:56:07.000Z', + }, + }, + { + _id: { + $oid: '5a9427648b0beebeb6957a13', + }, + name: 'John Bishop', + email: 'john_bishop@fakegmail.com', + movie_id: { + $oid: '573a1390f29313caabcd4cfd', + }, + text: 'Soluta aliquam a ullam iste dolor odit consequatur. Nostrum recusandae facilis facere provident distinctio corrupti aliquam recusandae.', + date: { + $date: '1992-07-08T08:01:20.000Z', + }, + }, + { + _id: { + $oid: '5a9427648b0beebeb6957a7f', + }, + name: 'Javier Smith', + email: 'javier_smith@fakegmail.com', + movie_id: { + $oid: '573a1390f29313caabcd65b2', + }, + text: 'Molestiae omnis deserunt voluptatibus molestias ut assumenda. Nesciunt veniam iste ad praesentium sit saepe. Iusto voluptatum qui alias pariatur velit. Aspernatur cum eius rerum accusamus inventore.', + date: { + $date: '1973-03-31T14:46:20.000Z', + }, + }, + { + _id: { + $oid: '5a9427648b0beebeb69579f7', + }, + name: 'Yara Greyjoy', + email: 'gemma_whelan@gameofthron.es', + movie_id: { + $oid: '573a1390f29313caabcd4556', + }, + text: 'Dignissimos sunt aspernatur rerum magni debitis neque. Temporibus nisi repudiandae praesentium reprehenderit. Aliquam aliquid asperiores quasi asperiores repellat quasi rerum.', + date: { + $date: '2016-10-05T16:26:16.000Z', + }, + }, + { + _id: { + $oid: '5a9427648b0beebeb6957a08', + }, + name: 'Meera Reed', + email: 'ellie_kendrick@gameofthron.es', + movie_id: { + $oid: '573a1390f29313caabcd4964', + }, + text: 'Harum porro ad dolorum repellendus. Nihil natus aspernatur quaerat aperiam nam neque. Beatae voluptates quas saepe enim facere. 
Unde sint praesentium numquam molestias nihil.', + date: { + $date: '1971-08-31T07:24:20.000Z', + }, + }, +]; diff --git a/packages/compass-generative-ai/tests/evals/fixtures/netflix.movies.ts b/packages/compass-generative-ai/tests/evals/fixtures/netflix.movies.ts new file mode 100644 index 00000000000..dc696ac485f --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/fixtures/netflix.movies.ts @@ -0,0 +1,74 @@ +export default [ + { + _id: { + $oid: '573b864df29313caabe354ed', + }, + title: 'Dinosaur Planet', + year: 2003, + id: '1', + }, + { + _id: { + $oid: '573b864df29313caabe354ef', + }, + title: 'Isle of Man TT 2004 Review', + year: 2004, + id: '2', + }, + { + _id: { + $oid: '573b864df29313caabe354f0', + }, + title: "Paula Abdul's Get Up & Dance", + year: 1994, + id: '4', + }, + { + _id: { + $oid: '573b864df29313caabe354f1', + }, + title: 'The Rise and Fall of ECW', + year: 2004, + id: '5', + }, + { + _id: { + $oid: '573b864df29313caabe354f2', + }, + title: 'Sick', + year: 1997, + id: '6', + }, + { + _id: { + $oid: '573b864df29313caabe354f3', + }, + title: '8 Man', + year: 1992, + id: '7', + }, + { + _id: { + $oid: '573b864df29313caabe354f4', + }, + title: 'What the #$*! 
Do We Know!?', + year: 2004, + id: '8', + }, + { + _id: { + $oid: '573b864df29313caabe354f5', + }, + title: 'Fighter', + year: 2002, + id: '10', + }, + { + _id: { + $oid: '573b864df29313caabe354f6', + }, + title: "Class of Nuke 'Em High 2", + year: 1991, + id: '9', + }, +]; diff --git a/packages/compass-generative-ai/tests/evals/fixtures/nyc.parking.ts b/packages/compass-generative-ai/tests/evals/fixtures/nyc.parking.ts new file mode 100644 index 00000000000..0f872a97f20 --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/fixtures/nyc.parking.ts @@ -0,0 +1,450 @@ +export default [ + { + _id: { + $oid: '5735040085629ed4fa83946f', + }, + 'Summons Number': { + $numberLong: '7039084223', + }, + 'Plate ID': 'GSY3857', + 'Registration State': 'NY', + 'Plate Type': 'PAS', + 'Issue Date': '01/31/2015', + 'Violation Code': 38, + 'Vehicle Body Type': '2DSD', + 'Vehicle Make': 'BMW', + 'Issuing Agency': 'T', + 'Street Code1': 34030, + 'Street Code2': 10910, + 'Street Code3': 33390, + 'Vehicle Expiration Date': '01/01/20160908 12:00:00 PM', + 'Violation Location': 6, + 'Violation Precinct': 6, + 'Issuer Precinct': 6, + 'Issuer Code': 340095, + 'Issuer Command': 'T800', + 'Issuer Squad': 'A2', + 'Violation Time': '0941P', + 'Time First Observed': '', + 'Violation County': 'NY', + 'Violation In Front Of Or Opposite': 'O', + 'House Number': 416, + 'Street Name': 'W 13th St', + 'Intersecting Street': '', + 'Date First Observed': '01/05/0001 12:00:00 PM', + 'Law Section': 408, + 'Sub Division': 'h1', + 'Violation Legal Code': '', + 'Days Parking In Effect': 'Y', + 'From Hours In Effect': '0700A', + 'To Hours In Effect': '1100P', + 'Vehicle Color': 'BK', + 'Unregistered Vehicle?': '', + 'Vehicle Year': 2015, + 'Meter Number': '', + 'Feet From Curb': 0, + 'Violation Post Code': 'B 77', + 'Violation Description': '38-Failure to Display Muni Rec', + 'No Standing or Stopping Violation': '', + 'Hydrant Violation': '', + 'Double Parking Violation': '', + }, + { + _id: { + $oid: 
'5735040085629ed4fa839470', + }, + 'Summons Number': { + $numberLong: '7057883730', + }, + 'Plate ID': 'AM485F', + 'Registration State': 'NJ', + 'Plate Type': 'PAS', + 'Issue Date': '07/24/2014', + 'Violation Code': 18, + 'Vehicle Body Type': 'DELV', + 'Vehicle Make': 'FRUEH', + 'Issuing Agency': 'T', + 'Street Code1': 10110, + 'Street Code2': 17490, + 'Street Code3': 17510, + 'Vehicle Expiration Date': '01/01/88888888 12:00:00 PM', + 'Violation Location': 13, + 'Violation Precinct': 13, + 'Issuer Precinct': 13, + 'Issuer Code': 345238, + 'Issuer Command': 'T102', + 'Issuer Squad': 'C', + 'Violation Time': '0749A', + 'Time First Observed': '', + 'Violation County': 'NY', + 'Violation In Front Of Or Opposite': 'O', + 'House Number': 444, + 'Street Name': '2nd Ave', + 'Intersecting Street': '', + 'Date First Observed': '01/05/0001 12:00:00 PM', + 'Law Section': 408, + 'Sub Division': 'f4', + 'Violation Legal Code': '', + 'Days Parking In Effect': 'YYYYY', + 'From Hours In Effect': '0700A', + 'To Hours In Effect': '1000A', + 'Vehicle Color': 'GREEN', + 'Unregistered Vehicle?': '', + 'Vehicle Year': 0, + 'Meter Number': '', + 'Feet From Curb': 0, + 'Violation Post Code': '16 6', + 'Violation Description': '18-No Stand (bus lane)', + 'No Standing or Stopping Violation': '', + 'Hydrant Violation': '', + 'Double Parking Violation': '', + }, + { + _id: { + $oid: '5735040085629ed4fa839471', + }, + 'Summons Number': { + $numberLong: '7972683426', + }, + 'Plate ID': '40424MC', + 'Registration State': 'NY', + 'Plate Type': 'COM', + 'Issue Date': '10/27/2014', + 'Violation Code': 20, + 'Vehicle Body Type': 'SUBN', + 'Vehicle Make': 'ACURA', + 'Issuing Agency': 'T', + 'Street Code1': 35570, + 'Street Code2': 13610, + 'Street Code3': 44990, + 'Vehicle Expiration Date': '01/01/20141202 12:00:00 PM', + 'Violation Location': 24, + 'Violation Precinct': 24, + 'Issuer Precinct': 24, + 'Issuer Code': 361115, + 'Issuer Command': 'T103', + 'Issuer Squad': 'F', + 'Violation Time': 
'1125A', + 'Time First Observed': '', + 'Violation County': 'NY', + 'Violation In Front Of Or Opposite': 'O', + 'House Number': 255, + 'Street Name': 'W 90th St', + 'Intersecting Street': '', + 'Date First Observed': '01/05/0001 12:00:00 PM', + 'Law Section': 408, + 'Sub Division': 'd', + 'Violation Legal Code': '', + 'Days Parking In Effect': 'Y', + 'From Hours In Effect': '0800A', + 'To Hours In Effect': '0600P', + 'Vehicle Color': 'BLACK', + 'Unregistered Vehicle?': '', + 'Vehicle Year': 2015, + 'Meter Number': '', + 'Feet From Curb': 0, + 'Violation Post Code': '44 7', + 'Violation Description': '20A-No Parking (Non-COM)', + 'No Standing or Stopping Violation': '', + 'Hydrant Violation': '', + 'Double Parking Violation': '', + }, + { + _id: { + $oid: '5735040085629ed4fa839472', + }, + 'Summons Number': { + $numberLong: '7638712493', + }, + 'Plate ID': 443344, + 'Registration State': 'RI', + 'Plate Type': 'PAS', + 'Issue Date': '09/16/2014', + 'Violation Code': 38, + 'Vehicle Body Type': '4DSD', + 'Vehicle Make': 'CHEVR', + 'Issuing Agency': 'T', + 'Street Code1': 53790, + 'Street Code2': 19740, + 'Street Code3': 19840, + 'Vehicle Expiration Date': '01/01/20140688 12:00:00 PM', + 'Violation Location': 106, + 'Violation Precinct': 106, + 'Issuer Precinct': 106, + 'Issuer Code': 331801, + 'Issuer Command': 'T402', + 'Issuer Squad': 'H', + 'Violation Time': '1225P', + 'Time First Observed': '', + 'Violation County': 'Q', + 'Violation In Front Of Or Opposite': 'F', + 'House Number': '104-07', + 'Street Name': 'Liberty Ave', + 'Intersecting Street': '', + 'Date First Observed': '01/05/0001 12:00:00 PM', + 'Law Section': 408, + 'Sub Division': 'h1', + 'Violation Legal Code': '', + 'Days Parking In Effect': 'Y', + 'From Hours In Effect': '0900A', + 'To Hours In Effect': '0700P', + 'Vehicle Color': 'GREY', + 'Unregistered Vehicle?': '', + 'Vehicle Year': 0, + 'Meter Number': '', + 'Feet From Curb': 0, + 'Violation Post Code': '24 4', + 'Violation Description': 
'38-Failure to Display Muni Rec', + 'No Standing or Stopping Violation': '', + 'Hydrant Violation': '', + 'Double Parking Violation': '', + }, + { + _id: { + $oid: '5735040085629ed4fa839473', + }, + 'Summons Number': { + $numberLong: '7721537642', + }, + 'Plate ID': 'GMX1207', + 'Registration State': 'NY', + 'Plate Type': 'PAS', + 'Issue Date': '09/18/2014', + 'Violation Code': 38, + 'Vehicle Body Type': '4DSD', + 'Vehicle Make': 'HONDA', + 'Issuing Agency': 'T', + 'Street Code1': 8790, + 'Street Code2': 17990, + 'Street Code3': 18090, + 'Vehicle Expiration Date': '01/01/20160202 12:00:00 PM', + 'Violation Location': 115, + 'Violation Precinct': 115, + 'Issuer Precinct': 115, + 'Issuer Code': 358644, + 'Issuer Command': 'T401', + 'Issuer Squad': 'R', + 'Violation Time': '0433P', + 'Time First Observed': '', + 'Violation County': 'Q', + 'Violation In Front Of Or Opposite': 'F', + 'House Number': '88-22', + 'Street Name': '37th Ave', + 'Intersecting Street': '', + 'Date First Observed': '01/05/0001 12:00:00 PM', + 'Law Section': 408, + 'Sub Division': 'h1', + 'Violation Legal Code': '', + 'Days Parking In Effect': 'Y', + 'From Hours In Effect': '0830A', + 'To Hours In Effect': '0700P', + 'Vehicle Color': 'BK', + 'Unregistered Vehicle?': '', + 'Vehicle Year': 2013, + 'Meter Number': '', + 'Feet From Curb': 0, + 'Violation Post Code': '16 4', + 'Violation Description': '38-Failure to Display Muni Rec', + 'No Standing or Stopping Violation': '', + 'Hydrant Violation': '', + 'Double Parking Violation': '', + }, + { + _id: { + $oid: '5735040085629ed4fa839474', + }, + 'Summons Number': { + $numberLong: '7899927729', + }, + 'Plate ID': '63543JM', + 'Registration State': 'NY', + 'Plate Type': 'COM', + 'Issue Date': '01/22/2015', + 'Violation Code': 14, + 'Vehicle Body Type': 'VAN', + 'Vehicle Make': 'GMC', + 'Issuing Agency': 'T', + 'Street Code1': 34890, + 'Street Code2': 10410, + 'Street Code3': 10510, + 'Vehicle Expiration Date': '01/01/88888888 12:00:00 PM', + 'Violation 
Location': 18, + 'Violation Precinct': 18, + 'Issuer Precinct': 18, + 'Issuer Code': 353508, + 'Issuer Command': 'T106', + 'Issuer Squad': 'D', + 'Violation Time': '0940A', + 'Time First Observed': '', + 'Violation County': 'NY', + 'Violation In Front Of Or Opposite': 'F', + 'House Number': 5, + 'Street Name': 'W 56th St', + 'Intersecting Street': '', + 'Date First Observed': '01/05/0001 12:00:00 PM', + 'Law Section': 408, + 'Sub Division': 'c', + 'Violation Legal Code': '', + 'Days Parking In Effect': 'YYYYYYY', + 'From Hours In Effect': '', + 'To Hours In Effect': '', + 'Vehicle Color': 'BROWN', + 'Unregistered Vehicle?': '', + 'Vehicle Year': 1990, + 'Meter Number': '', + 'Feet From Curb': 0, + 'Violation Post Code': '18 6', + 'Violation Description': '14-No Standing', + 'No Standing or Stopping Violation': '', + 'Hydrant Violation': '', + 'Double Parking Violation': '', + }, + { + _id: { + $oid: '5735040085629ed4fa839475', + }, + 'Summons Number': { + $numberLong: '7899927729', + }, + 'Plate ID': '63543JM', + 'Registration State': 'NY', + 'Plate Type': 'COM', + 'Issue Date': '01/22/2015', + 'Violation Code': 14, + 'Vehicle Body Type': 'VAN', + 'Vehicle Make': 'GMC', + 'Issuing Agency': 'T', + 'Street Code1': 34890, + 'Street Code2': 10410, + 'Street Code3': 10510, + 'Vehicle Expiration Date': '01/01/88888888 12:00:00 PM', + 'Violation Location': 18, + 'Violation Precinct': 18, + 'Issuer Precinct': 18, + 'Issuer Code': 353508, + 'Issuer Command': 'T106', + 'Issuer Squad': 'D', + 'Violation Time': '0940A', + 'Time First Observed': '', + 'Violation County': 'NY', + 'Violation In Front Of Or Opposite': 'F', + 'House Number': 5, + 'Street Name': 'W 56th St', + 'Intersecting Street': '', + 'Date First Observed': '01/05/0001 12:00:00 PM', + 'Law Section': 408, + 'Sub Division': 'c', + 'Violation Legal Code': '', + 'Days Parking In Effect': 'YYYYYYY', + 'From Hours In Effect': '', + 'To Hours In Effect': '', + 'Vehicle Color': 'BROWN', + 'Unregistered Vehicle?': '', + 
'Vehicle Year': 1990, + 'Meter Number': '', + 'Feet From Curb': 0, + 'Violation Post Code': '18 6', + 'Violation Description': '14-No Standing', + 'No Standing or Stopping Violation': '', + 'Hydrant Violation': '', + 'Double Parking Violation': '', + }, + { + _id: { + $oid: '5735040085629ed4fa839476', + }, + 'Summons Number': 1377047714, + 'Plate ID': 'T657080C', + 'Registration State': 'NY', + 'Plate Type': 'SRF', + 'Issue Date': '02/12/2015', + 'Violation Code': 46, + 'Vehicle Body Type': '', + 'Vehicle Make': 'TOYOT', + 'Issuing Agency': 'P', + 'Street Code1': 38643, + 'Street Code2': 10440, + 'Street Code3': 10490, + 'Vehicle Expiration Date': '01/01/20150831 12:00:00 PM', + 'Violation Location': 108, + 'Violation Precinct': 108, + 'Issuer Precinct': 108, + 'Issuer Code': 952146, + 'Issuer Command': 108, + 'Issuer Squad': 0, + 'Violation Time': '1035A', + 'Time First Observed': '', + 'Violation County': 'Q', + 'Violation In Front Of Or Opposite': 'F', + 'House Number': '47-20', + 'Street Name': 'CENTER BLVD', + 'Intersecting Street': '', + 'Date First Observed': '01/05/0001 12:00:00 PM', + 'Law Section': 408, + 'Sub Division': 'F1', + 'Violation Legal Code': '', + 'Days Parking In Effect': 'BBBBBBB', + 'From Hours In Effect': 'ALL', + 'To Hours In Effect': 'ALL', + 'Vehicle Color': 'BLK', + 'Unregistered Vehicle?': 0, + 'Vehicle Year': 2011, + 'Meter Number': '-', + 'Feet From Curb': 0, + 'Violation Post Code': '', + 'Violation Description': '', + 'No Standing or Stopping Violation': '', + 'Hydrant Violation': '', + 'Double Parking Violation': '', + }, + { + _id: { + $oid: '5735040085629ed4fa839477', + }, + 'Summons Number': { + $numberLong: '8028772766', + }, + 'Plate ID': '84046MG', + 'Registration State': 'NY', + 'Plate Type': 'COM', + 'Issue Date': '06/25/2015', + 'Violation Code': 10, + 'Vehicle Body Type': 'DELV', + 'Vehicle Make': 'HINO', + 'Issuing Agency': 'T', + 'Street Code1': 10610, + 'Street Code2': 0, + 'Street Code3': 0, + 'Vehicle Expiration 
Date': '01/01/20160430 12:00:00 PM', + 'Violation Location': 14, + 'Violation Precinct': 14, + 'Issuer Precinct': 14, + 'Issuer Code': 361878, + 'Issuer Command': 'T102', + 'Issuer Squad': 'K', + 'Violation Time': '0110P', + 'Time First Observed': '', + 'Violation County': 'NY', + 'Violation In Front Of Or Opposite': 'I', + 'House Number': 'E', + 'Street Name': '7th Ave', + 'Intersecting Street': '35ft N/of W 42nd St', + 'Date First Observed': '01/05/0001 12:00:00 PM', + 'Law Section': 408, + 'Sub Division': 'b', + 'Violation Legal Code': '', + 'Days Parking In Effect': 'YYYYYYY', + 'From Hours In Effect': '', + 'To Hours In Effect': '', + 'Vehicle Color': 'WH', + 'Unregistered Vehicle?': '', + 'Vehicle Year': 2015, + 'Meter Number': '', + 'Feet From Curb': 0, + 'Violation Post Code': 'MC 9', + 'Violation Description': '10-No Stopping', + 'No Standing or Stopping Violation': '', + 'Hydrant Violation': '', + 'Double Parking Violation': '', + }, +]; diff --git a/packages/compass-generative-ai/tests/evals/gen-ai.eval.ts b/packages/compass-generative-ai/tests/evals/gen-ai.eval.ts new file mode 100644 index 00000000000..cefc8a35ced --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/gen-ai.eval.ts @@ -0,0 +1,23 @@ +import { Eval } from 'braintrust'; +import type { + ConversationEvalCaseExpected, + ConversationEvalCaseInput, + ConversationTaskOutput, +} from './types'; +import { makeChatbotCall } from './chatbot-api'; +import { Factuality } from './scorers'; +import { generateGenAiEvalCases } from './use-cases'; + +const GEN_AI_PROJECT_NAME = 'Compass Gen AI'; + +void Eval< + ConversationEvalCaseInput, + ConversationTaskOutput, + ConversationEvalCaseExpected +>(GEN_AI_PROJECT_NAME, { + data: async () => { + return await generateGenAiEvalCases(); + }, + task: makeChatbotCall, + scores: [Factuality], +}); diff --git a/packages/compass-generative-ai/tests/evals/scorers.ts b/packages/compass-generative-ai/tests/evals/scorers.ts new file mode 100644 index 
00000000000..6483b81ba87 --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/scorers.ts @@ -0,0 +1,17 @@ +import type { ConversationEvalScorer } from './types'; +import { Factuality as _Factuality } from 'autoevals'; +import { allText } from './utils'; + +export const Factuality: ConversationEvalScorer = ({ + input, + output, + expected, +}) => { + return _Factuality({ + input: allText(input.messages), + output: allText(output.messages), + expected: allText(expected.messages), + model: 'gpt-4.1', + temperature: undefined, + }); +}; diff --git a/packages/compass-generative-ai/tests/evals/types.ts b/packages/compass-generative-ai/tests/evals/types.ts new file mode 100644 index 00000000000..64f0f802148 --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/types.ts @@ -0,0 +1,27 @@ +import type { EvalScorer } from 'braintrust'; + +export type Message = { + content: string; +}; +type InputMessage = Message & { role: 'user' }; +type OutputMessage = Message; +type ExpectedMessage = OutputMessage; + +export type ConversationEvalCaseInput = { + messages: InputMessage[]; + instructions: Message; +}; + +export type ConversationEvalCaseExpected = { + messages: ExpectedMessage[]; +}; + +export type ConversationTaskOutput = { + messages: OutputMessage[]; +}; + +export type ConversationEvalScorer = EvalScorer< + ConversationEvalCaseInput, + ConversationTaskOutput, + ConversationEvalCaseExpected +>; diff --git a/packages/compass-generative-ai/tests/evals/use-cases/aggregate-query.ts b/packages/compass-generative-ai/tests/evals/use-cases/aggregate-query.ts new file mode 100644 index 00000000000..e6347b4dcff --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/use-cases/aggregate-query.ts @@ -0,0 +1,296 @@ +import type { GenAiUsecase } from '.'; + +export const aggregateQueries: GenAiUsecase[] = [ + { + namespace: 'netflix.movies', + userInput: 'find all the movies released in 1983', + expectedOutput: `[{$match: {year: 1983}}]`, + name: 'basic aggregate 
query', + }, + { + namespace: 'netflix.movies', + userInput: + 'find three movies with alien in the title, show earliest movies first, only the _id, title and year', + expectedOutput: ` + [ + {$match: {title: {$regex: "alien", $options: "i"}}}, + {$project: {_id: 1, title: 1, year: 1}}, + {$sort: {year: 1}}, + {$limit: 3} + ] + `, + name: 'aggregate with filter projection sort and limit', + }, + { + namespace: 'nyc.parking', + userInput: + 'find all the violations for the violation code 21 and only return the car plate', + expectedOutput: ` + [{$match: {"Violation Code": 21}}, {$project: {"Plate ID": 1, _id: 0}}] + `, + name: 'aggregate with filter and projection', + }, + { + namespace: 'berlin.cocktailbars', + userInput: + 'find all the bars 10km from the berlin center, only return their names. Berlin center is at longitude 13.4050 and latitude 52.5200. use correct key for coordinates.', + expectedOutput: ` + [ + {$geoNear: { near: {type: "Point", coordinates: [13.4050, 52.5200]}, distanceField: "dist", maxDistance: 10000, spherical: true, key: "koordinaten" }}, + {$project: {name: 1, _id: 0}} + ] + `, + name: 'geo-based aggregate', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'Return all the properties of type "Hotel" and with ratings lte 70', + expectedOutput: ` + [{ + $match: { + property_type: "Hotel", + "review_scores.review_scores_rating": { $lte: 70 } + } + }] + `, + name: 'aggregate with nested fields in $match', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'what is the bed count that occurs the most? return it in a field called bedCount (only return the bedCount field)', + expectedOutput: ` + [ + {$group: {_id: "$beds", count: {$sum: 1}}}, + {$sort: {count: -1}}, + {$limit: 1}, + {$project: {bedCount: "$_id", _id: 0}} + ] + `, + name: 'aggregate with group sort limit and project', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'which host id has the most reviews across all listings? 
return it in only a field called hostId', + expectedOutput: ` + [ + {$group: {_id: "$host.host_id", totalReviews: {$sum: "$number_of_reviews"}}}, + {$sort: {totalReviews: -1}}, + {$limit: 1}, + {$project: {hostId: "$_id", _id: 0}} + ] + `, + name: 'aggregate with group sort limit and project 2', + }, + { + namespace: 'netflix.movies', + userInput: + 'Which movies were released 30 years ago (consider whole year). return title and year', + expectedOutput: ` + [ + { + $match: { + $and: [ + {year: {$gte: ${new Date().getFullYear() - 30}}}, + {year: {$lt: ${new Date().getFullYear() - 29}}}, + ] + } + }, + {$project: {title: 1, year: 1}} + ] + `, + name: 'relative date aggregate 1', + }, + { + namespace: 'netflix.movies', + userInput: 'find all of the movies from last year', + expectedOutput: ` + [{$match: {year: ${new Date().getFullYear() - 1}}}] + `, + name: 'relative date aggregate 2', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'give me just the price and the first 3 amenities (in a field called amenities) of the listing that has "Step-free access" in its amenities.', + expectedOutput: ` + [ + {$match: {amenities: "Step-free access"}}, + {$project: {price: 1, amenities: {$slice: ["$amenities", 3]}}} + ] + `, + name: 'aggregate with array slice', + }, + { + namespace: 'nyc.parking', + userInput: + 'Return only the Plate IDs of Acura vehicles registered in New York', + expectedOutput: ` + [ + {$match: {$and: [{"Vehicle Make": "ACURA"}, {"Registration State": "NY"}]}}, + {$project: {"Plate ID": 1}} + ] + `, + name: 'aggregate with multiple conditions in match', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + '¿Qué alojamiento tiene el precio más bajo? 
devolver el número en un campo llamado "precio"', + expectedOutput: ` + [ + {$sort: {price: 1}}, + {$limit: 1}, + {$project: {_id: 0, precio: "$price"}} + ] + `, + name: 'aggregate with non-english prompt', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'give me only cancellation policy and listing url of the most expensive listing', + expectedOutput: ` + [ + {$sort: {price: -1}}, + {$project: {cancellation_policy: 1, "listing_url": 1, _id: 0}}, + {$limit: 1} + ] + `, + name: 'simple aggregate with sort and limit', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'group all the listings based on the amenities tags and return only count and tag name', + expectedOutput: ` + [ + {$unwind: "$amenities"}, + {$group: {_id: "$amenities", count: {$sum: 1}}}, + {$project: {_id: 0, tag: "$_id", count: 1}} + ] + `, + name: 'aggregate with unwind and group', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'which listing has the most amenities? the resulting documents should only have the _id', + expectedOutput: ` + [ + {$project: {_id: 1, numAmenities: {$size: "$amenities"}}}, + {$sort: {numAmenities: -1}}, + {$limit: 1}, + {$project: {_id: 1}} + ] + `, + name: 'aggregate with size operator', + }, + { + namespace: 'netflix.movies', + userInput: + 'What are the 5 most frequent words (case sensitive) used in movie titles in the 1980s and 1990s combined? Sorted first by frequency count then alphabetically. output fields count and word', + expectedOutput: ` + [ + {$match: {year: { $gte: 1980, $lte: 1999 }}}, + {$addFields: {titleWords: {$split: ["$title", " "]}}}, + {$unwind: "$titleWords"}, + {$group: {_id: "$titleWords", count: {$sum: 1}}}, + {$sort: {count: -1, _id: 1}}, + {$limit: 5}, + {$project: {_id: 0, count: 1, word: "$_id"}} + ] + `, + name: 'aggregate with regex, addFields and split', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'what percentage of listings have a "Washer" in their amenities? 
Only consider listings with more than 2 beds. Return is as a string named "washerPercentage" like "75%", rounded to the nearest whole number.', + expectedOutput: ` + [ + {$match: {beds: {$gt: 2}}}, + { + $group: { + _id: null, + totalListings: {$sum: 1}, + withWasher: { + $sum: { + $cond: [{$in: ["Washer", "$amenities"]}, 1, 0] + } + } + } + }, + { + $project: { + washerPercentage: { + $concat: [ + { + $toString: { + $round: { + $multiply: [ + {$divide: ["$withWasher", "$totalListings"]}, + 100 + ] + } + } + }, + "%" + ] + } + } + } + ] + `, + name: 'super complex aggregate with complex project', + }, + { + namespace: 'nyc.parking', + userInput: + 'Write a query that does the following: find all of the parking incidents that occurred on any ave. Return all of the plate ids involved with their summons number and vehicle make and body type. Put the vehicle make and body type into lower case. No _id, sorted by the summons number lowest first.', + expectedOutput: ` + [ + {$match: {"Street Name": {$regex: "ave", $options: "i"}}}, + {$sort: {"Summons Number": 1}}, + { + $project: { + "Summons Number": 1, + "Plate ID": 1, + "Vehicle Make": {$toLower: "$Vehicle Make"}, + "Vehicle Body Type": {$toLower: "$Vehicle Body Type"}, + _id: 0 + } + } + ] + `, + name: 'complex aggregate with regex and string operators', + }, + { + namespace: 'netflix.comments', + userInput: + 'join with "movies" based on a movie_id and return one document for each comment with movie_title (from movie.title) and comment_text', + expectedOutput: `[ + { + $lookup: { + from: 'movies', + localField: 'movie_id', + foreignField: '_id', + as: 'movies', + }, + }, + { $unwind: '$movies' }, + { $project: { movie_title: '$movies.title', comment_text: '$text', _id: 0 } }, + ]`, + name: 'aggregate prompt with sql join', + }, + { + namespace: 'netflix.comments', + userInput: 'return only the customer email', + expectedOutput: ` + [{$project: {email: 1, _id: 0}}] + `, + name: 'simple projection aggregate', + }, +]; 
diff --git a/packages/compass-generative-ai/tests/evals/use-cases/find-query.ts b/packages/compass-generative-ai/tests/evals/use-cases/find-query.ts new file mode 100644 index 00000000000..ee99465b850 --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/use-cases/find-query.ts @@ -0,0 +1,183 @@ +import type { GenAiUsecase } from '.'; + +export const findQueries: GenAiUsecase[] = [ + { + namespace: 'netflix.movies', + userInput: 'find all the movies released in 1983', + expectedOutput: `{year: 1983}`, + name: 'simple find', + }, + { + namespace: 'netflix.movies', + userInput: + 'find three movies with alien in the title, show earliest movies first, only the _id, title and year', + expectedOutput: ` + {title: {$regex: "alien", $options: "i"}} + {_id: 1, title: 1, year: 1} + {year: 1} + 3 + `, + name: 'find with filter projection sort and limit', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: 'find all the listings with 10km from the instanbul center', + expectedOutput: ` + {"address.location": {$geoWithin: {$centerSphere: [[28.9784, 41.0082], 10 / 6378.1]}}} + `, + name: 'geo-based find', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'Return all the properties of type "Hotel" and with ratings lte 70', + expectedOutput: ` + { + property_type: "Hotel", + "review_scores.review_scores_rating": { $lte: 70 } + } + `, + name: 'find with nested match fields', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'what is the bed count that occurs the most? return it in a field called bedCount (only return the bedCount field)', + expectedOutput: ` + [ + { $group: { _id: "$beds", count: { $sum: 1 } } }, + { $sort: { count: -1 } }, + { $limit: 1 }, + { $project: { bedCount: "$_id" } } + ] + `, + name: 'find query that translates to aggregation 1', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'whats the total number of reviews across all listings? 
return it in a field called totalReviewsOverall', + expectedOutput: `[ + { + $group: { + _id: null, + totalReviewsOverall: { $sum: "$number_of_reviews" } + } + } + ] + `, + name: 'find query that translates to aggregation 2', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'which host id has the most reviews across all listings? return it in a field called hostId', + expectedOutput: `[ + { + $group: { + _id: "$host.host_id", + totalReviews: { $sum: "$number_of_reviews" } + } + }, + { $sort: { totalReviews: -1 } }, + { $limit: 1 }, + { $project: { hostId: "$_id" } } + ]`, + name: 'find query that translates to aggregation 3', + }, + { + namespace: 'netflix.movies', + userInput: 'find all of the movies from last year', + expectedOutput: `{year: ${new Date().getFullYear() - 1}}`, + name: 'relative date find 1', + }, + { + namespace: 'netflix.movies', + userInput: + 'Which comments were posted 30 years ago. consider all comments from that year. return name and date', + expectedOutput: `{ + $and: [ + { + date: { + $gte: ${new Date().getFullYear() - 30} + } + }, + { + date: { + $lt: ${new Date().getFullYear() - 29} + } + } + ] + } + {name: 1, date: 1} + `, + name: 'relative date find 2', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: 'get all docs where accommodates is 6', + expectedOutput: `{accommodates: 6}`, + name: 'number field find', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + 'give me just the price and the first 3 amenities (in a field called amenities) of the listing has "Step-free access" in its amenities.', + expectedOutput: ` + {amenities: "Step-free access"} + {price: 1, amenities: {$slice: 3}} + `, + name: 'find with complex projection', + }, + { + namespace: 'nyc.parking', + userInput: + 'Return only the Plate IDs of Acura vehicles registered in New York', + expectedOutput: ` + + { + $and: [ + {"Vehicle Make": "ACURA"}, + {"Registration State": "NY"} + ] + } + + {"Plate ID": 1} + `, + name: 'find 
with $and operator', + }, + { + namespace: 'airbnb.listingsAndReviews', + userInput: + '¿Qué alojamiento tiene el precio más bajo? devolver el número en un campo llamado "precio" en español', + expectedOutput: ` + {_id: 0, precio: "$price"} + {price: 1} + 1 + `, + name: 'find with non-english prompt', + }, + { + namespace: 'nyc.parking', + userInput: + 'Write a query that does the following: find all of the parking incidents that occurred on any ave. Return all of the plate ids involved with their summons number and vehicle make and body type. Put the vehicle make and body type into lower case. No _id, sorted by the summons number lowest first.', + expectedOutput: ` + {"Street Name": {$regex: "ave", $options: "i"}} + {"Summons Number": 1} + { + "Summons Number": 1, + "Plate ID": 1, + "Vehicle Make": {$toLower: "$Vehicle Make"}, + "Vehicle Body Type": {$toLower: "$Vehicle Body Type"}, + _id: 0 + } + `, + name: 'find with regex and string operators', + }, + { + namespace: 'netflix.comments', + userInput: 'return only the customer email', + expectedOutput: `{email: 1, _id: 0}`, + name: 'find with simple projection', + }, +]; diff --git a/packages/compass-generative-ai/tests/evals/use-cases/index.ts b/packages/compass-generative-ai/tests/evals/use-cases/index.ts new file mode 100644 index 00000000000..226714a2247 --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/use-cases/index.ts @@ -0,0 +1,100 @@ +import { findQueries } from './find-query'; +import { aggregateQueries } from './aggregate-query'; +import toNS from 'mongodb-ns'; +import { UUID } from 'bson'; + +export type GenAiUsecase = { + namespace: string; + userInput: string; + expectedOutput: string; + name: string; +}; + +import airbnbListings from '../fixtures/airbnb.listingsAndReviews'; +import berlinBars from '../fixtures/berlin.cocktailbars'; +import netflixMovies from '../fixtures/netflix.movies'; +import netflixComments from '../fixtures/netflix.comments'; +import nycParking from 
'../fixtures/nyc.parking'; + +import { getSampleAndSchemaFromDataset } from '../utils'; +import { + buildAggregateQueryPrompt, + buildFindQueryPrompt, +} from '../../../src/utils/gen-ai-prompt'; + +type DatasetSamples = { + [key: string]: { + sampleDocuments: unknown[]; + schema: Record; + }; +}; + +async function getDatasets(): Promise { + return { + 'airbnb.listingsAndReviews': await getSampleAndSchemaFromDataset( + airbnbListings + ), + 'berlin.cocktailbars': await getSampleAndSchemaFromDataset(berlinBars), + 'netflix.movies': await getSampleAndSchemaFromDataset(netflixMovies), + 'netflix.comments': await getSampleAndSchemaFromDataset(netflixComments), + 'nyc.parking': await getSampleAndSchemaFromDataset(nycParking), + }; +} + +export async function generateGenAiEvalCases() { + const datasetSamples = await getDatasets(); + const usecases = [ + ...findQueries.map((x) => ({ ...x, type: 'find' as const })), + ...aggregateQueries.map((x) => ({ ...x, type: 'aggregate' as const })), + ]; + + return usecases.map( + ({ namespace, expectedOutput, userInput, name, type }) => { + const { database: databaseName, collection: collectionName } = + toNS(namespace); + const { sampleDocuments, schema } = datasetSamples[namespace] ?? { + sampleDocuments: [], + schema: {}, + }; + const buildPromptData = { + userInput, + sampleDocuments, + schema, + collectionName, + databaseName, + enableStorage: false, + requestId: new UUID().toString(), + userId: 'compass-eval-tests-user', + }; + const { + metadata: { instructions }, + prompt, + } = + type === 'find' + ? 
buildFindQueryPrompt(buildPromptData) + : buildAggregateQueryPrompt(buildPromptData); + return { + name, + tags: [name, databaseName, collectionName, type], + input: { + messages: [ + { + role: 'user' as const, + content: prompt, + }, + ], + instructions: { + content: instructions, + }, + }, + expected: { + messages: [ + { + content: expectedOutput, + }, + ], + }, + }; + } + ); +} diff --git a/packages/compass-generative-ai/tests/evals/utils.ts b/packages/compass-generative-ai/tests/evals/utils.ts new file mode 100644 index 00000000000..2ace1cdf5ed --- /dev/null +++ b/packages/compass-generative-ai/tests/evals/utils.ts @@ -0,0 +1,36 @@ +import { getSimplifiedSchema } from 'mongodb-schema'; +import type { Message } from './types'; +import { EJSON } from 'bson'; + +export function allText(messages: Message[]): string { + return messages.map((m) => m.content).join('\n'); +} + +export function sampleItems(arr: T[], k: number): T[] { + if (k > arr.length) { + throw new Error('Sample size cannot be greater than array length'); + } + + const result: T[] = []; + const indices = new Set(); + + while (result.length < k) { + const randomIndex = Math.floor(Math.random() * arr.length); + if (!indices.has(randomIndex)) { + indices.add(randomIndex); + result.push(arr[randomIndex]); + } + } + return result; +} + +export async function getSampleAndSchemaFromDataset( + dataset: unknown[], + sampleSize = 2 +): Promise<{ sampleDocuments: any[]; schema: any }> { + const documents = sampleItems(dataset, Math.min(sampleSize, dataset.length)); + // BSON list + const sampleDocuments = EJSON.parse(JSON.stringify(documents)); + const schema = await getSimplifiedSchema(sampleDocuments); + return { sampleDocuments, schema }; +}