relevanceai.utils.migration.mongo_to_relevance_ai
#
Migrate from a MongoDB database to Relevance AI:
from relevanceai.utils.migration.mongo_to_relevance_ai import MongoImporter
# Create an object of MongoImporter class
connection_string= "..."
project= "..."
api_key= "..."
mongo_importer = MongoImporter(connection_string, project, api_key)
# Get a summary of the mongo database using "mongo_summary"
mongo_importer.mongo_summary()
# Set the desired source mongo collection using "set_mongo_collection"
db_name = '...'
collection_name = '...'
mongo_importer.set_mongo_collection(db_name, collection_name)
# Get total number of entries in the mongo collection using "mongo_document_count"
document_count = mongo_importer.mongo_document_count()
# Migrate data from mongo to Relevance AI using "migrate"
chunk_size = 5000 # migrate batches of 5000 (default 2000)
start_idx= 12000 # loads from mongo starting at index 12000 (default 0)
dataset_id = "..." # dataset id in the Relevance AI platform
mongo_importer.migrate(
dataset_id, document_count, chunk_size=chunk_size,
start_idx=start_idx)
Module Contents#
- relevanceai.utils.migration.mongo_to_relevance_ai.PYMONGO_AVAILABLE = True#
- relevanceai.utils.migration.mongo_to_relevance_ai.BSON_AVAILABLE = True#
- class relevanceai.utils.migration.mongo_to_relevance_ai.MongoImporter(connection_string: str)#
Batch API client
- mongo_summary(self)#
Returns a dictionary {key: value} in which each key is a database name and each value is the collection names in that database.
- get_mongo_db(self, db_name: str)#
- get_mongo_collection(self, db_name: str, collection_name: str)#
- set_mongo_db(self, db_name: str)#
- set_mongo_collection(self, db_name: str, collection_name: str)#
- mongo_document_count(self)#
- create_relevance_ai_dataset(self, dataset_id: str)#
- update_id(self, documents: List[dict])#
- static parse_json(data)#
- static flatten_inner_indxs(documents: List[dict])#
- static remove_nan(documents: List[dict], replace_with: str = '')#
- static build_range(document_count: int, chunk_size: int = 2000, start_idx: int = 0)#
- fetch_mongo_collection_data(self, start_idx: int = None, end_idx: int = None)#
- migrate(self, dataset_id: str, document_count: int, chunk_size: int = 2000, start_idx: int = 0, overwite: bool = False)#
Migrate your MongoDB collection to a Relevance AI dataset.
- Parameters
dataset_id (str) – Name of your dataset
document_count (int) – The number of documents in your collection
chunk_size (int) – The number of documents to migrate in each batch
start_idx (int) – The index in the mongo collection to start loading from (e.g. to resume after an interrupted migration)
overwite (bool) – If True, then the dataset ID in Relevance AI will be overwritten (note: the keyword is spelled "overwite" in the method signature)