relevanceai.utils.migration.mongo_to_relevance_ai#

Migrate data from a MongoDB database to Relevance AI:

from relevanceai.api.batch import MongoImporter

# Create an object of MongoImporter class
connection_string= "..."
project= "..."
api_key= "..."
mongo_importer = MongoImporter(connection_string, project, api_key)

# Get a summary of the mongo database using "mongo_summary"
mongo_importer.mongo_summary()

# Set the desired source mongo collection using "set_mongo_collection"
db_name = '...'
collection_name = '...'
mongo_importer.set_mongo_collection(db_name, collection_name)

# Get total number of entries in the mongo collection using "mongo_document_count"
document_count = mongo_importer.mongo_document_count()

# Migrate data from mongo to Relevance AI using "migrate"
chunk_size = 5000      # migrate batches of 5000 (default 2000)
start_idx= 12000       # loads from mongo starting at index 12000 (default 0)
dataset_id = "..."     # dataset id in the Relevance AI platform
mongo_importer.migrate(
    dataset_id, document_count, chunk_size=chunk_size,
    start_idx=start_idx)

Module Contents#

relevanceai.utils.migration.mongo_to_relevance_ai.PYMONGO_AVAILABLE = True#
relevanceai.utils.migration.mongo_to_relevance_ai.BSON_AVAILABLE = True#
class relevanceai.utils.migration.mongo_to_relevance_ai.MongoImporter(connection_string: str)#

Batch API client

mongo_summary(self)#

Returns a dictionary mapping each database name (key) to the collection names in that database (value).

get_mongo_db(self, db_name: str)#
get_mongo_collection(self, db_name: str, collection_name: str)#
set_mongo_db(self, db_name: str)#
set_mongo_collection(self, db_name: str, collection_name: str)#
mongo_document_count(self)#
create_relevance_ai_dataset(self, dataset_id: str)#
update_id(self, documents: List[dict])#
static parse_json(data)#
static flatten_inner_indxs(documents: List[dict])#
static remove_nan(documents: List[dict], replace_with: str = '')#
static build_range(document_count: int, chunk_size: int = 2000, start_idx: int = 0)#
fetch_mongo_collection_data(self, start_idx: int = None, end_idx: int = None)#
migrate(self, dataset_id: str, document_count: int, chunk_size: int = 2000, start_idx: int = 0, overwite: bool = False)#

Migrate the selected MongoDB collection to a Relevance AI dataset.

Parameters
  • dataset_id (str) – Name of your dataset

  • document_count (int) – The number of documents in your collection

  • chunk_size (int) – The number of documents to migrate per batch

  • start_idx (int) – The index in the mongo collection to start loading from (e.g. to resume after an interrupted migration)

  • overwrite (bool) – If True, then the dataset ID in Relevance AI will be overwritten