relevanceai.operations.preprocessing.text.base_text_processing#

Module Contents#

class relevanceai.operations.preprocessing.text.base_text_processing.BaseTextProcessing#
static normalize_text(txt: str, lower: bool = False, remove_digit: bool = False, remove_punct: bool = False) → str#
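  Normalizes the given text by applying any of the following steps, each enabled by its corresponding flag:

  • Lower-casing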

  • Digit removal

  • Punctuation removal

static get_word_frequency(str_list: List[str], remove_stop_words: bool = True, additional_stop_words: Optional[List[str]] = None, language='english') → List#

Returns a sorted list of word frequencies computed from the strings in str_list.

class relevanceai.operations.preprocessing.text.base_text_processing.MLStripper#

Removes HTML from the code and retrieves the data.

handle_data(self, d)#
get_data(self)#