diff lexmapr/pipeline_resources.py @ 3:be95a7ce968a tip

"planemo upload"
author kkonganti
date Tue, 13 Sep 2022 11:32:24 -0400
parents 5244e7465767
children
line wrap: on
line diff
--- a/lexmapr/pipeline_resources.py	Wed Aug 31 14:32:14 2022 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-"""Cache, generate and load resources"""
-
-import csv, json, os, sys
-from nltk import word_tokenize
-from itertools import permutations
-from collections import OrderedDict
-from lexmapr.definitions import ROOT
-from lexmapr.pipeline_helpers import punctuation_treatment
-
-
-def _create_lookup_table_skeleton():
-    '''Generate an empty lookup table'''
-    return({'abbreviations':{}, 'inflection_exceptions':{}, 'non_english_words':{},
-            'spelling_mistakes':{}, 'stop_words':{}, 'suffixes':{}})
-
-
-def _get_resource_dict(resource_file_name):
-    '''Get dictionary of resources from CSV file'''
-    ret_dic = {}
-    with open(os.path.join(ROOT, 'predefined_resources', resource_file_name)) as RES_file:
-        next(RES_file)
-        for row in csv.reader(RES_file, delimiter=','):
-            try:
-                ret_dic[punctuation_treatment(row[0].strip())] = \
-                    punctuation_treatment(row[1].strip())
-            except IndexError:
-                ret_dic[punctuation_treatment(row[0].strip())] = ''
-    return(ret_dic)
-
-
-def _add_predefined_resources_to_lookup_table(lookup_table):
-    '''Adds elements from lexmapr/predefined_resources to lookup table'''
-    lookup_table['abbreviations'] = _get_resource_dict('abbrv.csv')
-    lookup_table['inflection_exceptions'] = _get_resource_dict('inflection_exceptions.csv')
-    lookup_table['non_english_words'] = _get_resource_dict('nengwords.csv')
-    lookup_table['spelling_mistakes'] = _get_resource_dict('ScorLex.csv') #only from v 0.7
-    lookup_table['spelling_mistakes'].update(_get_resource_dict('misspellings.csv'))
-    lookup_table['stop_words'] = _get_resource_dict('mining_stopwords.csv')
-    lookup_table['suffixes'] = _get_resource_dict('suffixes.csv')
-    return(lookup_table)
-
-
-def get_predefined_resources():
-    '''Creates lookup table'''
-    lookup_table_path = os.path.join(ROOT, 'predefined_resources', 'lookup_table.json')
-    if os.path.exists(lookup_table_path):
-        with open(lookup_table_path) as LT_file:
-            lookup_table = json.load(LT_file)
-    else:
-        lookup_table = _create_lookup_table_skeleton()
-        lookup_table = _add_predefined_resources_to_lookup_table(lookup_table)
-        with open(lookup_table_path, 'w') as LT_file:
-            json.dump(lookup_table, LT_file)
-    return(lookup_table)
-
-
-def get_resource_label_permutations(resource_label):
-    '''Get permutations of some term'''
-    permutations_set = list(OrderedDict.fromkeys(permutations(resource_label.split())))
-    ret_list = []
-    for permutation_tuple in permutations_set:
-        ret_list.append(' '.join(permutation_tuple))
-    return(ret_list)