diff --git a/.gitignore b/.gitignore
index cce8b2a..9b62224 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,3 @@ data/
word_embeddings/
*.txt
*.csv
-
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..0e40fe8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+
+# Default ignored files
+/workspace.xml
\ No newline at end of file
diff --git a/.idea/RelEx-relex_CNN.iml b/.idea/RelEx-relex_CNN.iml
new file mode 100644
index 0000000..7c9d48f
--- /dev/null
+++ b/.idea/RelEx-relex_CNN.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..041ee02
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..a9411ab
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Command Line Guide b/Command Line Guide
new file mode 100644
index 0000000..8cce458
--- /dev/null
+++ b/Command Line Guide
@@ -0,0 +1,67 @@
+Adding command line parameters:
+
+To add command line parameters, call the add_argument() method of the argparse.ArgumentParser object in __main__.py.
+
+This is already done in __main__.py, but if you were starting from scratch you would first create an ArgumentParser object.
+
+Example:
+ parser = argparse.ArgumentParser(prog="RelEx")
+ # prog is the program name displayed in the usage and help messages
+
+
+
+
+After initializing an ArgumentParser object, call add_argument to add desired parameters.
+
+Example:
+ # say you wanted to add a flag for crf output layer
+ # you could do this
+ parser.add_argument("--crf")
+ # The string passed here is the flag that users would type when running the program
+
+ >> python relex --crf TRUE
+ # The value of crf here would be the string "TRUE" (argparse does not convert it to a boolean)
+ # Format should be two hyphens followed by name
+
+
+
+It's good practice to also add help text for users. Give a short description of what the flag is for.
+This text is shown to users when they run the program with the -h or --help flag.
+
+Example:
+ parser.add_argument("--crf", help="Use CRF output layer")
+
+
+
+
+To give a parameter a default value, pass the default argument to add_argument().
+
+Example:
+ parser.add_argument("--crf", help="Use CRF output layer", default="FALSE")
+
+ >>python relex
+ # The value of crf here would be "FALSE"
+
+
+
+Finally, if you want to make a parameter required:
+
+ parser.add_argument("--crf", help="Use CRF output layer", default="FALSE", required=True)
+ # Set required=True. By default, flags (parameters starting with --) are optional; note that default is never used when required=True.
+
+
+
+Do this for all the parameters you'd like to have and then parse them using ArgumentParser.parse_args()
+I have done this in the code already, but if you were to recreate this following the example used thus far,
+it would look like:
+
+ args = parser.parse_args()
+ # parse_args() returns an argparse.Namespace object whose attributes hold the parsed command line parameter values
+
+
+Now to access the parameters: args.nameOfParameter
+The name of the parameter is the text that comes after the two hyphens (any hyphens inside the name become underscores).
+So to access "--crf" which we've been using as an example:
+
+ args.crf
+ # The value of crf commandline parameter
diff --git a/README.md b/README.md
index c0d3f95..11fec3e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# RelEx
+ # RelEx
RelEx is a clinical **Rel**ation **Ex**traction Framework to identify relations between two entities. The framework is divided into two main components: co-location based (rule based) Relation Extraction and deep learning based (CNN) Relation Extraction.
diff --git a/relex/RelEx_NN/__init__.pyc b/relex/RelEx_NN/__init__.pyc
new file mode 100644
index 0000000..01f97b7
Binary files /dev/null and b/relex/RelEx_NN/__init__.pyc differ
diff --git a/relex/RelEx_NN/__pycache__/__init__.cpython-36.pyc b/relex/RelEx_NN/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..480eaf6
Binary files /dev/null and b/relex/RelEx_NN/__pycache__/__init__.cpython-36.pyc differ
diff --git a/relex/RelEx_NN/cnn/__pycache__/__init__.cpython-36.pyc b/relex/RelEx_NN/cnn/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 3c0a044..0000000
Binary files a/relex/RelEx_NN/cnn/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/relex/RelEx_NN/cnn/__pycache__/segment_cnn.cpython-36.pyc b/relex/RelEx_NN/cnn/__pycache__/segment_cnn.cpython-36.pyc
deleted file mode 100644
index 4347f31..0000000
Binary files a/relex/RelEx_NN/cnn/__pycache__/segment_cnn.cpython-36.pyc and /dev/null differ
diff --git a/relex/RelEx_NN/cnn/sentence_cnn.py b/relex/RelEx_NN/cnn/sentence_cnn.py
index c0956f6..8c96853 100644
--- a/relex/RelEx_NN/cnn/sentence_cnn.py
+++ b/relex/RelEx_NN/cnn/sentence_cnn.py
@@ -9,6 +9,7 @@
from sklearn.utils import class_weight
from utils import file, normalization
import numpy as np
+import tensorflow as tf
class Sentence_CNN:
@@ -62,16 +63,24 @@ def define_model(self, no_classes):
model.compile(optimizer=self.optimizer, loss=self.loss, metrics=self.metrics)
else:
-
- input_shape = Input(shape=(self.data_model.maxlen,))
- embedding = Embedding(self.data_model.common_words, self.embedding.embedding_dim)(input_shape)
-
- if self.embedding:
- embedding = Embedding(self.data_model.common_words, self.embedding.embedding_dim,
- weights=[self.embedding.embedding_matrix], trainable=False)(input_shape)
+ elmo=False
+
+ if(self.sentences!=None):
+ elmo=True;
+
+ if(elmo==False):
+ input_shape = Input(shape=(self.data_model.maxlen,))
+ embedding = Embedding(self.data_model.common_words, self.embedding.embedding_dim)(input_shape)
+
+ if self.embedding:
+ embedding = Embedding(self.data_model.common_words, self.embedding.embedding_dim,
+ weights=[self.embedding.embedding_matrix], trainable=False)(input_shape)
+ else:
+ input_shape = Input(shape=(1,),dtype=tf.string)
+ embedding = ElmoEmbeddingLayer()(input_shape)
+
conv1 = Conv1D(filters=self.filters, kernel_size=self.filter_conv, activation=self.activation)(embedding)
pool1 = MaxPooling1D(pool_size=self.filter_maxPool)(conv1)
-
conv2 = Conv1D(filters=self.filters, kernel_size=self.filter_conv, activation=self.activation)(pool1)
drop = Dropout(self.drop_out)(conv2)
@@ -79,12 +88,11 @@ def define_model(self, no_classes):
dense1 = Dense(self.filters, activation=self.activation)(flat)
outputs = Dense(no_classes, activation=self.output_activation)(dense1)
- model = Model(inputs=input_shape, outputs=outputs)
+ model = Model(inputs=[input_shape], outputs=outputs)
model.compile(loss=self.loss, optimizer=self.optimizer, metrics=self.metrics)
# summarize
print(model.summary())
-
return model
def fit_Model(self, model, x_train, y_train, validation=None):
@@ -100,8 +108,10 @@ def fit_Model(self, model, x_train, y_train, validation=None):
# weights = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
# history = model.fit(x_train, y_train, epochs=self.epochs, batch_size=self.batch_size, validation_data=validation, class_weight=weights)
+
history = model.fit(x_train, y_train, epochs=self.epochs,
- batch_size=self.batch_size, validation_data=validation)
+ batch_size=self.batch_size, validation_data=validation)
+
# loss = history.history['loss']
# acc = history.history['acc']
@@ -216,7 +226,26 @@ def cross_validate(self, num_folds=5):
"""
X_data = self.data_model.train
Y_data = self.data_model.train_label
+ binary_Y = self.data_model.binarize_labels(Y_data, True)
+ labelfile = open("binarylabel","w")
+ for thing in binary_Y:
+ for t in thing:
+ labelfile.write(str(t))
+ labelfile.write(" ")
+ labelfile.write("\n")
+ labelfile.close()
+ exit()
+ cv_model = self.define_model(len(self.data_model.encoder.classes_))
+ for i in range(0,len(self.sentences)):
+ if(len(self.sentences[i].split())>100):
+ self.sentences[i]=" ".join(self.sentences[i].split()[:100])
+ sentences=np.array(self.sentences, dtype=object)[:, np.newaxis]
+ print(sentences.shape)
+ print(X_data.shape)
+ print(binary_Y.shape)
+ cv_model.fit(sentences, binary_Y, epochs=self.epochs, batch_size=self.batch_size)
+ """
if num_folds <= 1: raise ValueError("Number of folds for cross validation must be greater than 1")
assert X_data is not None and Y_data is not None, \
@@ -224,7 +253,6 @@ def cross_validate(self, num_folds=5):
skf = StratifiedKFold(n_splits=num_folds, shuffle=True)
skf.get_n_splits(X_data, Y_data)
-
evaluation_statistics = {}
fold = 1
@@ -292,3 +320,5 @@ def cross_validate(self, num_folds=5):
#print results using MedaCy evaluation (similar to the sklearn evaluation above)
print("---------------------medacy Results --------------------------------")
evaluate.cv_evaluation(labels, evaluation_statistics)
+
+ """
diff --git a/relex/RelEx_NN/embeddings/__pycache__/__init__.cpython-36.pyc b/relex/RelEx_NN/embeddings/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 380b888..0000000
Binary files a/relex/RelEx_NN/embeddings/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/relex/RelEx_NN/embeddings/__pycache__/elmo_embeddings.cpython-36.pyc b/relex/RelEx_NN/embeddings/__pycache__/elmo_embeddings.cpython-36.pyc
new file mode 100644
index 0000000..e41a050
Binary files /dev/null and b/relex/RelEx_NN/embeddings/__pycache__/elmo_embeddings.cpython-36.pyc differ
diff --git a/relex/RelEx_NN/embeddings/__pycache__/embeddings.cpython-36.pyc b/relex/RelEx_NN/embeddings/__pycache__/embeddings.cpython-36.pyc
deleted file mode 100644
index f58eab0..0000000
Binary files a/relex/RelEx_NN/embeddings/__pycache__/embeddings.cpython-36.pyc and /dev/null differ
diff --git a/relex/RelEx_NN/embeddings/elmo_embeddings.py b/relex/RelEx_NN/embeddings/elmo_embeddings.py
new file mode 100644
index 0000000..f97c833
--- /dev/null
+++ b/relex/RelEx_NN/embeddings/elmo_embeddings.py
@@ -0,0 +1,27 @@
+import tensorflow as tf
+import tensorflow_hub as hub
+from keras import backend as K
+from keras.engine import Layer
+
+class ElmoEmbeddingLayer(Layer):
+ def __init__(self, **kwargs):
+ self.dimensions = 1024
+ self.trainable=False
+ super(ElmoEmbeddingLayer, self).__init__(**kwargs)
+
+ def build(self, input_shape):
+ self.elmo = hub.Module('https://tfhub.dev/google/elmo/2', trainable=self.trainable, name="{}_module".format(self.name))
+ self.trainable_weights += tf.trainable_variables(scope="^{}_module/.*".format(self.name))
+ super(ElmoEmbeddingLayer, self).build(input_shape)
+
+ def call(self, x, mask=None):
+ result = self.elmo(K.squeeze(K.cast(x, tf.string), axis=1),as_dict=True,signature='default',)['default']
+ result=tf.expand_dims(result,axis=0)
+ print(result.shape)
+ return result
+
+ # def compute_mask(self, inputs, mask=None):
+ # return K.not_equal(inputs, '--PAD--')
+
+ def compute_output_shape(self, input_shape):
+ return (input_shape[0],100, self.dimensions)
diff --git a/relex/RelEx_NN/model/__init__.pyc b/relex/RelEx_NN/model/__init__.pyc
new file mode 100644
index 0000000..f30eb04
Binary files /dev/null and b/relex/RelEx_NN/model/__init__.pyc differ
diff --git a/relex/RelEx_NN/model/__pycache__/__init__.cpython-36.pyc b/relex/RelEx_NN/model/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 512645e..0000000
Binary files a/relex/RelEx_NN/model/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/relex/RelEx_NN/model/__pycache__/evalaute.cpython-36.pyc b/relex/RelEx_NN/model/__pycache__/evalaute.cpython-36.pyc
new file mode 100644
index 0000000..f7d672d
Binary files /dev/null and b/relex/RelEx_NN/model/__pycache__/evalaute.cpython-36.pyc differ
diff --git a/relex/RelEx_NN/model/__pycache__/evaluate.cpython-36.pyc b/relex/RelEx_NN/model/__pycache__/evaluate.cpython-36.pyc
deleted file mode 100644
index 3463197..0000000
Binary files a/relex/RelEx_NN/model/__pycache__/evaluate.cpython-36.pyc and /dev/null differ
diff --git a/relex/RelEx_NN/model/__pycache__/model.cpython-36.pyc b/relex/RelEx_NN/model/__pycache__/model.cpython-36.pyc
deleted file mode 100644
index ae55119..0000000
Binary files a/relex/RelEx_NN/model/__pycache__/model.cpython-36.pyc and /dev/null differ
diff --git a/relex/RelEx_NN/model/model.py b/relex/RelEx_NN/model/model.py
index bdb3119..cc7a46f 100644
--- a/relex/RelEx_NN/model/model.py
+++ b/relex/RelEx_NN/model/model.py
@@ -28,6 +28,7 @@ def create_validation_data(train_data, train_label, num_data=1000):
return x_train, x_val, y_train, y_val
+
def reduce_duplicate_data(train_data, train_labels):
"""
Reads the data into one dataframe. Removes the duplicated data and merges the respective labels. Also drops the
@@ -87,7 +88,6 @@ def __init__(self, data_object, data_object_test=None, segment=False, test=False
:param one_hot: Flag to be set to create one-hot vectors (default-False)
:param common_words: Number of words to consider as features (default = 10000)
:param maxlen: maximum length of the vector (default = 100)
-
"""
self.de_sample = de_sample
self.write_Predictions = write_Predictions
@@ -164,17 +164,17 @@ def __init__(self, data_object, data_object_test=None, segment=False, test=False
self.train_track = train_track
if self.test:
self.train, self.x_test, self.word_index = self.vectorize_words(train_data, test_data)
- self.train_onehot, self.x_test_onehot, self.token_index = self.one_hot_encoding(train_data, test_data)
+ #self.train_onehot, self.x_test_onehot, self.token_index = self.one_hot_encoding(train_data, test_data)
self.y_test = test_labels
self.test_track = test_track
else:
- self.train_onehot, self.token_index = self.one_hot_encoding(train_data, test_data)
+ #self.train_onehot, self.token_index = self.one_hot_encoding(train_data, test_data)
self.train, self.word_index = self.vectorize_words(train_data, test_data)
# divides train data into partial train and validation data
self.x_train, self.x_val, self.y_train, self.y_val = create_validation_data(self.train, self.train_label)
- self.x_train_onehot, self.x_val_onehot, self.y_train, self.y_val = create_validation_data(self.train_onehot,
- self.train_label)
+ # self.x_train_onehot, self.x_val_onehot, self.y_train, self.y_val = create_validation_data(self.train_onehot,
+ # self.train_label)
if self.segment:
train_preceding = data_object['seg_preceding']
train_middle = data_object['seg_middle']
diff --git a/relex/RelEx_NN/model/model.pyc b/relex/RelEx_NN/model/model.pyc
new file mode 100644
index 0000000..95b2bc0
Binary files /dev/null and b/relex/RelEx_NN/model/model.pyc differ
diff --git a/relex/RelEx_NN/model/temp.py b/relex/RelEx_NN/model/temp.py
new file mode 100644
index 0000000..a2fafd6
--- /dev/null
+++ b/relex/RelEx_NN/model/temp.py
@@ -0,0 +1,106 @@
+import os
+import pandas as pd
+from keras.preprocessing.text import Tokenizer
+from keras.preprocessing.sequence import pad_sequences
+from sklearn.preprocessing import MultiLabelBinarizer
+from sklearn.model_selection import train_test_split
+
+from keras.models import Sequential
+from keras.layers import Dense, Activation, Embedding, Flatten, GlobalMaxPool1D, Dropout, Conv1D
+from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
+from keras.losses import binary_crossentropy
+from keras.optimizers import Adam
+
+def read_from_file(file):
+ """
+ Reads external files and insert the content to a list. It also removes whitespace
+ characters like `\n` at the end of each line
+
+ :param file: name of the input file.
+ :return : content of the file in list format
+ """
+ if not os.path.isfile(file):
+ raise FileNotFoundError("Not a valid file path")
+
+ with open(file) as f:
+ content = f.readlines()
+ content = [x.strip() for x in content]
+
+ return content
+
+
+def get_features(text_series):
+ """
+ transforms text data to feature_vectors that can be used in the ml model.
+ tokenizer must be available.
+ """
+ sequences = tokenizer.texts_to_sequences(text_series)
+ return pad_sequences(sequences, maxlen=maxlen)
+
+
+def prediction_to_label(prediction):
+ tag_prob = [(labels[i], prob) for i, prob in enumerate(prediction.tolist())]
+ return dict(sorted(tag_prob, key=lambda kv: kv[1], reverse=True))
+
+train_data = read_from_file("../../../data/segments/sentence_train")
+train_labels = read_from_file("../../../data/segments/labels_train")
+df_data = pd.DataFrame(train_data)
+df_data.columns = ['sentence']
+df_label = pd.DataFrame(train_labels)
+df_label.columns = ['label']
+
+df_data.reset_index(drop=True, inplace=True)
+df_label.reset_index(drop=True, inplace=True)
+df_new= pd.concat((df_data, df_label),axis=1)
+# print(df_new)
+# df_data["label"]=df_label.label
+df_new.drop_duplicates(inplace=True)
+#
+df= df_new.groupby('sentence').agg({'label':lambda x: ','.join(x)})
+print(df)
+df.reset_index( inplace=True)
+print(df)
+
+df['label']= df['label'].str.split(",")
+print(df.columns)
+df.columns = ['sentence', 'label']
+print(df)
+multilabel_binarizer = MultiLabelBinarizer()
+multilabel_binarizer.fit(df.label)
+labels = multilabel_binarizer.classes_
+print(len(labels))
+
+maxlen = 180
+max_words = 5000
+tokenizer = Tokenizer(num_words=max_words, lower=True)
+tokenizer.fit_on_texts(df.sentence)
+# print(df.label)
+x = get_features(df.sentence)
+y = multilabel_binarizer.transform(df.label)
+print(y)
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=9000)
+
+filter_length = 300
+
+# model = Sequential()
+# model.add(Embedding(max_words, 20, input_length=maxlen))
+# model.add(Dropout(0.1))
+# model.add(Conv1D(filter_length, 3, padding='valid', activation='relu', strides=1))
+# model.add(GlobalMaxPool1D())
+# model.add(Dense(num_classes))
+# model.add(Activation('sigmoid'))
+#
+# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy'])
+# model.summary()
+#
+# callbacks = [
+# ReduceLROnPlateau(),
+# EarlyStopping(patience=4),
+# ModelCheckpoint(filepath='model-conv1d.h5', save_best_only=True)
+# ]
+#
+# history = model.fit(x_train, y_train,
+# epochs=20,
+# batch_size=32,
+# validation_split=0.1,
+# callbacks=callbacks)
\ No newline at end of file
diff --git a/relex/RelEx_NN/nn/__pycache__/__init__.cpython-36.pyc b/relex/RelEx_NN/nn/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 94485f2..0000000
Binary files a/relex/RelEx_NN/nn/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/relex/RelEx_NN/nn/__pycache__/simple_NN.cpython-36.pyc b/relex/RelEx_NN/nn/__pycache__/simple_NN.cpython-36.pyc
deleted file mode 100644
index 327aa6c..0000000
Binary files a/relex/RelEx_NN/nn/__pycache__/simple_NN.cpython-36.pyc and /dev/null differ
diff --git a/relex/__main__.py b/relex/__main__.py
new file mode 100644
index 0000000..324b345
--- /dev/null
+++ b/relex/__main__.py
@@ -0,0 +1,67 @@
+from RelEx_NN.model import Model
+from RelEx_NN.embeddings import Embeddings
+from RelEx_NN.cnn import Segment_CNN
+from RelEx_NN.cnn import Sentence_CNN
+import numpy
+import argparse
+import tensorflow_hub as hu
+import tensorflow as tf
+
+# embedding_path = "home/neha/Documents/mimic3_d200.txt" model = Model(True, False) embedding=Embeddings(
+# embedding_path, model) seg_cnn = Segment_CNN(model, embedding) seg_cnn.cross_validate(preceding,
+# middle, succeeding, concept1, concept2, train_label)
+from torch import hub
+
+
+
+parser = argparse.ArgumentParser(prog="RelEx")
+# parser.add_argument("-d", "--data",help="provide directory location for data")
+parser.add_argument("--sentrain", help="provide path to sentence training data",required=True)
+parser.add_argument("--labeltrain", help="provide path to label training data",required=True)
+parser.add_argument("--sentest", help="provide path to sentence testing data (if applicable)")
+parser.add_argument("--labeltest", help="provide path to label testing data (if applicable)")
+parser.add_argument("--embedding", help="provide path to embedding file", required=True) # might want to make this more specific
+parser.add_argument("--segment", help="activate segment-cnn", default="TRUE")
+parser.add_argument("--test", help="validate model on test dataset", default="FALSE")
+parser.add_argument("--multilabel", help="insert help text here", default="TRUE")
+parser.add_argument("--onehot", help="create one-hot vectors", default="FALSE")
+parser.add_argument("--commonwords", help="number of words to consider as features", default="10000")
+parser.add_argument("--maxlen", help="maximum length of vector", default="100")
+parser.add_argument("--elmo", help="use elmo embeddings", default="FALSE")
+args = parser.parse_args()
+segment = args.segment == "TRUE"
+test = args.test == "TRUE"
+multilabel = args.multilabel == "TRUE"
+onehot = args.onehot == "TRUE"
+elmo = args.elmo == "TRUE"
+model=None
+
+model = Model(sentrain=args.sentrain, sentest=args.sentest, labeltest=args.labeltest, labeltrain=args.labeltrain,
+ segment=segment, test=test, multilabel=multilabel, one_hot=onehot, common_words=args.commonwords,
+ maxlen=args.maxlen)
+
+train_preceding = open("/home/neha/Desktop/P_Tr/preceding_seg").read()
+train_middle = open("/home/neha/Desktop/P_Tr/middle_seg").read()
+train_succeeding = open("/home/neha/Desktop/P_Tr/succeeding_seg").read()
+train_concept1 = open("/home/neha/Desktop/P_Tr/concept1_seg").read()
+train_concept2 = open("/home/neha/Desktop/P_Tr/concept2_seg").read()
+train_sent = open("/home/neha/Desktop/P_Tr/sentence_train").read()
+sents = train_sent.split("\n")
+print(len(sents))
+
+
+#print(args.sentrain)
+
+
+
+
+embedding_path = args.embedding
+#
+embedding=Embeddings(embedding_path, model)
+if elmo:
+ seg_cnn = Sentence_CNN(model, elmo=args.sentrain, cross_validation=True, embedding=embedding)
+else:
+ seg_cnn = Sentence_CNN(model, cross_validation=True, embedding=embedding)
+
+
+
diff --git a/relex/__pycache__/__init__.cpython-36.pyc b/relex/__pycache__/__init__.cpython-36.pyc
new file mode 100644
index 0000000..8a8943c
Binary files /dev/null and b/relex/__pycache__/__init__.cpython-36.pyc differ
diff --git a/relex/__pycache__/__main__.cpython-36.pyc b/relex/__pycache__/__main__.cpython-36.pyc
new file mode 100644
index 0000000..3d1de8c
Binary files /dev/null and b/relex/__pycache__/__main__.cpython-36.pyc differ
diff --git a/relex/charlie.py b/relex/charlie.py
new file mode 100644
index 0000000..d975222
--- /dev/null
+++ b/relex/charlie.py
@@ -0,0 +1,13 @@
+from RelEx_NN.model import Model
+from segment import Set_Connection
+from RelEx_NN.cnn import Sentence_CNN
+from RelEx_NN.embeddings import Embeddings
+
+data = Set_Connection(CSV=True, sentence_only = True, sentences='../data/segments/sentence_train', labels='../data/segments/labels_train').data_object
+#data = Set_Connection(CSV=True, sentences='../data/segments/sentence_train', labels='../data/segments/labels_train', preceding_segs='../data/segments/preceding_seg', concept1_segs='../data/segments/concept1_seg',middle_segs='../data/segments/middle_seg',concept2_segs='../data/segments/concept2_seg', succeeding_segs='../data/segments/succeeding_seg' ).data_object
+sentences = data["sentence"]
+model = Model(data, segment=False, test=False, multilabel=False, one_hot=False)
+embedding = Embeddings("../word_embeddings/mimic3_d200.txt", model, embedding_dim=200)
+sent_cnn = Sentence_CNN(model, embedding, cross_validation=True,sentences=sentences,filters=300, drop_out=0.1, filter_conv=3, optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy'])
+
+
diff --git a/relex/data/__pycache__/__init__.cpython-36.pyc b/relex/data/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index eae58c5..0000000
Binary files a/relex/data/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/relex/data/__pycache__/annotation.cpython-36.pyc b/relex/data/__pycache__/annotation.cpython-36.pyc
deleted file mode 100644
index 4d9d310..0000000
Binary files a/relex/data/__pycache__/annotation.cpython-36.pyc and /dev/null differ
diff --git a/relex/data/__pycache__/dataset.cpython-36.pyc b/relex/data/__pycache__/dataset.cpython-36.pyc
deleted file mode 100644
index 1a6cb44..0000000
Binary files a/relex/data/__pycache__/dataset.cpython-36.pyc and /dev/null differ
diff --git a/relex/data/dataset.py b/relex/data/dataset.py
index 1b34f55..53ed2f3 100644
--- a/relex/data/dataset.py
+++ b/relex/data/dataset.py
@@ -46,4 +46,4 @@ def __next__(self):
except IndexError:
raise StopIteration()
self.index += 1
- return word
\ No newline at end of file
+ return word
diff --git a/relex/exec.py b/relex/exec.py
new file mode 100644
index 0000000..16ef2c6
--- /dev/null
+++ b/relex/exec.py
@@ -0,0 +1,17 @@
+from segment import Segmentation
+sample_train = '/home/mahendrand/NLP/RelEx/data/sample_train'
+# sample_test = Dataset('/home/samantha/Desktop/Research/Data/i2b2/sample_test')
+# training_dataset = Dataset('/home/samantha/Desktop/Research/Data/i2b2/train_data')
+# testing_dataset = Dataset('/home/samantha/Desktop/Research/Data/i2b2/test_data')
+
+seg_sampleTrain = Segmentation(sample_train)
+# sample_sentTrain = seg_sampleTrain.segments['sentence']
+# sample_labelTrain = seg_sampleTrain.segments['label']
+
+# seg_train = Segmentation(training_dataset)
+# print(len(seg_train.segments['sentence']))
+# print(len(seg_train.segments['label']))
+
+# seg_test = Segmentation(testing_dataset)
+# print(seg_test.segments['sentence'])
+# print(seg_test.segments['label'])
\ No newline at end of file
diff --git a/relex/experiments.py b/relex/experiments.py
index b733ef0..d1eba64 100644
--- a/relex/experiments.py
+++ b/relex/experiments.py
@@ -5,7 +5,7 @@
from RelEx_NN.nn import Simple_NN
from segment import Set_Connection
import sys
-
+import os
if sys.argv[1] == 'mimic':
if int(sys.argv[2]) == 200:
embedding_path = "../word_embeddings/mimic3_d200.txt"
@@ -47,4 +47,4 @@
sent_cnn = Sentence_CNN(model, embedding, True, filters=300, drop_out=0.1, filter_conv=3, optimizer='adam', loss='binary_crossentropy', metrics=['categorical_accuracy'])
# sent_cnn = Simple_NN(model, embedding, True)
-# seg_cnn.cross_validate(model.preceding, model.middle, model.succeeding, model.concept1, model.concept2, model.train_label)
\ No newline at end of file
+# seg_cnn.cross_validate(model.preceding, model.middle, model.succeeding, model.concept1, model.concept2, model.train_label)
diff --git a/relex/run.sh b/relex/run.sh
index fafea66..d11682f 100755
--- a/relex/run.sh
+++ b/relex/run.sh
@@ -27,4 +27,4 @@ python experiments.py chem 200 segment
#echo "glove 200 multi"
#python experiments.py glove 200 multi
#echo "glove 300 multi"
-#python experiments.py glove 300 multi
\ No newline at end of file
+#python experiments.py glove 300 multi
diff --git a/relex/run_test.sh b/relex/run_test.sh
index 83dfcfa..ba46aa2 100755
--- a/relex/run_test.sh
+++ b/relex/run_test.sh
@@ -21,4 +21,4 @@ python demo_test.py mimic 300 multi
echo "glove 200 multi"
python demo_test.py glove 200 multi
echo "glove 300 multi"
-python demo_test.py glove 300 multi
\ No newline at end of file
+python demo_test.py glove 300 multi
diff --git a/relex/segment/__init__.py b/relex/segment/__init__.py
index 6b4d5ad..7da82f3 100644
--- a/relex/segment/__init__.py
+++ b/relex/segment/__init__.py
@@ -1,2 +1,2 @@
from .segmentation import Segmentation
-from .set_connection import Set_Connection
\ No newline at end of file
+from .set_connection import Set_Connection
diff --git a/relex/segment/__pycache__/segmentation.cpython-36.pyc b/relex/segment/__pycache__/segmentation.cpython-36.pyc
deleted file mode 100644
index 84d6638..0000000
Binary files a/relex/segment/__pycache__/segmentation.cpython-36.pyc and /dev/null differ
diff --git a/relex/segment/__pycache__/set_connection.cpython-36.pyc b/relex/segment/__pycache__/set_connection.cpython-36.pyc
deleted file mode 100644
index 5a0f73d..0000000
Binary files a/relex/segment/__pycache__/set_connection.cpython-36.pyc and /dev/null differ
diff --git a/relex/utils/__pycache__/__init__.cpython-36.pyc b/relex/utils/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index a612344..0000000
Binary files a/relex/utils/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/relex/utils/__pycache__/file.cpython-36.pyc b/relex/utils/__pycache__/file.cpython-36.pyc
deleted file mode 100644
index 2ddddb9..0000000
Binary files a/relex/utils/__pycache__/file.cpython-36.pyc and /dev/null differ