我们从 Python 开源项目中提取了以下 31 个代码示例,用于说明如何使用 data.Data()。
def kmeans_numpy(d, headers, K, whiten=True):
    """Cluster the columns named by `headers` of Data object `d` into K
    clusters with scipy's built-in k-means.

    Returns (codebook, codes, error): the K cluster means, the cluster id
    assigned to each row, and each row's distance to its cluster mean.
    """
    # Pull the raw data matrix out of the Data object.
    A = d.get_data(headers)
    # Normalize each feature to unit variance unless the caller opts out.
    # (The original ignored the `whiten` flag; honoring it matches kmeans().)
    W = vq.whiten(A) if whiten else A
    # codebook: K cluster means; bookerror: overall distortion.
    codebook, bookerror = vq.kmeans(W, K)
    # codes: per-row cluster ids; error: per-row distance to its mean.
    codes, error = vq.vq(W, codebook)
    return codebook, codes, error
def normalize_columns_separately(data_obj, column_headers):
    """Return a matrix in which each selected column is independently
    rescaled so its minimum maps to 0 and its maximum maps to 1.

    data_obj: Data object providing get_data(column_headers).
    column_headers: list of column names to normalize.
    """
    final_columns = []
    # Transpose so we iterate over columns rather than rows.
    columns = data_obj.get_data(column_headers).transpose().tolist()
    for column in columns:
        max_num = max(column)
        min_num = min(column)
        span = max_num - min_num
        if span == 0:
            # Constant column: the original divided by zero here; map to 0.
            final_columns.append([0.0] * len(column))
        else:
            final_columns.append([(v - min_num) / span for v in column])
    # Transpose back to the original rows-as-points orientation.
    return np.matrix(final_columns).transpose()
def body(self, master):
    """Lay out the dialog: a labels checkbox, an algorithm option menu,
    and data/label file-chooser buttons (label buttons start disabled)."""
    tk.Checkbutton(master, text="Labels included",
                   variable=self.labels_given).grid(row=0)
    # Direct constructor call replaces the Python 2-only builtin apply().
    self.menu = tk.OptionMenu(master, self.algorithm,
                              "Naive Bayes", "K-Nearest Neighbors")
    self.menu_label = tk.Label(master, text="Algorithm")
    self.menu_label.grid(row=1, column=0)
    self.menu.grid(row=1, column=1)
    self.training_data_button = tk.Button(master, text="Choose Training Data",
                                          command=self.handle_training_button)
    self.training_data_button.grid(row=2)
    self.testing_data_button = tk.Button(master, text="Choose Testing Data",
                                         command=self.handle_testing_button)
    self.testing_data_button.grid(row=3)
    self.training_label_button = tk.Button(master, text="Choose Training labels",
                                           command=self.handle_training_label_button)
    # Label buttons are enabled elsewhere once "Labels included" is unchecked.
    self.training_label_button.config(state="disabled")
    self.training_label_button.grid(row=4)
    self.testing_label_button = tk.Button(master, text="Choose Testing labels",
                                          command=self.handle_testing_label_button)
    self.testing_label_button.config(state="disabled")
    self.testing_label_button.grid(row=5)
def load_and_run(args, trainerClass):
    """Build a trainer from the docopt-style `args` dict and run training,
    optionally restarting several times and storing each run to HDF5."""
    start_time = time.time()
    seed = int(args.get('--seed', 0))
    trainer = load_trainer(args, trainerClass, Data, seed)
    train_batch_name = args.get('--train-batch', None) or "train"
    validation_batch_name = args.get('--validation-batch', None)
    test_batch_name = args.get('--test-batch', None)
    print_params = args.get('--print-params', False) or False
    print_loss_breakdown = args.get('--print-loss-breakdown', False) or False
    num_restarts = int(args.get('--num-restarts', 1))
    # range() replaces the Python 2-only xrange().
    for i in range(num_restarts):
        (params, discretized_params) = trainer.train(
            train_batch_name,
            validation_batch_name=validation_batch_name,
            test_batch_name=test_batch_name,
            print_params=print_params,
            print_final_loss_breakdown=print_loss_breakdown)
        if '--store-data' in args and args['--store-data'] is not None:
            store_results_to_hdf5(args['--store-data'], trainer,
                                  train_batch_name, restart_idx=i)
    print("Training stopped after %2.fs." % (time.time() - start_time))
def checkIsFloat(self, x1, x2):
    """Return True if either operand must be treated as a float: a Data
    whose C type is 'double', the FPU register name 'st', or a Python
    float.  Replaces two copies of identical branch logic with a helper."""
    def _is_float(x):
        if isinstance(x, Data):
            return x.type.type == 'double'
        if isinstance(x, str):
            # 'st' is the x87 FPU stack-top register name.
            return x == 'st'
        return isinstance(x, float)
    return _is_float(x1) or _is_float(x2)
def call(self, func, parameters=None):
    """Emit a call instruction and report the result register.

    func: a Data (function-pointer variables call indirectly through
          their memory slot), or a plain function-name string.
    parameters: a dict like {parameter1 name: type, ...} (currently unused
          here; argument passing is handled by the caller).
    Returns 'eax', the register holding the call's return value.
    """
    self.callOffset = 0
    if isinstance(func, Data):
        if func.type.type == 'function' and func.type.pointer_count() > 0:
            # Function pointer: call indirectly through its memory address.
            self.gen.asm.append('\tcall ' + self.currentMap[func.name]["addr"] + '\n')
        else:
            self.gen.asm.append('\tcall ' + func.name + '\n')
    else:
        # Bare label/function name.
        self.gen.asm.append('\tcall ' + func + '\n')
    return 'eax'
def gen_primary_expression(self, node, context):
    """
    Generate the value of a primary expression.
    :type node: TreeNode
    :type context: Context
    :rtype: Data for identifiers; int/float/str for literal tokens
    """
    if isinstance(node[1], TreeNode):
        if node[1][0] == "IDENTIFIER":
            name = node[1][1]
            # deepcopy so later mutations of the returned type (e.g. by
            # unary operators) do not corrupt the symbol-table entry.
            id_type = deepcopy(context.get_type_by_id(name))
            return Data(name, False, id_type)
    else:
        # Literal tokens carry their text in node[1][1].
        if node[1][0] == "INTEGER":
            return int(node[1][1])
        elif node[1][0] == "DOUBLE":
            return float(node[1][1])
        elif node[1][0] == "STRING":
            return str(node[1][1])
def main(args):
    """Train for args.num_epochs epochs using one shared graph per worker
    thread, reporting per-epoch timings/accuracy and final test accuracy."""
    with tf.device("cpu"):
        data = Data(batch_size=args.batch_size, validation_size=6000)
    session = tf.Session(config=tf.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=args.num_threads))
    # One graph per thread; reuse variables after the first.
    graphs = SharedResource([build_graph(reuse=i > 0) for i in range(args.num_threads)])
    session.run(tf.initialize_all_variables())
    train_total_time_sum = 0
    for epoch in range(args.num_epochs):
        train_start_time = time.time()
        train_accuracy = accuracy(session, graphs, data.iterate_train(),
                                  num_threads=args.num_threads, train=True)
        train_total_time = time.time() - train_start_time
        train_total_time_sum += train_total_time
        validate_accuracy = accuracy(session, graphs, data.iterate_validate(),
                                     num_threads=args.num_threads, train=False)
        print("Training epoch number %d:" % (epoch,))
        print("  Time to train = %.3f s" % (train_total_time))
        print("  Training set accuracy = %.1f %%" % (100.0 * train_accuracy,))
        print("  Validation set accuracy = %.1f %%" % (100.0 * validate_accuracy,))
        print("")
    print("Training done.")
    test_accuracy = accuracy(session, graphs, data.iterate_test(),
                             num_threads=args.num_threads, train=False)
    # Bug fix: average over the number of epochs actually run
    # (args.num_epochs), not the unrelated module constant NUM_EPOCHS.
    print("  Average time per training epoch = %.3f s"
          % (train_total_time_sum / args.num_epochs,))
    print("  Test set accuracy = %.1f %%" % (100.0 * test_accuracy,))
def data_range(data_obj, column_headers):
    """Return, for each selected column, a [max, min] pair.

    NOTE: the pair is [max, min] (max first), matching the original
    behavior that existing callers may rely on.
    """
    range_list = []
    # Transpose so we iterate over columns rather than rows.
    columns = data_obj.get_data(column_headers).transpose().tolist()
    for column in columns:
        range_list.append([max(column), min(column)])
    return range_list
def mean(data_obj, column_headers):
    """Return a list with the mean value of each selected column."""
    mean_list = []
    # Transpose so we iterate over columns rather than rows.
    columns = data_obj.get_data(column_headers).transpose().tolist()
    for column in columns:
        mean_list.append(np.mean(column))
    return mean_list
def stdev(data_obj, column_headers):
    """Return a list with the (population) standard deviation of each
    selected column, as computed by np.std."""
    stdev_list = []
    # Transpose so we iterate over columns rather than rows.
    columns = data_obj.get_data(column_headers).transpose().tolist()
    for column in columns:
        stdev_list.append(np.std(column))
    return stdev_list
def median(data_obj, column_headers):
    """Return a list with the median of each selected column.

    Bug fix: the original omitted .transpose() and therefore computed one
    median per ROW, contradicting its per-column contract and the sibling
    helpers (data_range/mean/stdev), which all transpose first.
    """
    median_list = []
    columns = data_obj.get_data(column_headers).transpose().tolist()
    for column in columns:
        median_list.append(np.median(column))
    return median_list
def kmeans(d, headers, K, metric, whiten=True, categories=None):
    '''Takes in a Data object (or a raw matrix), a set of headers, and the
    number of clusters to create.

    Computes and returns the codebook, codes and representation errors.
    If given an Nx1 matrix of categories, it uses the category labels to
    calculate the initial cluster means.
    '''
    # Accept either a Data object or an already-extracted matrix.
    try:
        A = d.get_data(headers)
    except AttributeError:
        A = d
    # Optionally normalize each feature to unit variance.
    if whiten:
        W = vq.whiten(A)
    else:
        W = A
    # Seed the cluster means (from category labels when provided).
    codebook = kmeans_init(W, K, categories)
    # Iterate to convergence with the project's own k-means loop.
    codebook, codes, errors = kmeans_algorithm(W, codebook, metric)
    return codebook, codes, errors
def test_lin_reg(filename, ind, dep):
    """Run a linear regression of dependent column `dep` on independent
    columns `ind` over the data in `filename`, then save the analysis
    to '<basename>-analysis'."""
    b, sse, r2, t, p = linear_regression(data.Data(filename), ind, dep)
    # NOTE(review): split('.')[0] truncates at the FIRST dot, so paths
    # containing dots elsewhere shorten unexpectedly -- confirm intent.
    savefilename = filename.split('.')[0] + "-analysis"
    save_analysis(savefilename, filename, b, sse, r2, t, p, ind, dep)
def handleOpen(self, event=None):
    """Prompt the user for a CSV/XLS data file, load it into self.data,
    and plot it.  Does nothing if the dialog is cancelled."""
    import os
    fn = tkFileDialog.askopenfilename(parent=self.root,
                                      title="Choose a Data file",
                                      initialdir='.')
    if not fn:
        # User cancelled: the original crashed with IndexError here.
        return
    # splitext is robust to extension-less names and dots in directory
    # names, unlike the original fn.split('.')[1].
    ext = os.path.splitext(fn)[1].lower()
    if ext not in ('.csv', '.xls'):
        tkMessageBox.showwarning("Open File", "Cannot open this file\n(%s)" % fn)
        return
    self.data = dt.Data(filename=fn)
    self.handlePlotData()
    self.filename = fn
def setTransformationParameters(self):
    """Open the transformation-parameters dialog and, if confirmed, clamp
    the chosen scaling/pan/rotation speeds to the range [1, 10]."""
    print('handling Data Transformation stuff')
    dialog = TPDialog(self.root, "Choose Transformation Parameters",
                      self.scaling_speed, self.pan_speed, self.rotation_speed)
    if dialog.result is not None:
        # Clamp each speed into [1, 10].
        self.scaling_speed = max(1, min(dialog.result[0], 10))
        self.pan_speed = max(1, min(dialog.result[1], 10))
        self.rotation_speed = max(1, min(dialog.result[2], 10))
def read_training_data(training_file, training_labels_file=None):
    """Load training data.  Labels come from a separate labels file when
    given, otherwise from the last column of the training file.

    Returns (training_data, training_cats, data_object).
    """
    # The data file is loaded in both cases; hoisted out of the branches.
    d = data.Data(training_file)
    if training_labels_file is None:
        # Last column holds the category labels.
        training_cats = d.get_data([d.get_headers()[-1]])
        training_data = d.get_data(d.get_headers()[:-1])
    else:
        labels = data.Data(training_labels_file)
        training_cats = labels.get_data(labels.get_headers())
        training_data = d.get_data(d.get_headers())
    return training_data, training_cats, d
def read_testing_data(testing_file, testing_labels_file=None):
    """Load testing data.  Labels come from a separate labels file when
    given, otherwise from the last column of the testing file.

    Returns (testing_data, testing_cats, data_object).
    """
    # The data file is loaded in both cases; hoisted out of the branches.
    d = data.Data(testing_file)
    if testing_labels_file is None:
        # Last column holds the category labels.
        testing_cats = d.get_data([d.get_headers()[-1]])
        testing_data = d.get_data(d.get_headers()[:-1])
    else:
        labels = data.Data(testing_labels_file)
        testing_cats = labels.get_data(labels.get_headers())
        testing_data = d.get_data(d.get_headers())
    return testing_data, testing_cats, d
def test(result_filename, model_filename, data_filename, test_batches=None):
    """Instantiate a trained model with hardcoded params and measure its
    accuracy over the given test batches (all batches by default).

    test_batches may be None, a comma-separated string, or a list.
    """
    (hypers, params) = load_result(result_filename)
    # This is the nasty bit: instantiate_model generates source for a
    # function with the params baked in and a single argument used to get
    # input / set output; we compile and eval it into this scope, then
    # call it once per example.
    # NOTE(review): eval/exec of generated source is a code-injection
    # risk if model_filename is ever untrusted -- confirm provenance.
    (fun_name, runnable_model) = instantiate_model(model_filename, hypers, params)
    eval(compile(runnable_model, '<generated>', 'exec'))
    data = Data(data_filename)
    if test_batches is None:
        test_batches = data.get_batch_names()
    elif isinstance(test_batches, str):
        test_batches = test_batches.split(',')
    correct_instances = 0
    total_instances = 0
    for batch_name in test_batches:
        _, batch_data = data.get_batch(batch_name)
        for ex_idx, data_instance in enumerate(batch_data):
            print("Testing on batch %s (example %i)" % (batch_name, ex_idx))
            total_instances += 1
            runtime_data = TerpreTRuntime(data_instance)
            eval("%s(runtime_data)" % fun_name)
            if runtime_data.check():
                correct_instances += 1
    acc = correct_instances / float(total_instances) * 100.0
    print("Test accuracy: %i/%i (%6.2f%%) correct."
          % (correct_instances, total_instances, acc))
def save_data(self, preprocess, max_len, qid, q1, q2, label):
    """Store one question pair, bucketing it into the similar or
    non-similar list according to its label ('0' means non-duplicate).

    preprocess: when truthy, run both questions through
    preprocess_sentence(q, max_len) before storing.
    """
    if preprocess:
        q1 = preprocess_sentence(q1, max_len)
        q2 = preprocess_sentence(q2, max_len)
    if label == '0':
        # Non-duplicate pair -> dissimilar; one-hot target [0, 1].
        self._non_sim_data.append(Data(qid, q1, q2, label, [0, 1]))
    else:
        # Duplicate pair -> similar; one-hot target [1, 0].
        self._sim_data.append(Data(qid, q1, q2, label, [1, 0]))
def refresh_results_display(self):
    """Rebuild the multi-column network-info listbox, wire up its
    vertical/horizontal scrollbars, and set the visible columns."""
    network_data = Data().network_info
    vert_sb = ttk.Scrollbar(self.mainframe, orient=tk.VERTICAL)
    horz_sb = ttk.Scrollbar(self.mainframe, orient=tk.HORIZONTAL)
    self.results_display = self.multi_column_listbox(ColumnSelect.column_names)
    self.fill_multi_column_listbox(self.results_display, network_data)
    self.results_display.grid(row=0, column=0, in_=self.mainframe, sticky='NSEW')
    # Cross-wire the listbox and scrollbars.
    self.results_display.configure(yscrollcommand=vert_sb.set,
                                   xscrollcommand=horz_sb.set)
    vert_sb.grid(row=0, column=1, sticky="NS")
    vert_sb.config(command=self.results_display.yview)
    horz_sb.grid(row=1, column=0, sticky="EW")
    horz_sb.config(command=self.results_display.xview)
    self.results_display['displaycolumns'] = ColumnSelect.columns_shown
def And(self, x1, x2):
    """Emit x86 assembly for a bitwise AND of x1 and x2, leaving the
    result in eax.  Operands may be Data objects, register-name strings,
    or immediates.  Returns 'eax'.

    NOTE(review): if an operand is not a Data, y1/y2 stay unbound and the
    later tests can raise; this mirrors the original structure -- confirm
    callers always pass at least one Data or 'eax'.
    """
    if isinstance(x1, Data):
        y1addr = self.getAbsoluteAdd(x1)
        y1 = x1.name
    if isinstance(x2, Data):
        y2addr = self.getAbsoluteAdd(x2)
        y2 = x2.name
    # If x2 is the accumulator, swap so eax remains the destination.
    if x2 == 'eax':
        y1, y2 = y2, y1
        y1addr, y2addr = y2addr, y1addr
    if y1 in self.currentMap and y2 in self.currentMap:
        # Both operands live in memory: stage one in eax first.
        self.gen.asm.append("\tmov eax, " + y1addr + '\n')
        self.gen.asm.append("\tand eax, " + y2addr + '\n')
    elif isinstance(y2, str):
        if y2 in self.currentMap:
            self.gen.asm.append('\tand eax, ' + y2addr + '\n')
        else:
            # y2 is a register name.
            self.gen.asm.append('\tand eax, ' + y2 + '\n')
    else:
        if y2 in self.currentMap:
            self.gen.asm.append("\tmov eax, " + y1addr + '\n')
            self.gen.asm.append("\tand eax, " + str(y2) + '\n')
    return 'eax'
def Or(self, x1, x2):
    """Emit x86 assembly for a bitwise OR of x1 and x2, leaving the
    result in eax.  Operands may be Data objects, register-name strings,
    or immediates.  Returns 'eax'.

    NOTE(review): if an operand is not a Data, y1/y2 stay unbound and the
    later tests can raise; this mirrors the original structure -- confirm
    callers always pass at least one Data or 'eax'.
    """
    if isinstance(x1, Data):
        y1addr = self.getAbsoluteAdd(x1)
        y1 = x1.name
    if isinstance(x2, Data):
        y2addr = self.getAbsoluteAdd(x2)
        y2 = x2.name
    # If x2 is the accumulator, swap so eax remains the destination.
    if x2 == 'eax':
        y1, y2 = y2, y1
        y1addr, y2addr = y2addr, y1addr
    if y1 in self.currentMap and y2 in self.currentMap:
        # Both operands live in memory: stage one in eax first.
        self.gen.asm.append("\tmov eax, " + y1addr + '\n')
        self.gen.asm.append("\tor eax, " + y2addr + '\n')
    elif isinstance(y2, str):
        if y2 in self.currentMap:
            self.gen.asm.append('\tor eax, ' + y2addr + '\n')
        else:
            # y2 is a register name.
            self.gen.asm.append('\tor eax, ' + y2 + '\n')
    else:
        if y2 in self.currentMap:
            self.gen.asm.append("\tmov eax, " + y1addr + '\n')
            self.gen.asm.append("\tor eax, " + str(y2) + '\n')
    return 'eax'
def lea(self, x):
    """Emit 'lea eax, <addr>' loading the address of x into eax and
    return 'eax'."""
    if isinstance(x, Data):
        xaddr = self.getAbsoluteAdd(x)
    # NOTE(review): `x in self.registers` compares the whole operand
    # (possibly a Data object) against register-name keys; it looks like
    # it was meant to test a register-name string -- confirm.
    if x in self.registers:
        self.gen.asm.append('\tlea ' + 'eax, ' + '[' + x + ']' + '\n')
    else:
        self.gen.asm.append('\tlea ' + 'eax, ' + xaddr + '\n')
    return 'eax'
def cmp(self, x1, x2):
    """Emit a cmp instruction comparing x1 and x2.  Operands may be Data
    objects (memory), register-name strings, or immediates."""
    if isinstance(x1, Data) and isinstance(x2, Data):
        # x86 cmp cannot take two memory operands: stage x1 in eax.
        x1addr = self.getAbsoluteAdd(x1)
        x2addr = self.getAbsoluteAdd(x2)
        self.gen.asm.append("\tmov eax, " + x1addr + '\n')
        self.gen.asm.append('\tcmp ' + 'eax' + ', ' + x2addr + '\n')
        return
    dataflag1 = False
    if isinstance(x1, Data):
        x1 = self.getAbsoluteAdd(x1)
        dataflag1 = True
    dataflag2 = False
    if isinstance(x2, Data):
        x2 = self.getAbsoluteAdd(x2)
        dataflag2 = True
    if dataflag1:
        # Memory vs register/immediate: operand size must be explicit.
        self.gen.asm.append('\tcmp DWORD PTR ' + str(x1) + ', ' + str(x2) + '\n')
        return
    if dataflag2:
        self.gen.asm.append('\tcmp ' + str(x1) + ', DWORD PTR ' + str(x2) + '\n')
        return
    self.gen.asm.append('\tcmp ' + str(x1) + ', ' + str(x2) + '\n')
    return
def sal(self, x, offset):
    """Emit 'sal <x>, <offset>' (arithmetic shift left)."""
    if isinstance(x, Data):
        x = self.getAbsoluteAdd(x)
    # Bug fix: the original emitted 'sal <x><offset>' with no ', '
    # separator between the operand and the shift count (cf. cmp()).
    self.gen.asm.append('\tsal ' + x + ', ' + str(offset) + '\n')
    return
def sar(self, x, offset):
    """Emit 'sar <x>, <offset>' (arithmetic shift right)."""
    if isinstance(x, Data):
        x = self.getAbsoluteAdd(x)
    # Bug fix: the original emitted 'sar <x><offset>' with no ', '
    # separator between the operand and the shift count (cf. cmp()).
    self.gen.asm.append('\tsar ' + x + ', ' + str(offset) + '\n')
    return
def gen_unary_expression(self, node, context):
    """
    Generate code for a unary expression (&, *, ++, --).
    :type node: TreeNode
    :type context: Context
    :rtype: str or Data (falls through returning None for unhandled forms)
    """
    operand = self.expression_handler[node[2][0]](node[2], context)
    if isinstance(node[1], TreeNode):
        operator = self.gen_unary_operator(node[1], context)
        if operator == "&":
            # Address-of: load the operand's address and extend its type
            # with one more (non-const) pointer level.
            if isinstance(operand, Data):
                ret = self.tools.lea(operand)
                operand.type.is_const.append(False)
                return ret
        elif operator == "*":
            # Dereference: load the pointer into eax and mark the operand
            # as a memory (offset) access one pointer level down.
            if isinstance(operand, Data):
                self.tools.mov(self.tools.getEax(), operand)
                operand.name = self.tools.getNull()
                operand.offset = True
                operand.type.is_const.pop()
                return operand
    else:
        if node[1] == "++":
            ret = self.tools.add(operand, 1)
            self.tools.mov(operand, ret)
            return operand
        elif node[1] == "--":
            # NOTE(review): unlike "++", the decrement result is never
            # moved back into the operand -- confirm whether sub() stores
            # in place or this drops the update.
            self.tools.sub(operand, 1)
            return operand
def buildControls(self):
    """Build the control panel frame on the right side of the window.

    (A large block of commented-out experimental widget code -- color
    menu, random-data buttons, point-count entry -- was removed; see
    version control history if it is needed again.)
    """
    # make a control frame on the right
    self.rightcntlframe = tk.Frame(self.root)
    self.rightcntlframe.pack(side=tk.RIGHT, padx=2, pady=2, fill=tk.Y)
    # a thin sunken frame acts as a visual separator
    sep = tk.Frame(self.root, height=self.initDy, width=2, bd=1, relief=tk.SUNKEN)
    sep.pack(side=tk.RIGHT, padx=2, pady=2, fill=tk.Y)
    # use a label to set the size of the right panel
    label = tk.Label(self.rightcntlframe, text="Control Panel", width=20)
    label.pack(side=tk.TOP, pady=10)
    return
def main(argv):
    """Train a classifier, classify train/test data, print confusion
    matrices, and write the classified test data to a new file.

    argv: [script, method, train_file, test_file,
           optional train_labels_file, optional test_labels_file]
    """
    if len(argv) < 4:
        print('Usage: python %s <classification method> <train data file> <test data file> <optional train categories> <optional test categories>' % (argv[0]))
        exit(-1)
    elif len(argv) > 4:
        print("Reading data...")
        training_data, training_labels, dOb_train = read_training_data(argv[2], argv[4])
        testing_data, testing_labels, dObj_test = read_testing_data(argv[3], argv[5])
    else:
        training_data, training_labels, dOb_train = read_training_data(argv[2])
        testing_data, testing_labels, dObj_test = read_testing_data(argv[3])
    print("Building the Classifier...")
    classifier = build_classifier(training_data, training_labels, argv[1])
    print("Classifying test and training data...")
    ctraincats, ctrainlabels = classifier.classify(training_data)
    ctestcats, ctestlabels = classifier.classify(testing_data)
    # Recast labels to [0, C-1] so they align with the classifier's
    # category indices.
    unique1, mapping1 = np.unique(training_labels.T.tolist()[0], return_inverse=True)
    unique2, mapping2 = np.unique(testing_labels.T.tolist()[0], return_inverse=True)
    mapping1 = np.matrix(mapping1).T
    mapping2 = np.matrix(mapping2).T
    print("Constructing the Confusion matrices")
    cmtx_train = classifier.confusion_matrix(mapping1, ctraincats)
    cmtx_test = classifier.confusion_matrix(mapping2, ctestcats)
    print(cmtx_train)
    print(cmtx_test)
    print("\nTraining Data")
    print(classifier.confusion_matrix_str(cmtx_train))
    print("\nTesting Data")
    print(classifier.confusion_matrix_str(cmtx_test))
    print("Writing to file")
    dObj_test.add_column("class", "numeric", ctestcats.T.tolist()[0])
    dObj_test.write_to_file(argv[3].split('.')[0] + "-" + argv[1] + "-classified",
                            dObj_test.get_headers())
def allocateNewReg(self, vName):
    """Allocate storage for a temporary matching vName's type: a free
    register when one exists, otherwise a stack slot wrapped in a Data.
    Doubles always get a stack slot (they live in the x87 FPU).

    get a new free reg, if full get a mem address
    """
    # Work out the C type of the value we are making room for.
    if isinstance(vName, Data):
        Type = vName.type.type
    elif vName in self.registers:
        # NOTE(review): registers other than 'eax'/'st' leave Type unbound
        # and would raise below -- mirrors the original; confirm callers.
        if vName == 'eax':
            Type = 'int'
        elif vName == 'st':
            Type = 'double'
    elif isinstance(vName, int):
        Type = 'int'
    elif isinstance(vName, float):
        Type = 'double'
    else:
        raise TypeError("error in allocateNewReg\n")
    if Type == 'double':
        # Doubles are spilled straight to the stack.
        tmp_name = self.tmpName + str(self.tmpNum)
        self.tmpNum += 1
        self.currentMap.update(
            {tmp_name: {'reg': 0, 'type': Type, 'addr': '[esp+%d]' % (self.tmpSP)}})
        return Data(tmp_name, False, CType('double', 8))
    reg = self.checkFull()
    if reg != -1:
        # A register is free: record the temp as register-resident.
        tmp_name = self.tmpName + str(self.tmpNum)
        self.tmpNum += 1
        self.currentMap.update({tmp_name: {'reg': reg, 'type': Type, 'addr': 0}})
        return reg
    # All registers busy: spill the temp to a stack slot.
    tmp_name = self.tmpName + str(self.tmpNum)
    self.tmpNum += 1
    self.currentMap.update(
        {tmp_name: {'reg': 0, 'type': Type, 'addr': '[esp+%d]' % (self.tmpSP)}})
    return Data(tmp_name, False, CType('int', 4))