/**
 * Builds a sparse instance describing one document.
 *
 * <p>Attribute 0 is set to {@code docID}; attributes 1..n (n being the length of
 * {@code globalFeatureVector}) receive the value stored under the matching feature index in the
 * instance's feature vector, or 0 when no entry exists; the final attribute is set to
 * {@code classValue}.
 *
 * @param instanceText the text of the instance, forwarded to the feature-vector extraction
 * @param globalFeatureVector the global feature list; its length fixes the number of feature attributes
 * @param docID the document identifier stored in attribute 0
 * @param classValue the class label stored in the last attribute
 * @param ds the dataset the new instance is attached to
 * @return a {@code SparseInstance} built from the populated instance
 * @throws Exception if the feature vector cannot be computed or a value cannot be set
 */
public static Instance getInstanceObject(String[] instanceText, String[] globalFeatureVector,
    String docID, String classValue, Instances ds) throws Exception {
  FeatureVector instanceFeatureVector =
      getInstanceFeatureVector(instanceText, globalFeatureVector, docID);

  final int featureCount = globalFeatureVector.length;
  Instance dense = new Instance(featureCount + 2);
  dense.setDataset(ds);
  dense.setValue(0, docID);

  for (int featureIdx = 0; featureIdx < featureCount; featureIdx++) {
    // Features absent from this instance's vector default to 0.
    double weight = instanceFeatureVector.m_FeatureVector[0].containsKey(featureIdx)
        ? instanceFeatureVector.m_FeatureVector[0].get(featureIdx)
        : 0;
    dense.setValue(featureIdx + 1, weight);
  }

  dense.setValue(featureCount + 1, classValue);
  return new SparseInstance(dense);
}
/**
 * Updates the singleton frequency counts with the items present in one instance.
 *
 * <p>For a {@code SparseInstance} every stored value counts as an item. For any other instance
 * a non-missing attribute counts when it has exactly one value or when its value equals
 * {@code m_positiveIndex - 1}.
 *
 * @param current the instance to scan
 * @param singletons the singleton items, indexed by attribute index
 * @throws Exception declared by the signature; callers must handle it
 */
private void processSingleton(Instance current, ArrayList<BinaryItem> singletons)
    throws Exception {
  if (current instanceof SparseInstance) {
    for (int pos = 0; pos < current.numValues(); pos++) {
      singletons.get(current.index(pos)).increaseFrequency();
    }
    return;
  }

  for (int att = 0; att < current.numAttributes(); att++) {
    if (current.isMissing(att)) {
      continue;
    }
    boolean unary = current.attribute(att).numValues() == 1;
    if (unary || current.value(att) == m_positiveIndex - 1) {
      singletons.get(att).increaseFrequency();
    }
  }
}
/** * format must be different in precision (e.g., yyyy-MM instead of * yyyy-MM-dd) from the one in "weka.filters.data.FilterTest.arff", otherwise * this test will fail! * Note: Sparse instances are skipped. */ public void testTypical() { m_Filter = getFilter(); ((ChangeDateFormat) m_Filter).setDateFormat("yyyy-MM"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // all instance's must be different boolean equal = false; for (int i = 0; i < m_Instances.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (m_Comparator.compare( m_Instances.instance(i), result.instance(i)) == 0) { equal = true; break; } } if (equal) fail("Instances not changed!"); }
/** * format must be the same as in "weka.filters.data.FilterTest.arff", * otherwise this test will fail! * Note: Sparse instances are skipped. */ public void testSameFormat() { m_Filter = getFilter(); ((ChangeDateFormat) m_Filter).setDateFormat("yyyy-MM-dd"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // all instance's must be the same boolean equal = true; for (int i = 0; i < m_Instances.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (m_Comparator.compare( m_Instances.instance(i), result.instance(i)) != 0) { equal = false; break; } } if (!equal) fail("Instances modified!"); }
/** * checks whether attribute value stays the same */ public void testEquality() { m_Filter = getFilter("A"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( m_Instances.instance(i).value(m_AttIndex), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter modifies attribute values)!"); }
public void testAbs() { m_Filter = getFilter("abs(A)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.abs(m_Instances.instance(i).value(m_AttIndex)), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
public void testsqrt() { m_Filter = getFilter("sqrt(A)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.sqrt(m_Instances.instance(i).value(m_AttIndex)), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
public void testLog() { m_Filter = getFilter("log(A)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.log(m_Instances.instance(i).value(m_AttIndex)), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
public void testExp() { m_Filter = getFilter("exp(A)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.exp(m_Instances.instance(i).value(m_AttIndex)), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
public void testSin() { m_Filter = getFilter("sin(A)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.sin(m_Instances.instance(i).value(m_AttIndex)), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
public void testCos() { m_Filter = getFilter("cos(A)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.cos(m_Instances.instance(i).value(m_AttIndex)), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
public void testTan() { m_Filter = getFilter("tan(A)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.tan(m_Instances.instance(i).value(m_AttIndex)), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
public void testRint() { m_Filter = getFilter("rint(A)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.rint(m_Instances.instance(i).value(m_AttIndex)), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
public void testFloor() { m_Filter = getFilter("floor(A)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.floor(m_Instances.instance(i).value(m_AttIndex)), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
public void testPow2() { m_Filter = getFilter("pow(A,2)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.pow(m_Instances.instance(i).value(m_AttIndex), 2), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
public void testCeil() { m_Filter = getFilter("ceil(A)"); Instances result = useFilter(); assertEquals(m_Instances.numAttributes(), result.numAttributes()); assertEquals(m_Instances.numInstances(), result.numInstances()); // check equality boolean equal = true; for (int i = 0; i < result.numInstances(); i++) { if (m_Instances.instance(i) instanceof SparseInstance) continue; if (!Utils.eq( Math.ceil(m_Instances.instance(i).value(m_AttIndex)), result.instance(i).value(m_AttIndex))) { equal = false; break; } } if (!equal) fail("Filter produces different result)!"); }
/** * Convert a single instance over. The converted instance is added to the end * of the output queue. * * @param instance the instance to convert * @throws Exception if something goes wrong */ protected void convertInstance(Instance instance) throws Exception { // Make copy and set weight to one Instance cp = (Instance) instance.copy(); cp.setWeight(1.0); // Set up values double[] instanceVals = new double[outputFormatPeek().numAttributes()]; double[] vals = m_partitionGenerator.getMembershipValues(cp); System.arraycopy(vals, 0, instanceVals, 0, vals.length); if (instance.classIndex() >= 0) { instanceVals[instanceVals.length - 1] = instance.classValue(); } push(new SparseInstance(instance.weight(), instanceVals)); }
/** * Add instance to cluster */ public void AddInstance(Instance inst) { if (inst instanceof SparseInstance) { // System.out.println(Thread.currentThread().getStackTrace()[1].getClassName() +"AddSparceInstance"); for (int i = 0; i < inst.numValues(); i++) { AddItem(inst.index(i)); // for(int i=0;i<inst.numAttributes();int++){ // AddItem(inst.index(i)+inst.value(i)); } } else { for (int i = 0; i < inst.numAttributes(); i++) { if (!inst.isMissing(i)) { AddItem(i + inst.toString(i)); } } } this.W = this.occ.size(); this.N++; }
/** * Delete instance from cluster */ public void DeleteInstance(Instance inst) { if (inst instanceof SparseInstance) { // System.out.println(Thread.currentThread().getStackTrace()[1].getClassName() +"DeleteSparceInstance"); for (int i = 0; i < inst.numValues(); i++) { DeleteItem(inst.index(i)); } } else { for (int i = 0; i <= inst.numAttributes() - 1; i++) { if (!inst.isMissing(i)) { DeleteItem(i + inst.toString(i)); } } } this.W = this.occ.size(); this.N--; }
/**
 * Inputs an instance for filtering. The instance is pushed onto the output queue straight
 * away: a {@code SparseInstance} is first converted to an {@code Instance} with the same
 * weight, values and dataset, while any other instance is pushed unchanged.
 *
 * @param instance the input instance
 * @return always true, since the filtered instance can be collected with output() at once
 * @throws IllegalStateException if no input structure has been defined
 */
public boolean input(Instance instance) {
  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }
  if (m_NewBatch) {
    resetQueue();
    m_NewBatch = false;
  }

  // Only sparse instances are converted; everything else goes through as-is.
  Instance outgoing = instance;
  if (instance instanceof SparseInstance) {
    outgoing = new Instance(instance.weight(), instance.toDoubleArray());
    outgoing.setDataset(instance.dataset());
  }

  push(outgoing);
  return true;
}
Instance wordsToInstance(WordSet words) { Instance item = new SparseInstance( attributeSpecification.numAttributes()); item.setDataset(attributeSpecification); // Words for (String word : words.getWords()) { Attribute attribute = attributeSpecification.attribute(word); if (attribute != null) { item.setValue(attribute, 1); } } item.replaceMissingValues(missingVal); return item; }
/**
 * Converts a matrix into a dataset named "Matrix" with one sparse instance per row.
 *
 * <p>If {@code classAttributeName} is set, the class attribute is registered on the dataset
 * and each row's first annotation (if any) becomes that row's class value. If no class
 * attribute is configured, row annotations are ignored: calling {@code setClassValue} on an
 * instance whose dataset has no class would throw {@code UnassignedClassException}.
 *
 * @param matrix the matrix whose rows are converted
 * @return the dataset holding one instance per matrix row
 */
@Override
protected Instances doFilter(Matrix matrix) {
  ArrayList<Attribute> attributes = getAttributes(matrix);
  Map<String, Integer> attributeIndices = getAttributeMap(attributes);
  Instances instances = new Instances("Matrix", attributes, matrix.getRowNames().size());

  if (classAttributeName != null) {
    Attribute classAttribute = getClassAttribute(attributes);
    instances.setClass(classAttribute);
  }
  // The class index stays negative unless a class attribute was registered above.
  final boolean hasClass = instances.classIndex() >= 0;

  for (String rowName : matrix.getRowNames()) {
    Vector row = matrix.getRowVector(rowName);
    SparseInstance instance = getSparseInstance(matrix, attributeIndices, rowName);
    instance.setDataset(instances);

    if (hasClass) {
      // Only the first annotation is used as the class label.
      Iterator<String> annotations = row.getAnnotations().iterator();
      if (annotations.hasNext()) {
        instance.setClassValue(annotations.next());
      }
    }
    instances.add(instance);
  }
  return instances;
}
private Instance createFeatureVector(String[] extractedValues, boolean label) { Instance featureVector = new SparseInstance(label ? numberOfAttributes + 1 : numberOfAttributes); featureVector.setDataset(dataset); for(int i = 0; i < numberOfAttributes; i++) { try{ String s = extractedValues[i]; switch(featureType) { case counts: featureVector.setValue(i, Integer.valueOf(s)); break; case values: // treat nominal values (if they exist) differently { switch(featureExtractor.getFeatures().get(i).getType()) { case NUM: featureVector.setValue(i, Integer.valueOf(s)); break; case CAT: featureVector.setValue(i, s); break; } } break; } }catch(Exception e) { // System.out.println(e); } } // for if(label) featureVector.setValue(numberOfAttributes, Integer.valueOf(extractedValues[numberOfAttributes])); return featureVector; }
/**
 * Inserts a single instance into the FPTree.
 *
 * <p>The transaction is made of the singleton items present in the instance whose frequency
 * is at least {@code minSupport}. For a {@code SparseInstance} every stored value counts as
 * present; for any other instance a non-missing attribute counts when it has exactly one
 * value or when its value equals {@code m_positiveIndex - 1}. The transaction is sorted and
 * added to the tree with a count of 1.
 *
 * @param current the instance to insert
 * @param singletons the singleton item sets
 * @param tree the tree to insert into
 * @param minSupport the minimum support threshold
 */
private void insertInstance(Instance current, ArrayList<BinaryItem> singletons,
    FPTreeRoot tree, int minSupport) {
  ArrayList<BinaryItem> transaction = new ArrayList<BinaryItem>();

  if (current instanceof SparseInstance) {
    for (int pos = 0; pos < current.numValues(); pos++) {
      BinaryItem candidate = singletons.get(current.index(pos));
      if (candidate.getFrequency() >= minSupport) {
        transaction.add(candidate);
      }
    }
  } else {
    for (int att = 0; att < current.numAttributes(); att++) {
      if (current.isMissing(att)) {
        continue;
      }
      boolean present = current.attribute(att).numValues() == 1
          || current.value(att) == m_positiveIndex - 1;
      if (present && singletons.get(att).getFrequency() >= minSupport) {
        transaction.add(singletons.get(att));
      }
    }
  }

  // Both instance kinds finish the same way: sort, then insert with a count of 1.
  Collections.sort(transaction);
  tree.addItemSet(transaction, 1);
}