Hbase过滤器(一):比较过滤器API
过滤器(filter)
目录
一:行过滤器(rowFilter)
解析:行过滤器基于rowkey来过滤数据。使用多种运算符返回符合条件的行键,同时过滤掉不符合条件的rowkey。
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import java.io.IOException;
/**
 * Examples of filtering a scan by row key with {@link RowFilter}.
 *
 * <p>Each test scans columns {@code cf1:name} and {@code cf1:age} of table
 * {@code ns1:t1} with a different comparator and prints the rows that come back.
 * The tests need a reachable HBase cluster that contains that table.
 */
public class rowfilter {
    // Table, column family and qualifiers shared by every example in this class.
    private static final TableName TABLE_NAME = TableName.valueOf("ns1:t1");
    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    private static final byte[] NAME = Bytes.toBytes("name");
    private static final byte[] AGE = Bytes.toBytes("age");

    private Configuration configuration = null;
    private Connection connection = null;

    /**
     * Scans for rows whose key compares less than or equal to {@code "row3"}
     * (binary comparison) and prints each row together with its name and age.
     *
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    @Test
    public void rowfilter() throws IOException {
        Filter filter = new RowFilter(
                CompareFilter.CompareOp.LESS_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("row3")));
        scanAndPrint(filter, true);
    }

    /**
     * Scans for rows whose key matches the regular expression {@code ".3"} and
     * prints each row together with its name.
     *
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    @Test
    public void rowRegexfilter() throws IOException {
        Filter filter = new RowFilter(
                CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(".3"));
        scanAndPrint(filter, false);
    }

    /**
     * Scans for rows whose key contains the substring {@code "3"} and prints
     * each row together with its name.
     *
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    @Test
    public void rowSubStringfilter() throws IOException {
        Filter filter = new RowFilter(
                CompareFilter.CompareOp.EQUAL,
                new SubstringComparator("3"));
        scanAndPrint(filter, false);
    }

    /**
     * Runs one filtered scan over {@code cf1:name} and {@code cf1:age} and prints the results.
     *
     * @param filter   the row filter to attach to the scan
     * @param printAge whether to also decode and print {@code cf1:age} as a 4-byte int
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    private void scanAndPrint(Filter filter, boolean printAge) throws IOException {
        System.out.print("begin\n");
        // Load the HBase configuration and open a connection to the cluster.
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);

        // Restrict the scan to the two columns of interest and attach the filter.
        Scan scan = new Scan();
        scan.addColumn(FAMILY, NAME);
        scan.addColumn(FAMILY, AGE);
        scan.setFilter(filter);

        // try-with-resources closes scanner, table and connection (in that order)
        // even when the scan throws; previously the connection was never closed.
        try (Connection conn = connection;
             Table table = conn.getTable(TABLE_NAME);
             ResultScanner scanner = table.getScanner(scan)) {
            for (Result result : scanner) {
                System.out.println(result);
                String name = Bytes.toString(result.getValue(FAMILY, NAME));
                Integer age = null;
                if (printAge) {
                    // A row may have a name but no age cell; decode only when the cell exists.
                    byte[] ageBytes = result.getValue(FAMILY, AGE);
                    if (ageBytes != null) {
                        age = Bytes.toInt(ageBytes);
                    }
                }
                System.out.println(name);
                if (printAge) {
                    System.out.println(age);
                }
            }
        } finally {
            // Do not keep a reference to a connection that has been closed.
            connection = null;
        }
        System.out.print("end\n");
    }
}
二:列族过滤器(FamilyFilter)
解析:列族过滤器与行过滤器相似,不过它是通过比较列族而不是比较rowkey来返回结果的。通过使用不同组合的运算符和比较器,用户可以在列族一级筛选所需的数据。
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import java.io.IOException;
/**
 * Example of filtering by column family with {@link FamilyFilter}.
 *
 * <p>Needs a reachable HBase cluster that contains table {@code ns1:t1}.
 */
public class familyfilter {
    private Configuration configuration = null;
    private Connection connection = null;

    /**
     * Applies a {@code FamilyFilter} (family name less than {@code "cf2"}, binary
     * comparison) first to a full-table scan and then to a single {@code Get} of
     * row {@code "row1"}, printing what each returns.
     *
     * @throws IOException if the cluster cannot be reached or a request fails
     */
    @Test
    public void familyfilter() throws IOException {
        System.out.print("begin\n");
        // Load the HBase configuration and open a connection to the cluster.
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);

        // try-with-resources closes the table and the connection even on failure;
        // previously the connection was never closed.
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("ns1:t1"))) {
            // Keep only cells whose column family compares less than "cf2".
            Filter filter = new FamilyFilter(
                    CompareFilter.CompareOp.LESS,
                    new BinaryComparator(Bytes.toBytes("cf2")));

            // 1) Use the filter on a scan.
            Scan scan = new Scan();
            scan.setFilter(filter);
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result row : resultScanner) {
                    System.out.println(row);
                    String name = Bytes.toString(
                            row.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("name")));
                    System.out.println(name);
                }
            }

            // 2) Use the same filter on a single-row Get.
            Get get = new Get(Bytes.toBytes("row1"));
            get.setFilter(filter);
            Result result = table.get(get);
            System.out.println("result:" + result);
        } finally {
            // Do not keep a reference to a connection that has been closed.
            connection = null;
        }
        System.out.print("end\n");
    }
}
三:列名过滤器(QualifierFilter)
解析:列名过滤器的逻辑与前两种过滤器类似,只是比较的对象换成了列名(qualifier),这种操作可以帮助用户筛选特定的列。
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import java.io.IOException;
/**
 * Example of filtering by column qualifier with {@link QualifierFilter}.
 *
 * <p>Needs a reachable HBase cluster that contains table {@code ns1:t1}.
 */
public class qualifierfilter {
    private Configuration configuration = null;
    private Connection connection = null;

    /**
     * Applies a {@code QualifierFilter} (qualifier less than or equal to
     * {@code "age"}, binary comparison) first to a full-table scan and then to a
     * single {@code Get} of row {@code "row1"}, printing what each returns.
     *
     * @throws IOException if the cluster cannot be reached or a request fails
     */
    @Test
    public void familynamefilter() throws IOException {
        System.out.print("begin\n");
        // Load the HBase configuration and open a connection to the cluster.
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);

        // try-with-resources closes the table and the connection even on failure;
        // previously the connection was never closed.
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("ns1:t1"))) {
            // Keep only cells whose qualifier compares less than or equal to "age".
            Filter filter = new QualifierFilter(
                    CompareFilter.CompareOp.LESS_OR_EQUAL,
                    new BinaryComparator(Bytes.toBytes("age")));

            // 1) Use the filter on a scan.
            Scan scan = new Scan();
            scan.setFilter(filter);
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result row : resultScanner) {
                    System.out.println(row);
                    // NOTE(review): "name" sorts after "age", so cf1:name is presumably
                    // filtered out and this prints null - confirm this is the intended demo.
                    String name = Bytes.toString(
                            row.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("name")));
                    System.out.println(name);
                }
            }

            // 2) Use the same filter on a single-row Get.
            Get get = new Get(Bytes.toBytes("row1"));
            get.setFilter(filter);
            Result result = table.get(get);
            System.out.println("result:" + result);
        } finally {
            // Do not keep a reference to a connection that has been closed.
            connection = null;
        }
        System.out.print("end\n");
    }
}
四:值过滤器(ValueFilter)
解析:这个过滤器可以帮助用户筛选某个特定值的单元格。与RegexStringComparator配合使用时,可以用功能强大的正则表达式来进行筛选。需要注意的是,在使用特定比较器的时候,只能与部分运算符配合使用(例如SubstringComparator和RegexStringComparator只能与EQUAL、NOT_EQUAL配合使用)。
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.junit.Test;
import java.io.IOException;
/**
 * Example of filtering by cell value with {@link ValueFilter}.
 *
 * <p>Needs a reachable HBase cluster that contains table {@code ns1:t1}.
 */
public class valuefilter {
    private Configuration configuration = null;
    private Connection connection = null;

    /**
     * Scans table {@code ns1:t1} keeping only cells whose value contains the
     * substring {@code ".4"}, and prints every returned cell with its value.
     *
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    @Test
    public void valueFilter() throws IOException {
        // Load the HBase configuration and open a connection to the cluster.
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);

        // NOTE(review): SubstringComparator does a plain substring match, so ".4" is
        // the literal two characters, not a regex - confirm that is what is wanted.
        Filter filter = new ValueFilter(
                CompareFilter.CompareOp.EQUAL,
                new SubstringComparator(".4"));
        Scan scan = new Scan();
        // The filter was previously created but never attached, so the scan
        // returned every cell in the table.
        scan.setFilter(filter);

        // try-with-resources closes scanner, table and connection even on failure;
        // previously the connection was never closed.
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("ns1:t1"));
             ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                // NOTE(review): raw()/KeyValue#getValue() are the older cell API;
                // kept as-is because the target HBase version is not visible here.
                for (KeyValue kv : result.raw()) {
                    System.out.println(kv);
                    // Decode the bytes; printing the byte[] directly only shows its
                    // identity (e.g. "[B@1b2c3d"), not the stored value.
                    System.out.println(new String(
                            kv.getValue(), java.nio.charset.StandardCharsets.UTF_8));
                }
            }
        } finally {
            // Do not keep a reference to a connection that has been closed.
            connection = null;
        }
    }
}
五:参考列过滤器(DependentColumnFilter)
解析:DependentColumnFilter以指定的参考列为基准,用该列单元格的时间戳来过滤所要查询的数据:同一行中只有时间戳与参考列相同的单元格才会被返回。
此过滤器提供了四种构造函数:
(1)DependentColumnFilter()
(2)DependentColumnFilter(byte[] family, byte[] qualifier)
(3)DependentColumnFilter(byte[] family, byte[] qualifier, boolean dropDependentColumn)
(4)DependentColumnFilter(byte[] family, byte[] qualifier, boolean dropDependentColumn, CompareOp valueCompareOp, WritableByteArrayComparable valueComparator)
相关参数:
boolean dropDependentColumn -- 决定参考列被返回还是丢弃,为true时表示参考列被丢弃,为false时表示参考列被返回
CompareOp valueCompareOp -- 比较运算符
WritableByteArrayComparable valueComparator -- 比较器(在较新的HBase版本中,该类型名为ByteArrayComparable)
package compareFilter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import java.io.IOException;
/**
 * Example of filtering against a reference column with {@link DependentColumnFilter}.
 *
 * <p>Needs a reachable HBase cluster that contains table {@code ns1:t1}.
 *
 * <p>NOTE(review): this class has the same name and package as the ValueFilter
 * example; the two cannot live in one source tree without renaming one of them.
 */
public class valuefilter {
    private Configuration configuration = null;
    private Connection connection = null;

    /**
     * Scans table {@code ns1:t1} with a {@code DependentColumnFilter} that uses
     * {@code cf1:name} as the reference column (with {@code dropDependentColumn}
     * set to {@code false}) and prints every returned row.
     *
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    @Test
    public void valueFilter() throws IOException {
        // Load the HBase configuration and open a connection to the cluster.
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);

        Filter filter = new DependentColumnFilter(
                Bytes.toBytes("cf1"), Bytes.toBytes("name"), false);
        Scan scan = new Scan();
        scan.setFilter(filter);

        // try-with-resources closes scanner, table and connection even on failure;
        // previously the connection was never closed.
        try (Connection conn = connection;
             Table table = conn.getTable(TableName.valueOf("ns1:t1"));
             ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                System.out.println(result);
            }
        } finally {
            // Do not keep a reference to a connection that has been closed.
            connection = null;
        }
    }
}