Hbase过滤器（一）：比较过滤器API

程序员文章站 2022-03-10 16:53:07

...

过滤器（filter）

一：行过滤器（rowFilter）

解析：行过滤器基于rowkey来过滤数据。使用多种运算符返回符合条件的行键，同时过滤掉不符合条件的rowkey。

package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

/**
 * Examples of filtering an HBase scan by row key with {@link RowFilter}.
 *
 * <p>Every test scans the columns {@code cf1:name} and {@code cf1:age} of table
 * {@code ns1:t1} and prints each returned row. The tests differ only in the
 * comparison operator and comparator handed to the {@code RowFilter}.
 *
 * <p>All HBase resources (connection, table, scanner) are opened with
 * try-with-resources so they are released even when a scan fails.
 */
public class rowfilter {

    /** Table scanned by every example in this class. */
    private static final TableName TABLE_NAME = TableName.valueOf("ns1:t1");
    /** Column family that holds the scanned columns. */
    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    /** Qualifier of the column printed as a string. */
    private static final byte[] NAME = Bytes.toBytes("name");
    /** Qualifier of the column decoded with {@code Bytes.toInt}. */
    private static final byte[] AGE = Bytes.toBytes("age");

    /**
     * Scans rows whose row key is less than or equal to {@code "row3"} under
     * binary (byte-wise) comparison, printing the name and age of each row.
     *
     * @throws IOException if the connection, table lookup or scan fails
     */
    @Test
    public void rowfilter() throws IOException {
        Filter filter = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("row3")));
        scanAndPrint(filter, true);
    }

    /**
     * Scans rows whose row key matches the regular expression {@code ".3"},
     * printing the name of each row.
     *
     * @throws IOException if the connection, table lookup or scan fails
     */
    @Test
    public void rowRegexfilter() throws IOException {
        Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(".3"));
        scanAndPrint(filter, false);
    }

    /**
     * Scans rows whose row key is matched by the substring comparator for
     * {@code "3"}, printing the name of each row.
     *
     * @throws IOException if the connection, table lookup or scan fails
     */
    @Test
    public void rowSubStringfilter() throws IOException {
        Filter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new SubstringComparator("3"));
        scanAndPrint(filter, false);
    }

    /**
     * Runs one filtered scan over {@code cf1:name} and {@code cf1:age} and prints the results.
     *
     * @param filter   filter attached to the scan
     * @param printAge whether to also decode and print {@code cf1:age} for each row
     * @throws IOException if the connection, table lookup or scan fails
     */
    private void scanAndPrint(Filter filter, boolean printAge) throws IOException {
        System.out.print("begin\n");
        Configuration configuration = HBaseConfiguration.create();
        // Resources are closed in reverse order of declaration: table first, then connection.
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TABLE_NAME)) {
            Scan scan = new Scan();
            scan.addColumn(FAMILY, NAME);
            scan.addColumn(FAMILY, AGE);
            scan.setFilter(filter);
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result result : resultScanner) {
                    System.out.println(result);
                    String name = Bytes.toString(result.getValue(FAMILY, NAME));
                    System.out.println(name);
                    if (printAge) {
                        // getValue returns null when the row has no cf1:age cell;
                        // print "null" in that case instead of failing inside Bytes.toInt.
                        byte[] rawAge = result.getValue(FAMILY, AGE);
                        Integer age = (rawAge == null) ? null : Integer.valueOf(Bytes.toInt(rawAge));
                        System.out.println(age);
                    }
                }
            }
        }
        System.out.print("end\n");
    }
}

二：列族过滤器（FamilyFilter）

解析：列族过滤器与行过滤器相似，不过它是通过比较列族而不是比较rowkey来返回结果的。通过使用不同组合的运算符和比较器，用户可以在列族一级筛选所需的数据。

package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

/**
 * Example of filtering HBase results by column family with {@link FamilyFilter}.
 *
 * <p>The same filter instance is applied first to a full-table {@code Scan} and
 * then to a single-row {@code Get}, both against table {@code ns1:t1}.
 */
public class familyfilter {

    /**
     * Keeps only cells whose column family compares {@code LESS} than
     * {@code "cf2"} under binary comparison, first across a scan of the table
     * and then for a {@code Get} of row {@code "row1"}.
     *
     * <p>The connection, table and scanner are opened with try-with-resources
     * so they are released even if the scan or the get throws.
     *
     * @throws IOException if the connection, table lookup, scan or get fails
     */
    @Test
    public void familyfilter() throws IOException {
        System.out.print("begin\n");
        Configuration configuration = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"))) {
            Filter filter = new FamilyFilter(CompareFilter.CompareOp.LESS,
                    new BinaryComparator(Bytes.toBytes("cf2")));

            // Part 1: apply the filter to a scan over the whole table.
            Scan scan = new Scan();
            scan.setFilter(filter);
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result result : resultScanner) {
                    System.out.println(result);
                    String name = Bytes.toString(
                            result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("name")));
                    System.out.println(name);
                }
            }

            // Part 2: apply the same filter to a single-row lookup.
            Get get = new Get(Bytes.toBytes("row1"));
            get.setFilter(filter);
            Result result = table.get(get);
            System.out.println("result:" + result);
        }
        System.out.print("end\n");
    }
}

三：列名过滤器（QualifierFilter）

解析：使用列名进行筛选的类似逻辑，这种操作可以帮助用户筛选特定的列。

package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

/**
 * Example of filtering HBase results by column qualifier with {@link QualifierFilter}.
 *
 * <p>The same filter instance is applied first to a full-table {@code Scan} and
 * then to a single-row {@code Get}, both against table {@code ns1:t1}.
 */
public class qualifierfilter {

    /**
     * Keeps only cells whose qualifier compares {@code LESS_OR_EQUAL} to
     * {@code "age"} under binary comparison, first across a scan of the table
     * and then for a {@code Get} of row {@code "row1"}.
     *
     * <p>Despite its name, this test filters on the column qualifier, not the
     * family; the method name is kept unchanged for compatibility.
     *
     * <p>The connection, table and scanner are opened with try-with-resources
     * so they are released even if the scan or the get throws.
     *
     * @throws IOException if the connection, table lookup, scan or get fails
     */
    @Test
    public void familynamefilter() throws IOException {
        System.out.print("begin\n");
        Configuration configuration = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"))) {
            Filter filter = new QualifierFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
                    new BinaryComparator(Bytes.toBytes("age")));

            // Part 1: apply the filter to a scan over the whole table.
            Scan scan = new Scan();
            scan.setFilter(filter);
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result result : resultScanner) {
                    System.out.println(result);
                    // Prints "null" for rows where cf1:name was filtered out or is absent.
                    String name = Bytes.toString(
                            result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("name")));
                    System.out.println(name);
                }
            }

            // Part 2: apply the same filter to a single-row lookup.
            Get get = new Get(Bytes.toBytes("row1"));
            get.setFilter(filter);
            Result result = table.get(get);
            System.out.println("result:" + result);
        }
        System.out.print("end\n");
    }
}

四：值过滤器（ValueFilter）

解析：这个过滤器可以帮助用户筛选具有某个特定值的单元格。与RegexStringComparator配合使用时，可以用功能强大的正则表达式来进行筛选。需要注意的是，某些特定的比较器只能与部分运算符配合使用。

package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.junit.Test;

import java.io.IOException;

public class valuefilter {

    private Configuration configuration = null;
    private Connection connection = null;

    /*
    * 根据值查询
    */
    @Test
    public void valueFilter() throws IOException {
        //创建Hbase配置文件
        configuration = HBaseConfiguration.create();
        //创建连接
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
        Scan scan = new Scan();
        Filter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL,new SubstringComparator(".4"));
        //创建扫描返回类
        ResultScanner resultScanner = table.getScanner(scan);
        for (Result result:resultScanner){
            for (KeyValue kv :result.raw())
            {
                System.out.println(kv);
                System.out.println(kv.getValue());
            }
        }
        resultScanner.close();
        table.close();
    }
}

五：参考列过滤器（DependentColumnFilter）

解析：DependentColumnFilter主要根据所选参考列的时间戳来过滤所要查询的数据。

此过滤器提供了四种构造函数：

（1）DependentColumnFilter()

（2）DependentColumnFilter(byte[] family, byte[] qualifier)

（3）DependentColumnFilter(byte[] family, byte[] qualifier, boolean dropDependentColumn)

（4）DependentColumnFilter(byte[] family, byte[] qualifier, boolean dropDependentColumn, CompareOp valueCompareOp, WritableByteArrayComparable valueComparator)

Hbase过滤器（一）：比较过滤器API

过滤器（filter）

目录

一：行过滤器（rowFilter）

二：列族过滤器（FamilyFilter）

三：列名过滤器（QualifierFilter）

四：值过滤器（ValueFilter）

五：参考列过滤器（DependentColumnFilter）

Java得到一个整数的绝对值，不使用任何判断和比较语句，包括API

Vue自定义过滤器格式化数字三位加一逗号实现代码

基于Cookie使用过滤器实现客户每次访问只登录一次

HBase 系列（七）——HBase 过滤器详解

django 过滤器-查询集-比较运算符-FQ对象-mysql的命令窗口

Hbase过滤器

Java得到一个整数的绝对值，不使用任何判断和比较语句，包括API

记一次面试被问到的布隆过滤器(能不能叫布罗姆过滤器...) 如何代码简单实现

ASP.NET Web API 过滤器创建、执行过程(一)

hbase 过滤器 rowfilter