1. 背景

近期在做视频流量的统计,通过定时拉取云厂商的视频播放统计数据。由于数据比较多,而且每天都要处理,这样数据膨胀的非常快,每年的数据量达到了千万级别,因此有必要采取分库分表的方案进行数据分片。当然有很多的NewSQL数据库方案,比如TiDB或OceanBase等分布式存储的数据库,但是对于目前我们的维护成本是不可接受的。故而还是采取 Middleware + MySQL的方式满足现在的业务需求。

2. 部署说明

  • 因为是中间件类型的软件,所以采用Docker部署,docker-compose 便于编排。
  • ShardingSphere-Proxy作为代理,本质就是Java程序解析应用端的SQL并分发,需要根据自己的并发体量选择适当配置的机器。

3. 脚本

3.1 目录说明

  • conf:存放配置文件
  • ext-lib:ShardingSphere-Proxy的扩展类库,如数据库连接的jar包
├── conf
│   ├── config-database-discovery.yaml
│   ├── config-encrypt.yaml
│   ├── config-readwrite-splitting.yaml
│   ├── config-shadow.yaml
│   ├── config-sharding.yaml
│   ├── logback.xml
│   └── server.yaml
├── docker-compose.yml
└── ext-lib
    └── mysql-connector-java-8.0.11.jar

3.2 业务领域模型

为了最终验证分库分表后能够满足日常开发的需求,需要做相应的测试,本篇文章采用 订单和子订单表作为数据表,并录入基础的数据进行CRUD的测试。

3.2.1 表结构

 


CREATE TABLE `t_order_0` (
  `order_id` bigint(20) UNSIGNED NOT NULL COMMENT '主键ID',
  `user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户ID',
  `total_money` int(10) UNSIGNED NOT NULL COMMENT '订单总金额',
  PRIMARY KEY (`order_id`),
  KEY `idx_user_id` (`user_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='订单总表';

CREATE TABLE `t_order_1` (
  `order_id` bigint(20) UNSIGNED NOT NULL COMMENT '主键ID',
  `user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户ID',
  `total_money` int(10) UNSIGNED NOT NULL COMMENT '订单总金额',
  PRIMARY KEY (`order_id`),
  KEY `idx_user_id` (`user_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='订单总表';

CREATE TABLE `t_order_item_0` (
  `order_item_id` bigint(20) UNSIGNED NOT NULL COMMENT '子订单ID',
  `order_id` bigint(20) UNSIGNED NOT NULL COMMENT '主键ID',
  `user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户ID',
  `money` int(10) UNSIGNED NOT NULL COMMENT '子订单金额',
  PRIMARY KEY (`order_item_id`),
  KEY `idx_order_id` (`order_id`) USING BTREE,
  KEY `idx_user_id` (`user_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='订单子表';

CREATE TABLE `t_order_item_1` (
  `order_item_id` bigint(20) UNSIGNED NOT NULL COMMENT '子订单ID',
  `order_id` bigint(20) UNSIGNED NOT NULL COMMENT '主键ID',
  `user_id` bigint(20) UNSIGNED NOT NULL COMMENT '用户ID',
  `money` int(10) UNSIGNED NOT NULL COMMENT '子订单金额',
  PRIMARY KEY (`order_item_id`),
  KEY `idx_order_id` (`order_id`) USING BTREE,
  KEY `idx_user_id` (`user_id`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='订单子表';

3.3 docker-compose.yml

version: "3"
services:
  ShardingSphereProxy:
    image: apache/shardingsphere-proxy
    container_name: shardingsphere-proxy
    network_mode: "host"
    restart: always
    command: server /data
    ports:
      - 13307:3307
    volumes:
      - ./conf:/opt/shardingsphere-proxy/conf
      - ./ext-lib:/opt/shardingsphere-proxy/ext-lib
    environment:
      - JVM_OPTS="-Djava.awt.headless=true"

3.4 数据分片配置:config-sharding.yaml

######################################################################################################
# 
# 用于配置:数据分片规则
# 
######################################################################################################

schemaName: data-center_db

dataSources:
  ds_0:
    url: jdbc:mysql://192.168.0.60:3306/data-center_0?serverTimezone=UTC&useSSL=false
    username: root
    password: root
    connectionTimeoutMilliseconds: 30000
    idleTimeoutMilliseconds: 60000
    maxLifetimeMilliseconds: 1800000
    maxPoolSize: 50
    minPoolSize: 1
  ds_1:
    url: jdbc:mysql://192.168.0.60:3306/data-center_1?serverTimezone=UTC&useSSL=false
    username: root
    password: root
    connectionTimeoutMilliseconds: 30000
    idleTimeoutMilliseconds: 60000
    maxLifetimeMilliseconds: 1800000
    maxPoolSize: 50
    minPoolSize: 1

rules:
- !SHARDING
  tables: # 数据分片规则配置
    t_order: # 订单逻辑表名称
      actualDataNodes: ds_${0..1}.t_order_${0..1}
      databaseStrategy: # 配置分库策略
        standard:
          shardingColumn: user_id
          shardingAlgorithmName: database_user_inline
      tableStrategy: # 分表策略
        standard:
          shardingColumn: order_id
          shardingAlgorithmName: t_order_inline
      keyGenerateStrategy:
        column: order_id
        keyGeneratorName: snowflake 
    t_order_item: # 子订单逻辑表名称
      actualDataNodes: ds_${0..1}.t_order_item_${0..1}
      databaseStrategy: # 配置分库策略
        standard:
          shardingColumn: user_id
          shardingAlgorithmName: database_user_inline
      tableStrategy: # 分表策略
        standard:
          shardingColumn: order_id
          shardingAlgorithmName: t_order_item_inline
      keyGenerateStrategy:
        column: order_item_id
        keyGeneratorName: snowflake
  bindingTables: # 绑定表规则列表
    - t_order,t_order_item
  
  # 分片算法配置
  shardingAlgorithms: 
    database_user_inline:
      type: INLINE
      props:
        algorithm-expression: ds_${user_id % 2}
    t_order_inline: # 订单表分片算法名称
      type: INLINE
      props:
        algorithm-expression: t_order_${order_id % 2}
        allow-range-query-with-inline-sharding: true
    t_order_item_inline: # 子订单表分片算法名称
      type: INLINE
      props:
        algorithm-expression: t_order_item_${order_id % 2}
        allow-range-query-with-inline-sharding: true
  
  # 分布式序列算法配置
  keyGenerators:
    snowflake:
      type: SNOWFLAKE
      props:
        worker-id: 1

3.5 代理相关配置:server.yaml

######################################################################################################
# 
# 用于配置:数据接入迁移&弹性伸缩、分布式治理模式、权限、代理属性.
# 
######################################################################################################

#scaling:
#  blockQueueSize: 10000 # 数据传输通道队列大小
#  workerThread: 40 # 工作线程池大小,允许同时运行的迁移任务线程数
#  clusterAutoSwitchAlgorithm:
#    type: IDLE
#    props:
#      incremental-task-idle-minute-threshold: 30
#  dataConsistencyCheckAlgorithm:
#    type: DEFAULT
#
#mode:
#  type: Cluster
#  repository:
#    type: ZooKeeper
#    props:
#      namespace: governance_ds
#      server-lists: localhost:2181
#      retryIntervalMilliseconds: 500
#      timeToLiveSeconds: 60
#      maxRetries: 3
#      operationTimeoutMilliseconds: 500
#  overwrite: false
#
rules:
  - !AUTHORITY
    users:
      - root@%:root
      - sharding@:sharding
    provider:
      type: ALL_PRIVILEGES_PERMITTED
  - !TRANSACTION
    defaultType: XA
    providerType: Atomikos

props:
  max-connections-size-per-query: 1
  kernel-executor-size: 16  # Infinite by default.
  proxy-frontend-flush-threshold: 128  # The default value is 128.
#  proxy-opentracing-enabled: false
#  proxy-hint-enabled: false
  sql-show: true
#  check-table-metadata-enabled: false
#  show-process-list-enabled: false
#    # Proxy backend query fetch size. A larger value may increase the memory usage of ShardingSphere Proxy.
#    # The default value is -1, which means set the minimum value for different JDBC drivers.
#  proxy-backend-query-fetch-size: -1
  check-duplicate-table-enabled: true
#  sql-comment-parse-enabled: false
#  proxy-frontend-executor-size: 0 # Proxy frontend executor size. The default value is 0, which means let Netty decide.
#    # Available options of proxy backend executor suitable: OLAP(default), OLTP. The OLTP option may reduce time cost of writing packets to client, but it may increase the latency of SQL execution
#    # if client connections are more than proxy-frontend-netty-executor-size, especially executing slow SQL.
#  proxy-backend-executor-suitable: OLAP
#  proxy-frontend-max-connections: 0 # Less than or equal to 0 means no limitation.
#  sql-federation-enabled: false

4. 运行

4.1 运行容器

docker-compose up -d 起容器
之后可以删除容器,里面的yaml文件不够

拷贝镜像中的conf目录到数据卷

docker container create --name test01 apache/shardingsphere-proxy
docker container cp test01:/opt/shardingsphere-proxy/conf /docker/shardingproxy
docker container rm -f test01

安装容器内的应用:

//1.先备份
cp /etc/apt/sources.list /etc/apt/sources.list.bak

//2.清空
echo " " > /etc/apt/sources.list

//3.写阿里源
echo "deb https://mirrors.aliyun.com/debian stable main contrib non-free">>/etc/apt/sources.list

echo "deb https://mirrors.aliyun.com/debian stable-updates main contrib non-free">>/etc/apt/sources.list

//4.清空缓存
apt-get clean
apt-get update


# 安装telnet
apt-get install  telnet

# 安装curl
apt-get install  curl

# 安装ifconfig
apt-get install  net-tools

# 安装vim
apt-get install vim

# 安装ping
apt-get install inetutils-ping

navicat中配置sharding,注意端口3307

 yaml:

spring:
  autoconfigure:
    exclude:
      - org.apache.shardingsphere.shardingjdbc.spring.boot.SpringBootConfiguration
  datasource:
    dynamic:
      seata: false
      # 设置默认的数据源或者数据源组,默认值即为 master
      primary: master
      datasource:
        # 主库数据源
        master:
          type: com.zaxxer.hikari.HikariDataSource
          driver-class-name: com.mysql.cj.jdbc.Driver
          url: jdbc:mysql://192.168.0.60:3307/data-center_db?useSSL=false&useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull&transformedBitIsBoolean=true&tinyInt1isBit=false&allowMultiQueries=true&serverTimezone=GMT%2B8&allowPublicKeyRetrieval=true
          username: root
          password: root
 @Test
    void page() {
        Page<TOrder> page = new Page<>();
        page.setCurrent(3L);
        QueryWrapper<TOrder> queryWrapper = new QueryWrapper<>();
        queryWrapper.orderByAsc("order_id");
        torderMapper.selectPage(page,queryWrapper);
        System.out.println(page.getTotal());
        for(TOrder order : page.getRecords()){
            System.out.print(order.getTotalMoney()+" ");
        }
    }

    @Test
    void insert() {
        for(Long i = 1L; i <= 100L; i++){
            TOrder torder = new TOrder();
            torder.setUserId(i);
            torder.setTotalMoney(100 + Integer.parseInt(i+""));
            torderMapper.insert(torder);
        }

Logo

快速构建 Web 应用程序

更多推荐