Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions hbase-server/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,11 @@
<artifactId>junit-vintage-engine</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1799,8 +1799,8 @@ private void setupWALAndReplication() throws IOException {
throw new RegionServerRunningException(
"Region server has already created directory at " + this.serverName.toString());
}
// Always create wal directory as now we need this when master restarts to find out the live
// region servers.
// Create wal directory here and we will never create it again in other places. This is
// important to make sure that our fencing way takes effect. See HBASE-29797 for more details.
if (!this.walFs.mkdirs(logDir)) {
throw new IOException("Can not create wal directory " + logDir);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -536,10 +536,8 @@ protected AbstractFSWAL(final FileSystem fs, final Abortable abortable, final Pa
this.remoteFs = remoteFs;
this.remoteWALDir = remoteWALDir;

if (!fs.exists(walDir) && !fs.mkdirs(walDir)) {
throw new IOException("Unable to mkdir " + walDir);
}

// Here we only crate archive dir, without wal dir. This is to make sure that our fencing way
// takes effect. See HBASE-29797 for more details.
if (!fs.exists(this.walArchiveDir)) {
if (!fs.mkdirs(this.walArchiveDir)) {
throw new IOException("Unable to mkdir " + this.walArchiveDir);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.hadoop.hbase.regionserver.wal.ProtobufWALTailingReader;
import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.LeaseNotRecoveredException;
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
Expand Down Expand Up @@ -211,7 +212,7 @@ static WALProvider createProvider(Class<? extends WALProvider> clazz) throws IOE
public WALFactory(Configuration conf, String factoryId) throws IOException {
// default enableSyncReplicationWALProvider is true, only disable SyncReplicationWALProvider
// for HMaster or HRegionServer which take system table only. See HBASE-19999
this(conf, factoryId, null);
this(conf, factoryId, null, true);
}

/**
Expand All @@ -228,17 +229,30 @@ public WALFactory(Configuration conf, String factoryId) throws IOException {
*/
public WALFactory(Configuration conf, ServerName serverName, Abortable abortable)
throws IOException {
this(conf, serverName.toString(), abortable);
this(conf, serverName.toString(), abortable, false);
}

private static void createWALDirectory(Configuration conf, String factoryId) throws IOException {
FileSystem walFs = CommonFSUtils.getWALFileSystem(conf);
Path walRootDir = CommonFSUtils.getWALRootDir(conf);
Path walDir = new Path(walRootDir, AbstractFSWALProvider.getWALDirectoryName(factoryId));
if (!walFs.exists(walDir) && !walFs.mkdirs(walDir)) {
throw new IOException("Can not create wal directory " + walDir);
}
}

/**
* @param conf must not be null, will keep a reference to read params in later reader/writer
* instances.
* @param factoryId a unique identifier for this factory. used i.e. by filesystem implementations
* to make a directory
* @param abortable the server associated with this WAL file
* @param conf must not be null, will keep a reference to read params in later
* reader/writer instances.
* @param factoryId a unique identifier for this factory. used i.e. by filesystem
* implementations to make a directory
* @param abortable the server associated with this WAL file
* @param createWalDirectory pass {@code true} for testing purpose, to create the wal directory
* automatically. In normal code path, we should create it in
* HRegionServer setup.
*/
private WALFactory(Configuration conf, String factoryId, Abortable abortable) throws IOException {
private WALFactory(Configuration conf, String factoryId, Abortable abortable,
boolean createWalDirectory) throws IOException {
// until we've moved reader/writer construction down into providers, this initialization must
// happen prior to provider initialization, in case they need to instantiate a reader/writer.
timeoutMillis = conf.getInt("hbase.hlog.open.timeout", 300000);
Expand All @@ -259,6 +273,10 @@ private WALFactory(Configuration conf, String factoryId, Abortable abortable) th
REPLICATION_WAL_PROVIDER, this.abortable);
// end required early initialization
if (conf.getBoolean(WAL_ENABLED, true)) {
if (createWalDirectory) {
// for testing only
createWALDirectory(conf, factoryId);
}
WALProvider provider = createProvider(getProviderClass(WAL_PROVIDER, DEFAULT_WAL_PROVIDER));
provider.init(this, conf, null, this.abortable);
provider.addWALActionsListener(new MetricsWAL());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2194,8 +2194,9 @@ public static WAL createWal(final Configuration conf, final Path rootDir, final
// The WAL subsystem will use the default rootDir rather than the passed in rootDir
// unless I pass along via the conf.
Configuration confForWAL = new Configuration(conf);
confForWAL.set(HConstants.HBASE_DIR, rootDir.toString());
return new WALFactory(confForWAL, "hregion-" + RandomStringUtils.randomNumeric(8)).getWAL(hri);
CommonFSUtils.setRootDir(confForWAL, rootDir);
return new WALFactory(confForWAL, "hregion-" + RandomStringUtils.insecure().nextNumeric(8))
.getWAL(hri);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;

import static org.awaitility.Awaitility.await;

import java.io.IOException;
import java.time.Duration;
import java.util.Collections;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.RecoverLeaseFSUtils;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

/**
* Testcase for HBASE-29797, where the lazy initialized WALProvider may recreate the WAL directory
* and cause our fencing way loses efficacy.
*/
@Tag(MasterTests.TAG)
@Tag(MediumTests.TAG)
public class TestWALFencing {

private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();

@BeforeAll
public static void setUp() throws Exception {
UTIL.startMiniCluster(3);
UTIL.getAdmin().balancerSwitch(false, true);
}

@AfterAll
public static void tearDown() throws IOException {
UTIL.shutdownMiniCluster();
}

@Test
public void testMoveMeta() throws Exception {
HRegionServer metaRs = UTIL.getRSForFirstRegionInTable(TableName.META_TABLE_NAME);
HRegionServer otherRs = UTIL.getOtherRegionServer(metaRs);
// do fencing here, i.e, kill otherRs
Path splittingDir = UTIL.getMiniHBaseCluster().getMaster().getMasterWalManager()
.getLogDirs(Collections.singleton(otherRs.getServerName())).get(0);
for (FileStatus walFile : otherRs.getWALFileSystem().listStatus(splittingDir)) {
RecoverLeaseFSUtils.recoverFileLease(otherRs.getWALFileSystem(), walFile.getPath(),
otherRs.getConfiguration());
}
// move meta region to otherRs, which should fail and crash otherRs, and then master will try to
// assign meta region to another rs
RegionInfo metaRegionInfo = metaRs.getRegions().stream().map(Region::getRegionInfo)
.filter(RegionInfo::isMetaRegion).findAny().get();
UTIL.getAdmin().move(metaRegionInfo.getRegionName(), otherRs.getServerName());
// make sure that meta region is not on otherRs
await().during(Duration.ofSeconds(5)).atMost(Duration.ofSeconds(6))
.until(() -> otherRs.getRegions(TableName.META_TABLE_NAME).isEmpty());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ protected void initialize() throws IOException {
.setColumnFamily(familyDescriptor).build();
RegionInfo info = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();

fs.mkdirs(new Path(basedir, logName));
hlog = new FSHLog(fs, basedir, logName, conf);
hlog.init();
ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ public void testLockupAroundBadAssignSync() throws IOException {
// the test.
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + getName());
fs.mkdirs(new Path(rootDir, getName()));
DodgyFSLog dodgyWAL = new DodgyFSLog(fs, (Server) services, rootDir, getName(), CONF);
dodgyWAL.init();
LogRoller logRoller = new LogRoller(services);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ public void setUp() throws Exception {
final Path hbaseWALDir = TEST_UTIL.createWALRootDir();
DIR = new Path(hbaseWALDir, currentTest.getMethodName());
assertNotEquals(hbaseDir, hbaseWALDir);
FS.mkdirs(DIR);
}

@BeforeClass
Expand Down Expand Up @@ -393,9 +394,8 @@ public void testFindMemStoresEligibleForFlush() throws Exception {
@Test(expected = IOException.class)
public void testFailedToCreateWALIfParentRenamed()
throws IOException, CommonFSUtils.StreamLacksCapabilityException {
final String name = "testFailedToCreateWALIfParentRenamed";
AbstractFSWAL<?> wal = newWAL(FS, CommonFSUtils.getWALRootDir(CONF), name,
HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null);
AbstractFSWAL<?> wal = newWAL(FS, CommonFSUtils.getWALRootDir(CONF),
currentTest.getMethodName(), HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null);
long filenum = EnvironmentEdgeManager.currentTime();
Path path = wal.computeFilename(filenum);
wal.createWriterInstance(FS, path);
Expand Down Expand Up @@ -544,6 +544,7 @@ public void testRollWriterForClosedWAL() throws IOException {

private AbstractFSWAL<?> createHoldingWAL(String testName, AtomicBoolean startHoldingForAppend,
CountDownLatch holdAppend) throws IOException {
FS.mkdirs(new Path(CommonFSUtils.getRootDir(CONF), testName));
AbstractFSWAL<?> wal = newWAL(FS, CommonFSUtils.getRootDir(CONF), testName,
HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null);
// newWAL has already called wal.init()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public static void tearDownAfterClass() throws Exception {
}

@Override
protected CustomAsyncFSWAL getWAL(FileSystem fs, Path root, String logDir, Configuration conf)
protected CustomAsyncFSWAL getWAL0(FileSystem fs, Path root, String logDir, Configuration conf)
throws IOException {
CustomAsyncFSWAL wal =
new CustomAsyncFSWAL(fs, root, logDir, conf, GROUP, NioSocketChannel.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ protected void atHeadOfRingBufferEventHandlerAppend() {
public void testSyncRunnerIndexOverflow() throws IOException, NoSuchFieldException,
SecurityException, IllegalArgumentException, IllegalAccessException {
final String name = this.name.getMethodName();
FS.mkdirs(new Path(CommonFSUtils.getRootDir(CONF), name));
FSHLog log = new FSHLog(FS, CommonFSUtils.getRootDir(CONF), name,
HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null);
log.init();
Expand Down Expand Up @@ -140,6 +141,7 @@ public void testUnflushedSeqIdTracking() throws IOException, InterruptedExceptio
final CountDownLatch flushFinished = new CountDownLatch(1);
final CountDownLatch putFinished = new CountDownLatch(1);

FS.mkdirs(new Path(CommonFSUtils.getRootDir(CONF), name));
try (FSHLog log = new FSHLog(FS, CommonFSUtils.getRootDir(CONF), name,
HConstants.HREGION_OLDLOGDIR_NAME, CONF, null, true, null, null)) {
log.init();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public class TestFSHLogDurability extends WALDurabilityTestBase<CustomFSHLog> {
HBaseClassTestRule.forClass(TestFSHLogDurability.class);

@Override
protected CustomFSHLog getWAL(FileSystem fs, Path root, String logDir, Configuration conf)
protected CustomFSHLog getWAL0(FileSystem fs, Path root, String logDir, Configuration conf)
throws IOException {
CustomFSHLog wal = new CustomFSHLog(fs, root, logDir, conf);
wal.init();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,13 @@ public void tearDown() throws IOException {
TEST_UTIL.cleanupTestDir();
}

protected abstract T getWAL(FileSystem fs, Path root, String logDir, Configuration conf)
protected final T getWAL(FileSystem fs, Path root, String logDir, Configuration conf)
throws IOException {
fs.mkdirs(new Path(root, logDir));
return getWAL0(fs, root, logDir, conf);
}

protected abstract T getWAL0(FileSystem fs, Path root, String logDir, Configuration conf)
throws IOException;

protected abstract void resetSyncFlag(T wal);
Expand Down
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,7 @@
<jruby.version>9.4.14.0</jruby.version>
<junit.jupiter.version>5.13.4</junit.jupiter.version>
<junit.vintage.version>5.13.4</junit.vintage.version>
<awaitility.version>4.3.0</awaitility.version>
<hamcrest.version>1.3</hamcrest.version>
<opentelemetry.version>1.49.0</opentelemetry.version>
<opentelemetry-semconv.version>1.29.0-alpha</opentelemetry-semconv.version>
Expand Down Expand Up @@ -1700,6 +1701,12 @@
<version>${junit.vintage.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
<version>${awaitility.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
Expand Down