diff --git a/tensorflow_datasets/community-datasets.toml b/tensorflow_datasets/community-datasets.toml
new file mode 100644
index 00000000000..c60bf6f4c0f
--- /dev/null
+++ b/tensorflow_datasets/community-datasets.toml
@@ -0,0 +1,3 @@
+[Namespaces]
+# You can add your own datasets here to register them in TFDS. See details
+# at: https://www.tensorflow.org/datasets/community
diff --git a/tensorflow_datasets/core/community/__init__.py b/tensorflow_datasets/core/community/__init__.py
new file mode 100644
index 00000000000..f8ef68a5335
--- /dev/null
+++ b/tensorflow_datasets/core/community/__init__.py
@@ -0,0 +1,30 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Community public API."""
+
+from tensorflow_datasets.core.community.register import community_config_path
+from tensorflow_datasets.core.community.register import COMMUNITY_EXPORTED_PATH
+from tensorflow_datasets.core.community.dataset_spec import DatasetSource
+from tensorflow_datasets.core.community.dataset_spec import DatasetSpec
+from tensorflow_datasets.core.community.dataset_spec import GithubSource
+
+__all__ = [
+    'community_config_path',
+    'COMMUNITY_EXPORTED_PATH',
+    'DatasetSource',
+    'DatasetSpec',
+    'GithubSource',
+]
diff --git a/tensorflow_datasets/core/community/dataset_spec.py b/tensorflow_datasets/core/community/dataset_spec.py
new file mode 100644
index 00000000000..321737e4567
--- /dev/null
+++ b/tensorflow_datasets/core/community/dataset_spec.py
@@ -0,0 +1,137 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Community utils."""
+
+import abc
+from typing import ClassVar, Dict
+
+import dataclasses
+from tensorflow_datasets.core import github_api
+from tensorflow_datasets.core import utils
+
+Json = utils.Json
+
+
+class DatasetSource(abc.ABC):
+  """Source indicating the dataset location (abstract class).
+
+  Additional user-defined sources can be registered by subclassing this class.
+
+  Attributes:
+    SCHEME: URI scheme (e.g. `github://`).
+ """ + + # Abstract class attribute + SCHEME: ClassVar[str] + + # Use non-mutable dict to prevent collision if two subclass try to use the + # same scheme + _subclasses: Dict[str, 'DatasetSource'] = utils.NonMutableDict() + + def __init_subclass__(cls, **kwargs): + """Subclasses are automatically registered.""" + super().__init_subclass__(**kwargs) + cls._subclasses[cls.SCHEME] = cls # Subclasses should have a unique SCHEME + + @classmethod + @abc.abstractmethod + def from_json(cls, value: Json) -> 'DatasetSource': + """Factory which will instancite the source from the registered class. + + ``` + source = DatasetSource.from_json({'type': 'github://', ...}) + assert isinstance(source, GithubSource) + ``` + + Args: + value: Json dict containing the constructor information. + + Returns: + The created source instance. + """ + source_type = dict(value).pop('scheme') + subclass = cls._subclasses.get(source_type) + if subclass is None: + raise ValueError( + f'Invalid source type {source_type} of: {value}\n' + f'Supported: {list(cls._subclasses)}' + ) + return subclass.from_json(value) + + @abc.abstractmethod + def to_json(self) -> Json: + """Exports the object to Json. Subclasses should call `super()`.""" + return {'scheme': self.SCHEME} + + +@dataclasses.dataclass +class GithubSource(DatasetSource): + """Dataset loaded from Github. + + Attributes: + path: The github path of the dataset + SCHEME: See parent class + """ + path: github_api.GithubPath + + SCHEME: ClassVar[str] = 'github://' # pylint: disable=invalid-name + + @classmethod + def from_json(cls, value: Json): + return cls(path=github_api.GithubPath(value['path'])) + + def to_json(self) -> Json: + value = super().to_json() + value['path'] = str(self.path) + return value + + +@dataclasses.dataclass(frozen=True) +class DatasetSpec: + """Contains specs required to lazily load a dataset. + + The specs match the `COMMUNITY_EXPORTED_PATH` content (one row == one spec) + + Attributes: + name: dataset name (e.g. `mnist`) + namespace: user/organization namespace (e.g. `mlds`) + source: Location of the dataset (e.g. Github) + """ + name: str + namespace: str + source: DatasetSource + + @classmethod + def from_json(cls, value: Json) -> 'DatasetSpec': + """Load the specs from a Json dict.""" + return cls( + name=value['name'], + namespace=value['namespace'], + source=DatasetSource.from_json(value['source']), + ) + + def to_json(self) -> Json: + """Export the specs as a Json dict.""" + return { + 'name': self.name, + 'namespace': self.namespace, + 'source': self.source.to_json(), + } + + @property + def cannonical_name(self) -> str: + """Returns the `namespace/dataset_name` string.""" + return f'{self.namespace}/{self.name}' diff --git a/tensorflow_datasets/core/community/dataset_spec_test.py b/tensorflow_datasets/core/community/dataset_spec_test.py new file mode 100644 index 00000000000..993f589bec9 --- /dev/null +++ b/tensorflow_datasets/core/community/dataset_spec_test.py @@ -0,0 +1,58 @@ +# coding=utf-8 +# Copyright 2020 The TensorFlow Datasets Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/tensorflow_datasets/core/community/dataset_spec_test.py b/tensorflow_datasets/core/community/dataset_spec_test.py
new file mode 100644
index 00000000000..993f589bec9
--- /dev/null
+++ b/tensorflow_datasets/core/community/dataset_spec_test.py
@@ -0,0 +1,58 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for tensorflow_datasets.core.community.dataset_spec."""
+
+from tensorflow_datasets.core import github_api
+from tensorflow_datasets.core.community import dataset_spec
+
+
+def test_import_export_json_source():
+  p = github_api.GithubPath('/tensorflow/graphics/tree/path/to/datasets')
+  source = dataset_spec.GithubSource(p)
+
+  json_source = source.to_json()
+  assert json_source == {
+      'scheme': 'github://',
+      'path': '/tensorflow/graphics/tree/path/to/datasets',
+  }
+
+  reconstructed_source = dataset_spec.DatasetSource.from_json(json_source)
+  assert isinstance(reconstructed_source, dataset_spec.GithubSource)
+  assert json_source == reconstructed_source.to_json()
+
+
+def test_import_export_json_spec():
+  p = github_api.GithubPath('/tensorflow/graphics/tree/path/to/datasets')
+  spec = dataset_spec.DatasetSpec(
+      name='mnist',
+      namespace='tensorflow_graphics',
+      source=dataset_spec.GithubSource(p),
+  )
+  assert spec.canonical_name == 'tensorflow_graphics/mnist'
+
+  json_spec = spec.to_json()
+  assert json_spec == {
+      'name': 'mnist',
+      'namespace': 'tensorflow_graphics',
+      'source': {
+          'scheme': 'github://',
+          'path': '/tensorflow/graphics/tree/path/to/datasets',
+      },
+  }
+
+  reconstructed_spec = dataset_spec.DatasetSpec.from_json(json_spec)
+  assert isinstance(reconstructed_spec.source, dataset_spec.GithubSource)
+  assert json_spec == reconstructed_spec.to_json()
diff --git a/tensorflow_datasets/core/community/register.py b/tensorflow_datasets/core/community/register.py
new file mode 100644
index 00000000000..cbeeda00fb3
--- /dev/null
+++ b/tensorflow_datasets/core/community/register.py
@@ -0,0 +1,29 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Community datasets register."""
+
+from tensorflow_datasets.core import utils
+
+
+# Community datasets are parsed from the config files and exported on GCS
+COMMUNITY_EXPORTED_PATH = utils.gcs_path('community-datasets-list.jsonl')
+
+
+def community_config_path() -> str:
+  """Returns the community config path."""
+  # The path is resolved dynamically: the config file is only required by
+  # specific scripts, so it may not always be present.
+  return utils.get_tfds_path('community-datasets.toml')
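For context on how `COMMUNITY_EXPORTED_PATH` is meant to be consumed: each line of the exported `.jsonl` file deserializes back into a `DatasetSpec`. A sketch, assuming the file has already been exported and is readable — the read-back loop itself is not part of this change:

```python
# Sketch: reading the exported registry back into specs. Assumes the JSONL
# file written by the deployment script exists (one spec per line).
import json

import tensorflow as tf
from tensorflow_datasets.core import community

with tf.io.gfile.GFile(str(community.COMMUNITY_EXPORTED_PATH)) as f:
  specs = [
      community.DatasetSpec.from_json(json.loads(line))
      for line in f
      if line.strip()
  ]

for spec in specs:
  print(spec.canonical_name)  # e.g. `nlp/mnist`
```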
diff --git a/tensorflow_datasets/core/github_api/__init__.py b/tensorflow_datasets/core/github_api/__init__.py
new file mode 100644
index 00000000000..ca2f823bf03
--- /dev/null
+++ b/tensorflow_datasets/core/github_api/__init__.py
@@ -0,0 +1,22 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Github util API."""
+
+from tensorflow_datasets.core.github_api.github_path import GithubPath
+
+__all__ = [
+    'GithubPath',
+]
diff --git a/tensorflow_datasets/core/github_api/github_path.py b/tensorflow_datasets/core/github_api/github_path.py
new file mode 100644
index 00000000000..797b2b27621
--- /dev/null
+++ b/tensorflow_datasets/core/github_api/github_path.py
@@ -0,0 +1,327 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Github pathlib-like util."""
+
+import enum
+import functools
+import os
+import pathlib
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
+
+import requests
+
+# TODO(pytype): Should be recursive
+Json = Union[str, int, bool, float, List[Any], Dict[str, Any]]
+
+
+class _PathType(enum.Enum):
+  """Path type (See: https://developer.github.com/v3/git/trees/#tree-object).
+
+  Attributes:
+    FILE: File
+    DIRECTORY: Directory
+    SUBMODULE: Git submodule
+      (https://git-scm.com/book/en/v2/Git-Tools-Submodules)
+  """
+  FILE = enum.auto()
+  DIRECTORY = enum.auto()
+  SUBMODULE = enum.auto()
+
+
+class _PathMetadata:
+  """Class storing the Github metadata for a file/directory.
+
+  Note:
+
+  * `_PathMetadata` objects are cached, so two paths pointing to the same
+    file trigger only a single query.
+  * Attributes are dynamically fetched from the github API only when
+    requested, to avoid unnecessary queries.
+  * Directories also cache entries for their children, reducing the number
+    of queries. For instance, `[f for f in p.iterdir() if f.is_file()]`
+    uses a single query in `iterdir()`, rather than one per `is_file()`.
+
+  Attributes:
+    repo: e.g. `tensorflow/datasets`
+    branch: e.g. `master`
+    subpath: e.g. `core/__init__.py`
+  """
+
+  @staticmethod
+  @functools.lru_cache(maxsize=None)
+  def from_cache(path: str) -> '_PathMetadata':
+    """Factory which caches metadata (to avoid querying the API twice)."""
+    # In the future, the cache might contain the full file content, which
+    # might grow big. We should add a cleanup mechanism (use weakref?).
+    return _PathMetadata(path, private=True)
+
+  def __init__(self, path: str, *, private=False):
+    if not private:
+      raise AssertionError(
+          'Metadata should be created using `_PathMetadata.from_cache`'
+      )
+    repo, branch, subpath = _parse_github_path(path)  # pytype: disable=name-error
+
+    # Read-only attributes
+    self._path: str = path
+    self.repo: str = repo  # e.g. `tensorflow/datasets`
+    self.branch: str = branch  # e.g. `master`
+    self.subpath: str = subpath  # e.g. 'core/__init__.py'
+
+    # Dynamically loaded properties
+    self._exists: Optional[bool] = None
+    self._type: Optional[_PathType] = None  # FILE, DIRECTORY, SUBMODULE
+    self._childs: Optional[List[str]] = None  # ['README.md', 'docs', ...]
+
+  @property
+  def type(self) -> _PathType:
+    """Type of the path (file, dir, submodule)."""
+    if not self._type:
+      self._init_and_cache_content()
+    return self._type
+
+  def _set_type_from_str(self, value: str) -> None:
+    """Sets or validates the file type.
+
+    This is called in `_init_and_cache_content`, either by `self` or by the
+    parent directory.
+
+    If the type is already set, this function makes sure the new type
+    matches.
+
+    Args:
+      value: The github type string (see:
+        https://developer.github.com/v3/repos/contents/ for available values)
+    """
+    str_to_type = {
+        'file': _PathType.FILE,
+        'dir': _PathType.DIRECTORY,
+    }
+    if value not in str_to_type:
+      raise ValueError(f'Unsupported file type: {value} for {self._path}')
+    new_type = str_to_type[value]
+    if self._type and self._type is not new_type:
+      raise AssertionError(
+          f'Cannot overwrite type {self._type} with {new_type} for {self._path}'
+      )
+    self._type = new_type
+
+  def listdir(self) -> List[str]:
+    """Returns the filenames in the directory (e.g. `['.gitignore', 'src']`)."""
+    if self.type != _PathType.DIRECTORY:
+      raise NotADirectoryError(f'{self._path} is not a directory.')
+    # self.type could have been computed by the parent dir, so
+    # `_init_and_cache_content` may not have been called yet.
+    if self._childs is None:
+      self._init_and_cache_content()
+    return self._childs
+
+  def exists(self) -> bool:
+    """Returns True if the file/dir exists."""
+    if self._exists is not None:
+      return self._exists
+    elif self._type:  # If the type has been set, the file/dir exists
+      return True
+    else:
+      try:
+        self._init_and_cache_content()
+        self._exists = True
+      except FileNotFoundError:
+        self._exists = False
+      return self._exists
+
+  def _init_and_cache_content(self) -> None:
+    """Queries github to get the file/directory content.
+
+    See doc at: https://developer.github.com/v3/repos/contents/
+
+    Note:
+
+    * After this function is called, `_type` and `_childs` (for directories)
+      are guaranteed to be initialized.
+    * For directories, it creates a new `_PathMetadata` entry per child
+      (to cache the filetype).
+    """
+    # e.g. 'https://api.github.com/repos/tensorflow/datasets/contents/docs'
+    url = (
+        f'https://api.github.com/repos/{self.repo}/contents/{self.subpath}'
+        f'?ref={self.branch}'
+    )
+    data = self._query_github(url)
+    if isinstance(data, list):  # Directory
+      self._init_directory(data)
+    elif isinstance(data, dict):  # File
+      self._init_file(data)
+    else:
+      raise AssertionError(f'Unknown content: {data}')
+
+  def _init_directory(self, data: Json) -> None:
+    """Sets the dynamic fields of a directory."""
+    self._type = _PathType.DIRECTORY
+    self._childs = [f['name'] for f in data]
+
+    # Create or update the child metadata type
+    for f in data:
+      metadata = _PathMetadata.from_cache(f"{self._path}/{f['name']}")
+      metadata._set_type_from_str(f['type'])  # pylint: disable=protected-access
+
+  def _init_file(self, data: Json) -> None:
+    self._set_type_from_str(data['type'])
+
+  def _query_github(self, url: str) -> Json:
+    """Launches a github API query and returns the result."""
+    # Get the secret API token to avoid the 60 calls/hour limit
+    # To get the current quota or test the token:
+    # curl -H "Authorization: token ${GITHUB_TOKEN}" https://api.github.com/rate_limit  # pylint: disable=line-too-long
+    token = os.environ.get('GITHUB_TOKEN')
+    headers = {}
+    if token:
+      headers['Authorization'] = f'token {token}'
+    resp = requests.get(url, headers=headers)
+    if resp.status_code != 200:
+      raise FileNotFoundError(
+          f'Request failed for {self._path}:\n'
+          f' Request: {url}\n'
+          f' Error: {resp.status_code}\n'
+          f' Reason: {resp.content}',
+      )
+    return resp.json()
+
+  def __repr__(self) -> str:
+    return f'{type(self).__name__}({self._path})'
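Because `from_cache` is backed by `functools.lru_cache`, every path string maps to a single metadata object, which is what lets independent path instances share query results. An illustrative sketch of the behaviour this buys (mirroring the `_metadata is` assertion in the tests below):

```python
# Illustrative: two independently-created paths pointing to the same
# location share one `_PathMetadata`, so GitHub is queried at most once.
from tensorflow_datasets.core.github_api import github_path

a = github_path.GithubPath('/tensorflow/datasets/tree/master/docs')
b = github_path.GithubPath.from_repo('tensorflow/datasets') / 'docs'

assert str(a) == str(b) == '/tensorflow/datasets/tree/master/docs'
assert a._metadata is b._metadata  # Same cached entry, no duplicate queries
```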
+
+
+class GithubPath(pathlib.PurePosixPath):
+  """`pathlib.Path`-like object for manipulating Github paths.
+
+  Example:
+
+  ```
+  path = GithubPath.from_repo('tensorflow/datasets')
+  path = path / 'docs' / 'catalog'
+  assert path.is_dir()
+  datasets = [
+      p.name for p in path.iterdir() if p.match('*.md')
+  ]
+
+  path = GithubPath('/tensorflow/datasets/tree/master/docs/README.md')
+  assert path.subpath == 'docs/README.md'
+  assert path.repo == 'tensorflow/datasets'
+  assert path.branch == 'master'
+  ```
+  """
+  _metadata: _PathMetadata  # Additional file metadata
+
+  @classmethod
+  def from_repo(cls, repo: str, branch: str = 'master') -> 'GithubPath':
+    """Factory to create a GithubPath from a repo name.
+
+    Args:
+      repo: Repo name (e.g. `tensorflow/datasets`)
+      branch: Branch name (e.g. `master`, 'v1.2.0', '0d240e8b85c'). Defaults
+        to master.
+
+    Returns:
+      github_path: The repository root dir at the given branch
+    """
+    return cls(f'/{repo}/tree/{branch}')
+
+  def _init(self, *args, **kwargs):
+    """Constructor."""
+    # Currently, the best way of subclassing `pathlib` objects is to
+    # overload `_init` (see: https://bugs.python.org/issue41109)
+    # Future Python versions may have a cleaner Path extension system:
+    # https://discuss.python.org/t/make-pathlib-extensible/3428/24
+    super()._init(*args, **kwargs)  # pytype: disable=attribute-error
+    # The metadata object manages the cache and dynamically queries the
+    # Github API as needed.
+    self._metadata = _PathMetadata.from_cache(str(self))
+
+  @property
+  def subpath(self) -> str:
+    """The inner path (e.g. `core/__init__.py`)."""
+    return self._metadata.subpath
+
+  @property
+  def repo(self) -> str:
+    """The repository identifier (e.g. `tensorflow/datasets`)."""
+    return self._metadata.repo
+
+  @property
+  def branch(self) -> str:
+    """The branch (e.g. `master`, `v2`, `43bbad116df`, ...)."""
+    return self._metadata.branch
+
+  def as_raw_url(self) -> str:
+    """Returns the raw content url (https://raw.githubusercontent.com)."""
+    return (
+        'https://raw.githubusercontent.com/'
+        f'{self.repo}/{self.branch}/{self.subpath}'
+    )
+
+  def iterdir(self) -> Iterator['GithubPath']:
+    """Yields the sub-paths."""
+    for filename in self._metadata.listdir():
+      yield self / filename
+
+  def is_dir(self) -> bool:
+    """Returns True if the path is a directory or submodule."""
+    return self._metadata.type in (_PathType.DIRECTORY, _PathType.SUBMODULE)
+
+  def is_file(self) -> bool:
+    """Returns True if the path is a file."""
+    return self._metadata.type is _PathType.FILE
+
+  def exists(self) -> bool:
+    """Returns True if the path exists."""
+    return self._metadata.exists()
+
+
+def _parse_github_path(path: str) -> Tuple[str, str, str]:
+  """Parses the absolute github path.
+
+  Args:
+    path: The full github path.
+
+  Returns:
+    repo: The repository identifier.
+    branch: Repository branch.
+    subpath: The inner path.
+
+  Raises:
+    ValueError: If the path is invalid
+  """
+  if path.endswith('/'):
+    raise ValueError(
+        f'Invalid github path: {path}. Trailing `/` not supported.'
+    )
+  parts = pathlib.PurePosixPath(path).parts
+  if len(parts) < 5:
+    raise ValueError(
+        f'Invalid github path: {path}. Expected format: '
+        '`/<owner>/<name>/tree/<branch>[/<sub-path>]`.'
+    )
+
+  # '/', 'tensorflow', 'datasets', 'tree', 'master', ...
+  root, owner, repo, tree, branch, *subpath = parts
+  if root != '/' or tree != 'tree':
+    raise ValueError(
+        f'Invalid github path: {path}. Expected format: '
+        '`/<owner>/<name>/tree/<branch>[/<sub-path>]`. Note that `/blob/` '
+        'isn\'t accepted. Only `/tree/`.'
+    )
+
+  return f'{owner}/{repo}', branch, '/'.join(subpath)
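`as_raw_url` is the bridge between the pathlib-style API and actual file content. A hedged sketch of downloading a file through it — `as_raw_url` comes from this change, while fetching the content with `requests` is illustrative (the change itself never downloads file bodies):

```python
# Sketch: fetching file content via the raw-content URL.
import requests

from tensorflow_datasets.core import github_api

path = github_api.GithubPath.from_repo('tensorflow/datasets', 'v3.1.0')
readme = path / 'README.md'

url = readme.as_raw_url()
# https://raw.githubusercontent.com/tensorflow/datasets/v3.1.0/README.md
resp = requests.get(url)
resp.raise_for_status()
content = resp.text  # Raw file content (raw host, not the rate-limited REST API)
```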
diff --git a/tensorflow_datasets/core/github_api/github_path_test.py b/tensorflow_datasets/core/github_api/github_path_test.py
new file mode 100644
index 00000000000..73bd0229f92
--- /dev/null
+++ b/tensorflow_datasets/core/github_api/github_path_test.py
@@ -0,0 +1,190 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Github API util tests."""
+
+import contextlib
+from unittest import mock
+
+import pytest
+
+from tensorflow_datasets.core.github_api import github_path
+
+
+_SKIP_NON_HERMETIC = False
+
+# Non-hermetic tests are explicitly marked, and skipped if
+# `_SKIP_NON_HERMETIC` is True.
+non_hermetic_test = pytest.mark.skipif(
+    _SKIP_NON_HERMETIC, reason='Non-hermetic test skipped.',
+)
+
+_original_query_github = github_path._PathMetadata._query_github
+
+
+@pytest.fixture(scope='module', autouse=True)
+def assert_no_api_call():
+  """Globally disables github API calls."""
+  with mock.patch.object(
+      github_path._PathMetadata,
+      '_query_github',
+      side_effect=AssertionError('Forbidden API call'),
+  ):
+    yield
+
+
+@contextlib.contextmanager
+def enable_api_call():
+  """Contextmanager which locally re-enables API calls."""
+  with mock.patch.object(
+      github_path._PathMetadata, '_query_github', _original_query_github
+  ):
+    yield
+
+
+def test_parse_github_path():
+  url = '/tensorflow/datasets/tree/master/docs/README.md'
+  repo, branch, path = github_path._parse_github_path(url)
+  assert repo == 'tensorflow/datasets'
+  assert branch == 'master'
+  assert path == 'docs/README.md'
+
+  url = '/tensorflow/datasets/tree/master'
+  repo, branch, path = github_path._parse_github_path(url)
+  assert repo == 'tensorflow/datasets'
+  assert branch == 'master'
+  assert path == ''  # pylint: disable=g-explicit-bool-comparison
+
+
+def test_invalid_github_path():
+
+  with pytest.raises(ValueError, match='Invalid github path'):
+    github_path.GithubPath()
+
+  with pytest.raises(ValueError, match='Invalid github path'):
+    github_path.GithubPath('')
+
+  with pytest.raises(ValueError, match='Invalid github path'):
+    github_path.GithubPath('/not/a/path')
+
+  with pytest.raises(ValueError, match='Invalid github path'):
+    github_path.GithubPath('/tensorflow/tree/master/docs/README.md')
+
+  # `blob` isn't accepted for consistency between paths.
+  with pytest.raises(ValueError, match='Invalid github path'):
+    github_path.GithubPath('/tensorflow/datasets/blob/master/docs/README.md')
+
+  p = github_path.GithubPath('/tensorflow/datasets/tree/master/docs/README.md')
+  p = p.parent  # /docs
+  p = p.parent  # /
+  with pytest.raises(ValueError, match='Invalid github path'):
+    p.parent  # pylint: disable=pointless-statement
+
+
+def test_github_path_purepath():
+  """Tests that pathlib methods work as expected."""
+  p = github_path.GithubPath('/tensorflow/datasets/tree/master/')
+  sub_p = p / 'some_folder'
+  assert isinstance(sub_p, github_path.GithubPath)
+  assert str(p) == '/tensorflow/datasets/tree/master'
+  assert p == github_path.GithubPath.from_repo('tensorflow/datasets')
+
+
+def test_github_path_as_url():
+  p = github_path.GithubPath.from_repo('tensorflow/datasets', 'v3.1.0')
+  p /= 'README.md'
+  expected = 'https://raw.githubusercontent.com/tensorflow/datasets/v3.1.0/README.md'
+  assert p.as_raw_url() == expected
+
+
+@non_hermetic_test
+def test_github_api_listdir():
+  """Tests querying the github API."""
+  # PurePath ops do not trigger API calls
+  p = github_path.GithubPath.from_repo('tensorflow/datasets', 'v3.1.0')
+  p = p / 'tensorflow_datasets' / 'testing'
+
+  with enable_api_call():
+    sub_dirs = sorted(p.iterdir())
+
+  # The `listdir` call caches the filetype of all children
+  all_dir_names = [d.name for d in sub_dirs if d.is_dir()]
+  all_file_names = [d.name for d in sub_dirs if d.is_file()]
+  all_names = [d.name for d in sub_dirs]
+
+  with pytest.raises(NotADirectoryError):
+    list((p / '__init__.py').iterdir())
+
+  assert all_names == [
+      '__init__.py',
+      'dataset_builder_testing.py',
+      'dataset_builder_testing_test.py',
+      'fake_data_generation',
+      'fake_data_utils.py',
+      'generate_archives.sh',
+      'metadata',
+      'mocking.py',
+      'mocking_test.py',
+      'test_case.py',
+      'test_data',
+      'test_utils.py',
+      'test_utils_test.py',
+  ]
+  assert all_dir_names == [
+      'fake_data_generation',
+      'metadata',
+      'test_data',
+  ]
+  assert all_file_names == [
+      '__init__.py',
+      'dataset_builder_testing.py',
+      'dataset_builder_testing_test.py',
+      'fake_data_utils.py',
+      'generate_archives.sh',
+      'mocking.py',
+      'mocking_test.py',
+      'test_case.py',
+      'test_utils.py',
+      'test_utils_test.py',
+  ]
+
+
+@non_hermetic_test
+def test_github_api_exists():
+  """Tests querying the github API."""
+  p = github_path.GithubPath.from_repo('tensorflow/datasets', 'v3.1.0')
+  with enable_api_call():
+    assert p.exists()
+    assert not (p / 'unknown_dir').exists()
+
+  readme = p / 'README.md'
+  core = p / 'tensorflow_datasets' / 'core'
+  with enable_api_call():
+    assert readme.is_file()
+    assert core.is_dir()
+
+  # Data should have been cached (no API calls required)
+  assert not readme.is_dir()
+  assert not core.is_file()
+  assert readme.exists()
+  assert core.exists()
+  # Recreating a new Path reuses the cache
+  assert (core.parent.parent / 'README.md').is_file()
+  assert (core.parent.parent / 'README.md')._metadata is readme._metadata
+
+
+def test_assert_no_api_call():
+  with pytest.raises(AssertionError, match='Forbidden API call'):
+    github_path.GithubPath.from_repo('tensorflow/datasets', 'v1.0.0').exists()
diff --git a/tensorflow_datasets/core/utils/__init__.py b/tensorflow_datasets/core/utils/__init__.py
index d3ad53125e4..37bbbc70372 100644
--- a/tensorflow_datasets/core/utils/__init__.py
+++ b/tensorflow_datasets/core/utils/__init__.py
@@ -22,6 +22,7 @@
 from tensorflow_datasets.core.utils.py_utils import *
 from tensorflow_datasets.core.utils.tf_utils import *
 from tensorflow_datasets.core.utils.tqdm_utils import *
+from tensorflow_datasets.core.utils.type_utils import *
 from tensorflow_datasets.core.utils.version import Experiment
 from tensorflow_datasets.core.utils.version import Version
 # pylint: enable=wildcard-import
diff --git a/tensorflow_datasets/core/utils/type_utils.py b/tensorflow_datasets/core/utils/type_utils.py
new file mode 100644
index 00000000000..532d5f05699
--- /dev/null
+++ b/tensorflow_datasets/core/utils/type_utils.py
@@ -0,0 +1,27 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Type utils."""
+
+from typing import Any, Dict, List, Union
+
+__all__ = [
+    'Json',
+    'JsonValue',
+]
+
+# TODO(pytype): Should use a recursive type
+JsonValue = Union[str, int, float, bool, Dict[str, Any], List[Any]]
+Json = Dict[str, JsonValue]
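Since `type_utils` is re-exported through `tensorflow_datasets.core.utils` (via the wildcard import above), code elsewhere can annotate JSON-shaped data directly with the alias. A small illustrative sketch; the `serialize`/`deserialize` helpers below are hypothetical:

```python
# Illustrative only: the aliases give readable signatures for JSON-shaped
# data, matching what the `to_json()`/`from_json()` methods exchange.
import json

from tensorflow_datasets.core import utils


def serialize(value: utils.Json) -> str:
  """Encodes a JSON-shaped dict (e.g. a `DatasetSpec.to_json()` result)."""
  return json.dumps(value)


def deserialize(raw: str) -> utils.Json:
  return json.loads(raw)


assert deserialize(serialize({'name': 'mnist'})) == {'name': 'mnist'}
```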
diff --git a/tensorflow_datasets/scripts/deployment/export_community_datasets.py b/tensorflow_datasets/scripts/deployment/export_community_datasets.py
new file mode 100644
index 00000000000..d5652263a14
--- /dev/null
+++ b/tensorflow_datasets/scripts/deployment/export_community_datasets.py
@@ -0,0 +1,164 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Script which parses the registered repositories and saves found datasets."""
+
+import itertools
+import json
+import pathlib
+from typing import List
+
+from absl import app
+import tensorflow as tf
+
+from tensorflow_datasets.core import community
+from tensorflow_datasets.core import github_api
+import toml
+
+
+def _is_dataset_path(ds_path: github_api.GithubPath) -> bool:
+  """Returns True if the given path corresponds to a dataset.
+
+  Currently a simple heuristic is used. This function checks that the path
+  has the following structure:
+
+  ```
+  <ds_name>/
+      <ds_name>.py
+  ```
+
+  Args:
+    ds_path: Path of the dataset module
+
+  Returns:
+    True if the path matches the expected file structure
+  """
+  return ds_path.is_dir() and (ds_path / f'{ds_path.name}.py').exists()
+
+
+def _list_namespace_ds_specs(
+    namespace: str,
+    path: str,
+) -> List[community.DatasetSpec]:
+  """Returns the dataset specs found in a specific directory.
+
+  The directory should have the following structure:
+
+  ```
+  <path>/
+      <dataset0>/
+      <dataset1>/
+      ...
+  ```
+
+  Additional files or folders which are not detected as datasets will be
+  ignored (e.g. `__init__.py`).
+
+  Args:
+    namespace: Namespace of the datasets
+    path: The directory path containing the datasets.
+
+  Returns:
+    ds_specs: The dataset specs found in the directory (sorted for
+      determinism).
+
+  Raises:
+    FileNotFoundError: If the path cannot be reached.
+  """
+  path = github_api.GithubPath(path)
+  if not path.exists():
+    # Should be fault-tolerant in the future
+    raise FileNotFoundError(f'Could not find datasets at {path}')
+  all_specs = [
+      community.DatasetSpec(  # pylint: disable=g-complex-comprehension
+          name=ds_path.name,
+          namespace=namespace,
+          source=community.GithubSource(ds_path),
+      ) for ds_path in path.iterdir() if _is_dataset_path(ds_path)
+  ]
+  return sorted(all_specs, key=lambda spec: spec.canonical_name)
+
+
+def _find_community_ds_specs(
+    config_path: pathlib.Path,
+) -> List[community.DatasetSpec]:
+  """Finds all namespaces/datasets from the config.
+
+  The config should contain the instructions in the following format:
+
+  ```
+  [Namespaces]
+  <namespace0> = '/<owner>/<repo>/tree/<branch>[/<path>]'
+  <namespace1> = '/<owner>/<repo>/tree/<branch>[/<path>]'
+  ```
+
+  Args:
+    config_path: Path to the config file containing the lookup instructions.
+
+  Returns:
+    ds_specs: list of all found datasets.
+  """
+  config = toml.load(config_path)
+  all_specs = itertools.chain.from_iterable(
+      _list_namespace_ds_specs(namespace, path)
+      for namespace, path in config['Namespaces'].items()
+  )
+  return sorted(all_specs, key=lambda spec: spec.canonical_name)
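For reference, here is what `_find_community_ds_specs` sees after `toml.load`. A small sketch; the two namespace entries are hypothetical examples:

```python
# Illustrative: the parsed `[Namespaces]` table maps namespace -> repo path.
import toml

config = toml.loads("""
[Namespaces]
tensorflow_graphics = '/tensorflow/graphics/tree/master/tensorflow_graphics/datasets'
nlp = '/huggingface/datasets/tree/master/datasets'
""")
assert config['Namespaces'] == {
    'tensorflow_graphics':
        '/tensorflow/graphics/tree/master/tensorflow_graphics/datasets',
    'nlp': '/huggingface/datasets/tree/master/datasets',
}
```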
+
+
+def _save_community_ds_specs(
+    file_path: str, ds_specs: List[community.DatasetSpec]
+) -> None:
+  """Saves all loaded dataset specs.
+
+  The saved file will have the following `.jsonl` format (one spec per line):
+
+  ```
+  {"name": "dataset0", "namespace": "namespace0", "source": {...}}
+  {"name": "dataset1", "namespace": "namespace0", "source": {...}}
+  ...
+  ```
+
+  Args:
+    file_path: `.jsonl` destination to which the dataset specs are saved
+    ds_specs: Dataset specs to save
+  """
+  # TODO(tfds): Replace GFile by a pathlib-like abstraction for GCS.
+  with tf.io.gfile.GFile(file_path, 'w') as f:
+    for spec in ds_specs:
+      f.write(json.dumps(spec.to_json()))
+      f.write('\n')
+
+
+def export_community_datasets(in_path: pathlib.Path, out_path: str) -> None:
+  """Exports community datasets.
+
+  Args:
+    in_path: Config path containing the namespaces and dataset lookup
+      instructions.
+    out_path: File to which all detected datasets are saved. Previous content
+      is erased.
+  """
+  ds_specs = _find_community_ds_specs(in_path)
+  _save_community_ds_specs(out_path, ds_specs)
+
+
+def main(_):
+  config_path = pathlib.Path(community.community_config_path())
+  exported_path = community.COMMUNITY_EXPORTED_PATH
+  export_community_datasets(in_path=config_path, out_path=exported_path)
+
+
+if __name__ == '__main__':
+  app.run(main)
diff --git a/tensorflow_datasets/scripts/deployment/export_community_datasets_test.py b/tensorflow_datasets/scripts/deployment/export_community_datasets_test.py
new file mode 100644
index 00000000000..96eeebe4a7e
--- /dev/null
+++ b/tensorflow_datasets/scripts/deployment/export_community_datasets_test.py
@@ -0,0 +1,86 @@
+# coding=utf-8
+# Copyright 2020 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for external datasets."""
+
+import pathlib
+import string
+import textwrap
+from typing import List
+
+from unittest import mock
+
+from tensorflow_datasets.core import github_api
+from tensorflow_datasets.scripts.deployment import export_community_datasets
+
+
+def _write_dataset_files(
+    root_path: pathlib.Path, namespace: str, datasets: List[str]
+) -> str:
+  """Writes the repo content containing the datasets."""
+  repo_path = root_path / namespace
+  # Create all datasets
+  for ds_name in datasets:
+    ds_path = repo_path / ds_name / f'{ds_name}.py'
+    ds_path.parent.mkdir(parents=True)  # Create the containing dir
+    ds_path.touch()  # Create the file
+
+  # Additional noisy files should be ignored
+  (repo_path / '__init__.py').touch()
+  (repo_path / 'empty_dir').mkdir()
+  return str(repo_path)
+
+
+def test_export_community_datasets(tmp_path):
+
+  # Create the community dataset repositories
+  tfg_path = _write_dataset_files(
+      tmp_path, namespace='tensorflow_graphics', datasets=['cifar']
+  )
+  nlp_path = _write_dataset_files(
+      tmp_path, namespace='nlp', datasets=['mnist', 'robotnet']
+  )
+
+  # Write a dummy `community-datasets.toml`
+  in_path = tmp_path / 'config.toml'
+  in_path.write_text(
+      textwrap.dedent(
+          f"""\
+          [Namespaces]
+          tensorflow_graphics = '{tfg_path}'
+          nlp = '{nlp_path}'
+          """
+      )
+  )
+
+  # Load the registered datasets and export the list.
+  # We patch `GithubPath` with `pathlib.Path`, as the two have the same API.
+  out_path = tmp_path / 'out.jsonl'
+  with mock.patch.object(github_api, 'GithubPath', pathlib.Path):
+    export_community_datasets.export_community_datasets(in_path, str(out_path))
+
+  # Ensure the datasets were correctly exported
+  expected_output = textwrap.dedent(
+      """\
+      {"name": "mnist", "namespace": "nlp", "source": {"scheme": "github://", "path": "${nlp_path}/mnist"}}
+      {"name": "robotnet", "namespace": "nlp", "source": {"scheme": "github://", "path": "${nlp_path}/robotnet"}}
+      {"name": "cifar", "namespace": "tensorflow_graphics", "source": {"scheme": "github://", "path": "${tfg_path}/cifar"}}
+      """
+  )
+  expected_output = string.Template(expected_output).substitute(
+      tfg_path=tfg_path,
+      nlp_path=nlp_path,
+  )
+  assert out_path.read_text() == expected_output
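As a usage note, the deployment entry point simply wires the two paths together. Running the exporter locally against a custom config would look roughly like this — the paths are hypothetical, and writing to the real GCS destination would require credentials:

```python
# Hypothetical local invocation of the exporter with custom in/out paths.
import pathlib

from tensorflow_datasets.scripts.deployment import export_community_datasets

config = pathlib.Path('/tmp/my-community-datasets.toml')  # [Namespaces] table
out = '/tmp/community-datasets-list.jsonl'
export_community_datasets.export_community_datasets(in_path=config, out_path=out)
```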