Skip to content

Commit 150da5c

Browse files
author
Feiyu Chan
authored
fix conflicts and merge upstream (#49)
fix conflicts and merge upstream
1 parent cc9d3a0 commit 150da5c

File tree

18 files changed

+363
-77
lines changed

18 files changed

+363
-77
lines changed

paddle/fluid/framework/proto_desc.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ constexpr int kNoneProcessMeshIndex = -1;
2727

2828
// If an attribute name has a certain suffix, it means that the
2929
// attribute is a distributed-related attribute for auto parallel.
30-
// e.g., "mesh_id@PARALLEL".
31-
constexpr char kAutoParallelSuffix[] = "@PARALLEL";
30+
// e.g., "mesh_id@AUTO_PARALLEL".
31+
constexpr char kAutoParallelSuffix[] = "@AUTO_PARALLEL";
3232

3333
} // namespace framework
3434
} // namespace paddle

paddle/fluid/inference/capi_exp/pd_config.cc

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,48 @@ void PD_ConfigSetTrtDynamicShapeInfo(__pd_keep PD_Config* pd_config,
231231
optim_input_shapes, disable_trt_plugin_fp16);
232232
}
233233

234+
PD_Bool PD_ConfigTensorRtDynamicShapeEnabled(__pd_keep PD_Config* pd_config) {
235+
CHECK_AND_CONVERT_PD_CONFIG;
236+
return config->tensorrt_dynamic_shape_enabled();
237+
}
238+
239+
void PD_ConfigEnableTunedTensorRtDynamicShape(__pd_keep PD_Config* pd_config,
240+
const char* shape_range_info_path,
241+
PD_Bool allow_build_at_runtime) {
242+
CHECK_AND_CONVERT_PD_CONFIG;
243+
config->EnableTunedTensorRtDynamicShape(shape_range_info_path,
244+
allow_build_at_runtime);
245+
}
246+
247+
PD_Bool PD_ConfigTunedTensorRtDynamicShape(__pd_keep PD_Config* pd_config) {
248+
CHECK_AND_CONVERT_PD_CONFIG;
249+
return config->tuned_tensorrt_dynamic_shape();
250+
}
251+
252+
PD_Bool PD_ConfigTrtAllowBuildAtRuntime(__pd_keep PD_Config* pd_config) {
253+
CHECK_AND_CONVERT_PD_CONFIG;
254+
return config->trt_allow_build_at_runtime();
255+
}
256+
257+
void PD_ConfigCollectShapeRangeInfo(__pd_keep PD_Config* pd_config,
258+
const char* shape_range_info_path) {
259+
CHECK_AND_CONVERT_PD_CONFIG;
260+
config->CollectShapeRangeInfo(shape_range_info_path);
261+
}
262+
263+
const char* PD_ConfigShapeRangeInfoPath(__pd_keep PD_Config* pd_config) {
264+
CHECK_AND_CONVERT_PD_CONFIG;
265+
auto shape_str = config->shape_range_info_path();
266+
char* c = reinterpret_cast<char*>(malloc(shape_str.length() + 1));
267+
snprintf(c, shape_str.length() + 1, "%s", shape_str.c_str());
268+
return c;
269+
}
270+
271+
PD_Bool PD_ConfigShapeRangeInfoCollected(__pd_keep PD_Config* pd_config) {
272+
CHECK_AND_CONVERT_PD_CONFIG;
273+
return config->shape_range_info_collected();
274+
}
275+
234276
void PD_ConfigDisableTensorRtOPs(__pd_keep PD_Config* pd_config, size_t ops_num,
235277
const char** ops_name) {
236278
CHECK_AND_CONVERT_PD_CONFIG;
@@ -358,9 +400,9 @@ PD_Bool PD_ConfigModelFromMemory(__pd_keep PD_Config* pd_config) {
358400
CHECK_AND_CONVERT_PD_CONFIG;
359401
return config->model_from_memory();
360402
}
361-
void PD_ConfigEnableMemoryOptim(__pd_keep PD_Config* pd_config) {
403+
void PD_ConfigEnableMemoryOptim(__pd_keep PD_Config* pd_config, PD_Bool x) {
362404
CHECK_AND_CONVERT_PD_CONFIG;
363-
config->EnableMemoryOptim();
405+
config->EnableMemoryOptim(x);
364406
}
365407
PD_Bool PD_ConfigMemoryOptimEnabled(__pd_keep PD_Config* pd_config) {
366408
CHECK_AND_CONVERT_PD_CONFIG;

paddle/fluid/inference/capi_exp/pd_config.h

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,69 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigSetTrtDynamicShapeInfo(
324324
size_t* shapes_num, int32_t** min_shape, int32_t** max_shape,
325325
int32_t** optim_shape, PD_Bool disable_trt_plugin_fp16);
326326
///
327+
/// \brief A boolean state telling whether the trt dynamic_shape is used.
328+
///
329+
/// \param[in] pd_config config
330+
///
331+
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigTensorRtDynamicShapeEnabled(
332+
__pd_keep PD_Config* pd_config);
333+
///
334+
/// \brief Enable tuned tensorrt dynamic shape.
335+
///
336+
/// \param[in] pd_config config
337+
/// \param[in] shape_range_info_path the path to shape_info file got in
338+
/// CollectShapeInfo mode.
339+
/// \param[in] allow_build_at_runtime allow build trt engine at runtime.
340+
///
341+
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableTunedTensorRtDynamicShape(
342+
__pd_keep PD_Config* pd_config, const char* shape_range_info_path,
343+
PD_Bool allow_build_at_runtime);
344+
345+
///
346+
/// \brief A boolean state telling whether to use tuned tensorrt dynamic
347+
/// shape.
348+
///
349+
/// \param[in] pd_config config
350+
///
351+
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigTunedTensorRtDynamicShape(
352+
__pd_keep PD_Config* pd_config);
353+
354+
///
355+
/// \brief A boolean state telling whether to allow building trt engine at
356+
/// runtime.
357+
///
358+
/// \param[in] pd_config config
359+
///
360+
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigTrtAllowBuildAtRuntime(
361+
__pd_keep PD_Config* pd_config);
362+
363+
///
364+
/// \brief Collect shape info of all tensors in compute graph.
365+
///
366+
/// \param[in] pd_config config
367+
/// \param[in] shape_range_info_path the path to save shape info.
368+
///
369+
PADDLE_CAPI_EXPORT extern void PD_ConfigCollectShapeRangeInfo(
370+
__pd_keep PD_Config* pd_config, const char* shape_range_info_path);
371+
372+
///
373+
/// \brief the shape info path in CollectShapeInfo mode.
374+
/// Attention, Please release the string manually.
375+
///
376+
/// \param[in] pd_config config
377+
///
378+
PADDLE_CAPI_EXPORT extern const char* PD_ConfigShapeRangeInfoPath(
379+
__pd_keep PD_Config* pd_config);
380+
381+
///
382+
/// \brief A boolean state telling whether to collect shape info.
383+
///
384+
/// \param[in] pd_config config
385+
///
386+
PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigShapeRangeInfoCollected(
387+
__pd_keep PD_Config* pd_config);
388+
389+
///
327390
/// \brief Prevent ops running in Paddle-TRT
328391
/// NOTE: just experimental, not an official stable API, easy to be broken.
329392
///
@@ -542,7 +605,7 @@ PADDLE_CAPI_EXPORT extern PD_Bool PD_ConfigModelFromMemory(
542605
/// \param[in] pd_config config
543606
///
544607
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableMemoryOptim(
545-
__pd_keep PD_Config* pd_config);
608+
__pd_keep PD_Config* pd_config, PD_Bool x);
546609
///
547610
/// \brief A boolean state telling whether the memory optimization is
548611
/// activated.

paddle/fluid/inference/goapi/config.go

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,71 @@ func (config *Config) SetTRTDynamicShapeInfo(minInputShape map[string][]int32, m
383383
cvtGoBoolToPD(disableTrtPluginFp16))
384384
}
385385

386+
///
387+
/// \brief A boolean state telling whether the trt dynamic_shape is used.
388+
///
389+
func (config *Config) TensorRtDynamicShapeEnabled() bool {
390+
return cvtPDBoolToGo(C.PD_ConfigTensorRtDynamicShapeEnabled(config.c))
391+
}
392+
393+
///
394+
/// \brief Enable tuned tensorrt dynamic shape.
395+
///
396+
/// \param shapeRangeInfoPath the path to shape_info file got in
397+
/// CollectShapeInfo mode.
398+
/// \param allowBuildAtRuntime allow build trt engine at runtime.
399+
///
400+
func (config *Config) EnableTunedTensorRtDynamicShape(shapeRangeInfoPath string, allowBuildAtRuntime bool) {
401+
cstr := C.CString(shapeRangeInfoPath)
402+
C.PD_ConfigEnableTunedTensorRtDynamicShape(config.c, cstr, cvtGoBoolToPD(allowBuildAtRuntime))
403+
defer C.free(unsafe.Pointer(cstr))
404+
}
405+
406+
///
407+
/// \brief A boolean state telling whether to use tuned tensorrt dynamic
408+
/// shape.
409+
///
410+
func (config *Config) TunedTensorRtDynamicShape() bool {
411+
return cvtPDBoolToGo(C.PD_ConfigTunedTensorRtDynamicShape(config.c))
412+
}
413+
414+
///
415+
/// \brief A boolean state telling whether to allow building trt engine at
416+
/// runtime.
417+
///
418+
func (config *Config) TrtAllowBuildAtRuntime() bool {
419+
return cvtPDBoolToGo(C.PD_ConfigTrtAllowBuildAtRuntime(config.c))
420+
}
421+
422+
///
423+
/// \brief Collect shape info of all tensors in compute graph.
424+
///
425+
/// \param shapeRangeInfoPath the path to save shape info.
426+
///
427+
func (config *Config) CollectShapeRangeInfo(shapeRangeInfoPath string) {
428+
cstr := C.CString(shapeRangeInfoPath)
429+
C.PD_ConfigCollectShapeRangeInfo(config.c, cstr)
430+
defer C.free(unsafe.Pointer(cstr))
431+
}
432+
433+
///
434+
/// \brief the shape info path in CollectShapeInfo mode.
435+
/// Attention, Please release the string manually.
436+
///
437+
func (config *Config) ShapeRangeInfoPath() string {
438+
cstr := C.PD_ConfigShapeRangeInfoPath(config.c)
439+
str := C.GoString(cstr)
440+
C.free(unsafe.Pointer(cstr))
441+
return str
442+
}
443+
444+
///
445+
/// \brief A boolean state telling whether to collect shape info.
446+
///
447+
func (config *Config) ShapeRangeInfoCollected() bool {
448+
return cvtPDBoolToGo(C.PD_ConfigShapeRangeInfoCollected(config.c))
449+
}
450+
386451
///
387452
/// \brief Prevent ops running in Paddle-TRT
388453
/// NOTE: just experimental, not an official stable API, easy to be broken.
@@ -649,8 +714,8 @@ func (config *Config) ModelFromMemory() bool {
649714
/// \brief Turn on memory optimize
650715
/// NOTE still in development.
651716
///
652-
func (config *Config) EnableMemoryOptim() {
653-
C.PD_ConfigEnableMemoryOptim(config.c)
717+
func (config *Config) EnableMemoryOptim(x bool) {
718+
C.PD_ConfigEnableMemoryOptim(config.c, cvtGoBoolToPD(x))
654719
}
655720

656721
///

paddle/fluid/inference/goapi/config_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ func TestNewConfig(t *testing.T) {
6969

7070
config.EnableMKLDNN()
7171

72-
config.EnableMemoryOptim()
72+
config.EnableMemoryOptim(true)
7373
t.Logf("MemoryOptimEnabled:%+v", config.MemoryOptimEnabled())
7474

7575
config.EnableProfile()

paddle/fluid/inference/goapi/predictor_test.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ package paddle
1717
import (
1818
"io/ioutil"
1919
"os"
20+
"runtime"
2021
"testing"
22+
"time"
2123
)
2224

2325
func TestNewPredictor(t *testing.T) {
@@ -106,6 +108,52 @@ func TestFromBuffer(t *testing.T) {
106108
t.Log(outData)
107109
}
108110

111+
func TestCollectShapeInfo(t *testing.T) {
112+
config := NewConfig()
113+
config.SetModel("./mobilenetv1/inference.pdmodel", "./mobilenetv1/inference.pdiparams")
114+
config.CollectShapeRangeInfo("shape_range_info.pbtxt")
115+
config.EnableUseGpu(100, 0)
116+
t.Logf("ShapeRangeInfoCollected:%+v", config.ShapeRangeInfoCollected())
117+
t.Logf("ShapeRangeInfoPath:%+v", config.ShapeRangeInfoPath())
118+
predictor := NewPredictor(config)
119+
inNames := predictor.GetInputNames()
120+
outNames := predictor.GetOutputNames()
121+
inHandle := predictor.GetInputHandle(inNames[0])
122+
inHandle.Reshape([]int32{1, 3, 224, 224})
123+
124+
data := make([]float32, numElements([]int32{1, 3, 224, 224}))
125+
for i := 0; i < int(numElements([]int32{1, 3, 224, 224})); i++ {
126+
data[i] = float32(i%255) * 0.1
127+
}
128+
inHandle.CopyFromCpu(data)
129+
130+
predictor.Run()
131+
132+
outHandle := predictor.GetOutputHandle(outNames[0])
133+
outShape := outHandle.Shape()
134+
outData := make([]float32, numElements(outShape))
135+
outHandle.CopyToCpu(outData)
136+
// Go is a GC language, so we must wait for gc to get shape_range_info.pbtxt
137+
predictor = nil
138+
runtime.GC()
139+
time.Sleep(2 * time.Second)
140+
141+
trt_config := NewConfig()
142+
trt_config.SetModel("./mobilenetv1/inference.pdmodel", "./mobilenetv1/inference.pdiparams")
143+
trt_config.EnableUseGpu(100, 0)
144+
trt_config.EnableTensorRtEngine(102400, 4, 3, PrecisionFloat32, false, false)
145+
trt_config.EnableTunedTensorRtDynamicShape("shape_range_info.pbtxt", true)
146+
trt_predictor := NewPredictor(trt_config)
147+
trt_inNames := trt_predictor.GetInputNames()
148+
trt_inHandle := trt_predictor.GetInputHandle(trt_inNames[0])
149+
trt_inHandle.Reshape([]int32{1, 3, 224, 224})
150+
151+
trt_inHandle.CopyFromCpu(data)
152+
153+
trt_predictor.Run()
154+
155+
}
156+
109157
func numElements(shape []int32) int32 {
110158
n := int32(1)
111159
for _, v := range shape {

paddle/fluid/inference/goapi/test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,5 @@ fi
2424
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/mklml/lib/:$PWD/paddle_inference_c/third_party/install/mkldnn/lib/:$PWD/paddle_inference_c/paddle/lib/
2525

2626
# 3. go test
27+
go clean -testcache
2728
go test -v ./...

paddle/fluid/inference/tests/api/analyzer_capi_exp_gpu_tester.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ TEST(PD_Config, use_gpu) {
120120
FALSE, FALSE);
121121
bool trt_enable = PD_ConfigTensorRtEngineEnabled(config);
122122
EXPECT_TRUE(trt_enable);
123-
PD_ConfigEnableMemoryOptim(config);
123+
PD_ConfigEnableMemoryOptim(config, true);
124124
bool memory_optim_enable = PD_ConfigMemoryOptimEnabled(config);
125125
EXPECT_TRUE(memory_optim_enable);
126126
PD_ConfigEnableProfile(config);

paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_config_tester.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ TEST(PD_Config, interface) {
8383
EXPECT_TRUE(mkldnn_bf16_enabled);
8484
#endif
8585

86-
PD_ConfigEnableMemoryOptim(config);
86+
PD_ConfigEnableMemoryOptim(config, true);
8787
bool memory_enabled = PD_ConfigMemoryOptimEnabled(config);
8888
EXPECT_TRUE(memory_enabled);
8989

paddle/fluid/inference/tests/api/analyzer_capi_exp_xpu_tester.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ TEST(PD_Config, use_xpu) {
4242
PD_ConfigSwitchIrOptim(config, TRUE);
4343
bool ir_optim = PD_IrOptim(config);
4444
EXPECT_TRUE(ir_optim);
45-
PD_ConfigEnableMemoryOptim(config);
45+
PD_ConfigEnableMemoryOptim(config, true);
4646
bool memory_optim_enable = PD_ConfigMemoryOptimEnabled(config);
4747
EXPECT_TRUE(memory_optim_enable);
4848
PD_ConfigEnableProfile(config);

0 commit comments

Comments
 (0)